80 files changed, 2505 insertions, 428 deletions
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
index 37b33cbe4d..e9a7e7d070 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
@@ -392,10 +392,15 @@ static void dispatch_rw_block_io(blkif_t *blkif,
 	for (i = 0; i < nseg; i++) {
 		if (unlikely(map[i].status != 0)) {
 			DPRINTK("invalid buffer -- could not remap it\n");
-			goto fail_flush;
+			map[i].handle = BLKBACK_INVALID_HANDLE;
+			ret |= 1;
 		}
 
 		pending_handle(pending_req, i) = map[i].handle;
+
+		if (ret)
+			continue;
+
 		set_phys_to_machine(__pa(vaddr(
 			pending_req, i)) >> PAGE_SHIFT,
 			FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
@@ -403,6 +408,9 @@ static void dispatch_rw_block_io(blkif_t *blkif,
 			(req->seg[i].first_sect << 9);
 	}
 
+	if (ret)
+		goto fail_flush;
+
 	if (vbd_translate(&preq, blkif, operation) != 0) {
 		DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
 			operation == READ ? "read" : "write",
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
index e79b653a97..63ebf8ed93 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
@@ -48,6 +48,10 @@
 #include <asm/hypervisor.h>
 #include <asm/maddr.h>
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
 #define BLKIF_STATE_DISCONNECTED 0
 #define BLKIF_STATE_CONNECTED    1
 #define BLKIF_STATE_SUSPENDED    2
@@ -468,6 +472,27 @@ int blkif_ioctl(struct inode *inode, struct file *filep,
 		      command, (long)argument, inode->i_rdev);
 
 	switch (command) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
+	case HDIO_GETGEO: {
+		struct block_device *bd = inode->i_bdev;
+		struct hd_geometry geo;
+		int ret;
+
+                if (!argument)
+                        return -EINVAL;
+
+		geo.start = get_start_sect(bd);
+		ret = blkif_getgeo(bd, &geo);
+		if (ret)
+			return ret;
+
+		if (copy_to_user((struct hd_geometry __user *)argument, &geo,
+				 sizeof(geo)))
+                        return -EFAULT;
+
+                return 0;
+	}
+#endif
 	case CDROMMULTISESSION:
 		DPRINTK("FIXME: support multisession CDs later\n");
 		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
index 8aa453d3a0..0c8b508c9a 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
@@ -36,6 +36,10 @@
 #include <linux/blkdev.h>
 #include <linux/list.h>
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
 #define BLKIF_MAJOR(dev) ((dev)>>8)
 #define BLKIF_MINOR(dev) ((dev) & 0xff)
 
@@ -91,7 +95,9 @@ static struct block_device_operations xlvbd_block_fops =
 	.open = blkif_open,
 	.release = blkif_release,
 	.ioctl  = blkif_ioctl,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
 	.getgeo = blkif_getgeo
+#endif
 };
 
 DEFINE_SPINLOCK(blkif_io_lock);
@@ -186,7 +192,11 @@ xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 	if (rq == NULL)
 		return -1;
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
 	elevator_init(rq, "noop");
+#else
+	elevator_init(rq, &elevator_noop);
+#endif
 
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
 	blk_queue_hardsect_size(rq, sector_size);
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/features.c b/linux-2.6-xen-sparse/drivers/xen/core/features.c
index 4d50caf50b..a76f58c04d 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/features.c
+++ b/linux-2.6-xen-sparse/drivers/xen/core/features.c
@@ -11,6 +11,10 @@
 #include <asm/hypervisor.h>
 #include <xen/features.h>
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
 u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
 /* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
 EXPORT_SYMBOL(xen_features);
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
index 3195279a87..c5132c13bb 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
@@ -44,6 +44,10 @@
 #include <asm/io.h>
 #include <xen/interface/memory.h>
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
 /* External tools reserve first few grant table entries. */
 #define NR_RESERVED_ENTRIES 8
 
diff --git a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
index 26e0610d15..e03e44a05a 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
@@ -64,6 +64,10 @@
 #include <xen/interface/grant_table.h>
 #include <xen/gnttab.h>
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
 /*
  * Mutually-exclusive module options to select receive data path:
  *  rx_copy : Packets are copied by network backend into local memory
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
index d7c7d05172..ce5acc2457 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
@@ -9,4 +9,5 @@ xenbus-objs += xenbus_client.o
 xenbus-objs += xenbus_comms.o
 xenbus-objs += xenbus_xs.o
 xenbus-objs += xenbus_probe.o
+obj-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
 obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
index 9b389ec06b..fd8355f6dd 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
@@ -35,6 +35,10 @@
 #include <xen/xenbus.h>
 #include <xen/driver_util.h>
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
 /* xenbus_probe.c */
 extern char *kasprintf(const char *fmt, ...);
 
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
index 38da320b67..ea8f3c283e 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
@@ -39,6 +39,10 @@
 #include <xen/xenbus.h>
 #include "xenbus_comms.h"
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
 static int xenbus_irq;
 
 extern void xenbus_probe(void *);
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
index bbe4a8c5a8..ba37e61856 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
@@ -40,6 +40,7 @@
 #include <linux/wait.h>
 #include <linux/fs.h>
 #include <linux/poll.h>
+#include <linux/mutex.h>
 
 #include "xenbus_comms.h"
 
@@ -49,6 +50,10 @@
 #include <xen/xen_proc.h>
 #include <asm/hypervisor.h>
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
 struct xenbus_dev_transaction {
 	struct list_head list;
 	struct xenbus_transaction handle;
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
index 87c9e6ed90..13e9f2105d 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
@@ -42,6 +42,7 @@
 #include <linux/mm.h>
 #include <linux/notifier.h>
 #include <linux/kthread.h>
+#include <linux/mutex.h>
 
 #include <asm/io.h>
 #include <asm/page.h>
@@ -55,6 +56,11 @@
 #include <xen/hvm.h>
 
 #include "xenbus_comms.h"
+#include "xenbus_probe.h"
+
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
 
 int xen_store_evtchn;
 struct xenstore_domain_interface *xen_store_interface;
@@ -67,12 +73,7 @@ static struct notifier_block *xenstore_chain;
 static void wait_for_devices(struct xenbus_driver *xendrv);
 
 static int xenbus_probe_frontend(const char *type, const char *name);
-static int xenbus_uevent_backend(struct device *dev, char **envp,
-				 int num_envp, char *buffer, int buffer_size);
-static int xenbus_probe_backend(const char *type, const char *domid);
 
-static int xenbus_dev_probe(struct device *_dev);
-static int xenbus_dev_remove(struct device *_dev);
 static void xenbus_dev_shutdown(struct device *_dev);
 
 /* If something in array of ids matches this device, return it. */
@@ -86,7 +87,7 @@ match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
 	return NULL;
 }
 
-static int xenbus_match(struct device *_dev, struct device_driver *_drv)
+int xenbus_match(struct device *_dev, struct device_driver *_drv)
 {
 	struct xenbus_driver *drv = to_xenbus_driver(_drv);
 
@@ -96,17 +97,6 @@ static int xenbus_match(struct device *_dev, struct device_driver *_drv)
 	return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
 }
 
-struct xen_bus_type
-{
-	char *root;
-	unsigned int levels;
-	int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
-	int (*probe)(const char *type, const char *dir);
-	struct bus_type bus;
-	struct device dev;
-};
-
-
 /* device/<type>/<id> => <type>-<id> */
 static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
 {
@@ -143,7 +133,7 @@ static void free_otherend_watch(struct xenbus_device *dev)
 }
 
 
-static int read_otherend_details(struct xenbus_device *xendev,
+int read_otherend_details(struct xenbus_device *xendev,
 				 char *id_node, char *path_node)
 {
 	int err = xenbus_gather(XBT_NIL, xendev->nodename,
@@ -176,12 +166,6 @@ static int read_backend_details(struct xenbus_device *xendev)
 }
 
 
-static int read_frontend_details(struct xenbus_device *xendev)
-{
-	return read_otherend_details(xendev, "frontend-id", "frontend");
-}
-
-
 /* Bus type for frontend drivers. */
 static struct xen_bus_type xenbus_frontend = {
 	.root = "device",
@@ -191,115 +175,17 @@ static struct xen_bus_type xenbus_frontend = {
 	.bus = {
 		.name     = "xen",
 		.match    = xenbus_match,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
 		.probe    = xenbus_dev_probe,
 		.remove   = xenbus_dev_remove,
 		.shutdown = xenbus_dev_shutdown,
+#endif
 	},
 	.dev = {
 		.bus_id = "xen",
 	},
 };
 
-/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
-static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
-{
-	int domid, err;
-	const char *devid, *type, *frontend;
-	unsigned int typelen;
-
-	type = strchr(nodename, '/');
-	if (!type)
-		return -EINVAL;
-	type++;
-	typelen = strcspn(type, "/");
-	if (!typelen || type[typelen] != '/')
-		return -EINVAL;
-
-	devid = strrchr(nodename, '/') + 1;
-
-	err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
-			    "frontend", NULL, &frontend,
-			    NULL);
-	if (err)
-		return err;
-	if (strlen(frontend) == 0)
-		err = -ERANGE;
-	if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
-		err = -ENOENT;
-
-	kfree(frontend);
-
-	if (err)
-		return err;
-
-	if (snprintf(bus_id, BUS_ID_SIZE,
-		     "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
-		return -ENOSPC;
-	return 0;
-}
-
-static struct xen_bus_type xenbus_backend = {
-	.root = "backend",
-	.levels = 3, 		/* backend/type/<frontend>/<id> */
-	.get_bus_id = backend_bus_id,
-	.probe = xenbus_probe_backend,
-	.bus = {
-		.name     = "xen-backend",
-		.match    = xenbus_match,
-		.probe    = xenbus_dev_probe,
-		.remove   = xenbus_dev_remove,
-//		.shutdown = xenbus_dev_shutdown,
-		.uevent   = xenbus_uevent_backend,
-	},
-	.dev = {
-		.bus_id = "xen-backend",
-	},
-};
-
-static int xenbus_uevent_backend(struct device *dev, char **envp,
-				 int num_envp, char *buffer, int buffer_size)
-{
-	struct xenbus_device *xdev;
-	struct xenbus_driver *drv;
-	int i = 0;
-	int length = 0;
-
-	DPRINTK("");
-
-	if (dev == NULL)
-		return -ENODEV;
-
-	xdev = to_xenbus_device(dev);
-	if (xdev == NULL)
-		return -ENODEV;
-
-	/* stuff we want to pass to /sbin/hotplug */
-	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
-		       "XENBUS_TYPE=%s", xdev->devicetype);
-
-	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
-		       "XENBUS_PATH=%s", xdev->nodename);
-
-	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
-		       "XENBUS_BASE_PATH=%s", xenbus_backend.root);
-
-	/* terminate, set to next free slot, shrink available space */
-	envp[i] = NULL;
-	envp = &envp[i];
-	num_envp -= i;
-	buffer = &buffer[length];
-	buffer_size -= length;
-
-	if (dev->driver) {
-		drv = to_xenbus_driver(dev->driver);
-		if (drv && drv->uevent)
-			return drv->uevent(xdev, envp, num_envp, buffer,
-					   buffer_size);
-	}
-
-	return 0;
-}
-
 static void otherend_changed(struct xenbus_watch *watch,
 			     const char **vec, unsigned int len)
 {
@@ -359,7 +245,7 @@ static int watch_otherend(struct xenbus_device *dev)
 }
 
 
-static int xenbus_dev_probe(struct device *_dev)
+int xenbus_dev_probe(struct device *_dev)
 {
 	struct xenbus_device *dev = to_xenbus_device(_dev);
 	struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
@@ -406,7 +292,7 @@ fail:
 	return -ENODEV;
 }
 
-static int xenbus_dev_remove(struct device *_dev)
+int xenbus_dev_remove(struct device *_dev)
 {
 	struct xenbus_device *dev = to_xenbus_device(_dev);
 	struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
@@ -444,14 +330,21 @@ static void xenbus_dev_shutdown(struct device *_dev)
 	put_device(&dev->dev);
 }
 
-static int xenbus_register_driver_common(struct xenbus_driver *drv,
-					 struct xen_bus_type *bus)
+int xenbus_register_driver_common(struct xenbus_driver *drv,
+				  struct xen_bus_type *bus)
 {
 	int ret;
 
 	drv->driver.name = drv->name;
 	drv->driver.bus = &bus->bus;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
 	drv->driver.owner = drv->owner;
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
+	drv->driver.probe = xenbus_dev_probe;
+	drv->driver.remove = xenbus_dev_remove;
+	drv->driver.shutdown = xenbus_dev_shutdown;
+#endif
 
 	mutex_lock(&xenwatch_mutex);
 	ret = driver_register(&drv->driver);
@@ -476,14 +369,6 @@ int xenbus_register_frontend(struct xenbus_driver *drv)
 }
 EXPORT_SYMBOL_GPL(xenbus_register_frontend);
 
-int xenbus_register_backend(struct xenbus_driver *drv)
-{
-	drv->read_otherend_details = read_frontend_details;
-
-	return xenbus_register_driver_common(drv, &xenbus_backend);
-}
-EXPORT_SYMBOL_GPL(xenbus_register_backend);
-
 void xenbus_unregister_driver(struct xenbus_driver *drv)
 {
 	driver_unregister(&drv->driver);
@@ -581,23 +466,29 @@ char *kasprintf(const char *fmt, ...)
 }
 
 static ssize_t xendev_show_nodename(struct device *dev,
-				    struct device_attribute *attr, char *buf)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
+				    struct device_attribute *attr,
+#endif
+				    char *buf)
 {
 	return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
 }
 DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
 
 static ssize_t xendev_show_devtype(struct device *dev,
-				   struct device_attribute *attr, char *buf)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
+				   struct device_attribute *attr,
+#endif
+				   char *buf)
 {
 	return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
 }
 DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
 
 
-static int xenbus_probe_node(struct xen_bus_type *bus,
-			     const char *type,
-			     const char *nodename)
+int xenbus_probe_node(struct xen_bus_type *bus,
+		      const char *type,
+		      const char *nodename)
 {
 	int err;
 	struct xenbus_device *xendev;
@@ -667,55 +558,6 @@ static int xenbus_probe_frontend(const char *type, const char *name)
 	return err;
 }
 
-/* backend/<typename>/<frontend-uuid>/<name> */
-static int xenbus_probe_backend_unit(const char *dir,
-				     const char *type,
-				     const char *name)
-{
-	char *nodename;
-	int err;
-
-	nodename = kasprintf("%s/%s", dir, name);
-	if (!nodename)
-		return -ENOMEM;
-
-	DPRINTK("%s\n", nodename);
-
-	err = xenbus_probe_node(&xenbus_backend, type, nodename);
-	kfree(nodename);
-	return err;
-}
-
-/* backend/<typename>/<frontend-domid> */
-static int xenbus_probe_backend(const char *type, const char *domid)
-{
-	char *nodename;
-	int err = 0;
-	char **dir;
-	unsigned int i, dir_n = 0;
-
-	DPRINTK("");
-
-	nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, domid);
-	if (!nodename)
-		return -ENOMEM;
-
-	dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
-	if (IS_ERR(dir)) {
-		kfree(nodename);
-		return PTR_ERR(dir);
-	}
-
-	for (i = 0; i < dir_n; i++) {
-		err = xenbus_probe_backend_unit(nodename, type, dir[i]);
-		if (err)
-			break;
-	}
-	kfree(dir);
-	kfree(nodename);
-	return err;
-}
-
 static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
 {
 	int err = 0;
@@ -736,7 +578,7 @@ static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
 	return err;
 }
 
-static int xenbus_probe_devices(struct xen_bus_type *bus)
+int xenbus_probe_devices(struct xen_bus_type *bus)
 {
 	int err = 0;
 	char **dir;
@@ -778,7 +620,7 @@ static int strsep_len(const char *str, char c, unsigned int len)
 	return (len == 0) ? i : -ERANGE;
 }
 
-static void dev_changed(const char *node, struct xen_bus_type *bus)
+void dev_changed(const char *node, struct xen_bus_type *bus)
 {
 	int exists, rootlen;
 	struct xenbus_device *dev;
@@ -823,25 +665,12 @@ static void frontend_changed(struct xenbus_watch *watch,
 	dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
 }
 
-static void backend_changed(struct xenbus_watch *watch,
-			    const char **vec, unsigned int len)
-{
-	DPRINTK("");
-
-	dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
-}
-
 /* We watch for devices appearing and vanishing. */
 static struct xenbus_watch fe_watch = {
 	.node = "device",
 	.callback = frontend_changed,
 };
 
-static struct xenbus_watch be_watch = {
-	.node = "backend",
-	.callback = backend_changed,
-};
-
 static int suspend_dev(struct device *dev, void *data)
 {
 	int err = 0;
@@ -912,7 +741,7 @@ void xenbus_suspend(void)
 	DPRINTK("");
 
 	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
-	bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, suspend_dev);
+	xenbus_backend_suspend(suspend_dev);
 	xs_suspend();
 }
 EXPORT_SYMBOL_GPL(xenbus_suspend);
@@ -922,7 +751,7 @@ void xenbus_resume(void)
 	xb_init_comms();
 	xs_resume();
 	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
-	bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, resume_dev);
+	xenbus_backend_resume(resume_dev);
 }
 EXPORT_SYMBOL_GPL(xenbus_resume);
 
@@ -955,20 +784,17 @@ void xenbus_probe(void *unused)
 {
 	BUG_ON((xenstored_ready <= 0));
 
-	/* Enumerate devices in xenstore. */
+	/* Enumerate devices in xenstore and watch for changes. */
 	xenbus_probe_devices(&xenbus_frontend);
-	xenbus_probe_devices(&xenbus_backend);
-
-	/* Watch for changes. */
 	register_xenbus_watch(&fe_watch);
-	register_xenbus_watch(&be_watch);
+	xenbus_backend_probe_and_watch();
 
 	/* Notify others that xenstore is up */
 	notifier_call_chain(&xenstore_chain, 0, NULL);
 }
 
 
-#ifdef CONFIG_PROC_FS
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
 static struct file_operations xsd_kva_fops;
 static struct proc_dir_entry *xsd_kva_intf;
 static struct proc_dir_entry *xsd_port_intf;
@@ -1020,7 +846,7 @@ static int __init xenbus_probe_init(void)
 
 	/* Register ourselves with the kernel bus subsystem */
 	bus_register(&xenbus_frontend.bus);
-	bus_register(&xenbus_backend.bus);
+	xenbus_backend_bus_register();
 
 	/*
 	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
@@ -1049,7 +875,7 @@ static int __init xenbus_probe_init(void)
 		xen_store_evtchn = xen_start_info->store_evtchn =
 			alloc_unbound.port;
 
-#ifdef CONFIG_PROC_FS
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
 		/* And finally publish the above info in /proc/xen */
 		xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600);
 		if (xsd_kva_intf) {
@@ -1091,7 +917,7 @@ static int __init xenbus_probe_init(void)
 
 	/* Register ourselves with the kernel device subsystem */
 	device_register(&xenbus_frontend.dev);
-	device_register(&xenbus_backend.dev);
+	xenbus_backend_device_register();
 
 	if (!is_initial_xendomain())
 		xenbus_probe(NULL);
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h
new file mode 100644
index 0000000000..1f61c6cca6
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h
@@ -0,0 +1,77 @@
+/******************************************************************************
+ * xenbus_probe.h
+ *
+ * Talks to Xen Store to figure out what devices we have.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 XenSource Ltd.
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _XENBUS_PROBE_H
+#define _XENBUS_PROBE_H
+
+#ifdef CONFIG_XEN_BACKEND
+extern void xenbus_backend_suspend(int (*fn)(struct device *, void *));
+extern void xenbus_backend_resume(int (*fn)(struct device *, void *));
+extern void xenbus_backend_probe_and_watch(void);
+extern void xenbus_backend_bus_register(void);
+extern void xenbus_backend_device_register(void);
+#else
+static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {}
+static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {}
+static inline void xenbus_backend_probe_and_watch(void) {}
+static inline void xenbus_backend_bus_register(void) {}
+static inline void xenbus_backend_device_register(void) {}
+#endif
+
+struct xen_bus_type
+{
+	char *root;
+	unsigned int levels;
+	int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
+	int (*probe)(const char *type, const char *dir);
+	struct bus_type bus;
+	struct device dev;
+};
+
+extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
+extern int xenbus_dev_probe(struct device *_dev);
+extern int xenbus_dev_remove(struct device *_dev);
+extern int xenbus_register_driver_common(struct xenbus_driver *drv,
+					 struct xen_bus_type *bus);
+extern int xenbus_probe_node(struct xen_bus_type *bus,
+			     const char *type,
+			     const char *nodename);
+extern int xenbus_probe_devices(struct xen_bus_type *bus);
+
+extern void dev_changed(const char *node, struct xen_bus_type *bus);
+
+/* Simplified asprintf. Probably belongs in lib */
+extern char *kasprintf(const char *fmt, ...);
+
+#endif
+
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c
new file mode 100644
index 0000000000..7f0dedd577
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -0,0 +1,271 @@
+/******************************************************************************
+ * Talks to Xen Store to figure out what devices we have (backend half).
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 Mike Wray, Hewlett-Packard
+ * Copyright (C) 2005, 2006 XenSource Ltd
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define DPRINTK(fmt, args...)				\
+	pr_debug("xenbus_probe (%s:%d) " fmt ".\n",	\
+		 __FUNCTION__, __LINE__, ##args)
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/maddr.h>
+#include <asm/pgtable.h>
+#include <asm/hypervisor.h>
+#include <xen/xenbus.h>
+#include <xen/xen_proc.h>
+#include <xen/evtchn.h>
+#include <xen/features.h>
+#include <xen/hvm.h>
+
+#include "xenbus_comms.h"
+#include "xenbus_probe.h"
+
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
+static int xenbus_uevent_backend(struct device *dev, char **envp,
+				 int num_envp, char *buffer, int buffer_size);
+static int xenbus_probe_backend(const char *type, const char *domid);
+
+extern int read_otherend_details(struct xenbus_device *xendev,
+				 char *id_node, char *path_node);
+
+static int read_frontend_details(struct xenbus_device *xendev)
+{
+	return read_otherend_details(xendev, "frontend-id", "frontend");
+}
+
+/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
+static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+	int domid, err;
+	const char *devid, *type, *frontend;
+	unsigned int typelen;
+
+	type = strchr(nodename, '/');
+	if (!type)
+		return -EINVAL;
+	type++;
+	typelen = strcspn(type, "/");
+	if (!typelen || type[typelen] != '/')
+		return -EINVAL;
+
+	devid = strrchr(nodename, '/') + 1;
+
+	err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
+			    "frontend", NULL, &frontend,
+			    NULL);
+	if (err)
+		return err;
+	if (strlen(frontend) == 0)
+		err = -ERANGE;
+	if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
+		err = -ENOENT;
+	kfree(frontend);
+
+	if (err)
+		return err;
+
+	if (snprintf(bus_id, BUS_ID_SIZE,
+		     "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
+		return -ENOSPC;
+	return 0;
+}
+
+static struct xen_bus_type xenbus_backend = {
+	.root = "backend",
+	.levels = 3, 		/* backend/type/<frontend>/<id> */
+	.get_bus_id = backend_bus_id,
+	.probe = xenbus_probe_backend,
+	.bus = {
+		.name     = "xen-backend",
+		.match    = xenbus_match,
+		.probe    = xenbus_dev_probe,
+		.remove   = xenbus_dev_remove,
+//		.shutdown = xenbus_dev_shutdown,
+		.uevent   = xenbus_uevent_backend,
+	},
+	.dev = {
+		.bus_id = "xen-backend",
+	},
+};
+
+static int xenbus_uevent_backend(struct device *dev, char **envp,
+				 int num_envp, char *buffer, int buffer_size)
+{
+	struct xenbus_device *xdev;
+	struct xenbus_driver *drv;
+	int i = 0;
+	int length = 0;
+
+	DPRINTK("");
+
+	if (dev == NULL)
+		return -ENODEV;
+
+	xdev = to_xenbus_device(dev);
+	if (xdev == NULL)
+		return -ENODEV;
+
+	/* stuff we want to pass to /sbin/hotplug */
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+		       "XENBUS_TYPE=%s", xdev->devicetype);
+
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+		       "XENBUS_PATH=%s", xdev->nodename);
+
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+		       "XENBUS_BASE_PATH=%s", xenbus_backend.root);
+
+	/* terminate, set to next free slot, shrink available space */
+	envp[i] = NULL;
+	envp = &envp[i];
+	num_envp -= i;
+	buffer = &buffer[length];
+	buffer_size -= length;
+
+	if (dev->driver) {
+		drv = to_xenbus_driver(dev->driver);
+		if (drv && drv->uevent)
+			return drv->uevent(xdev, envp, num_envp, buffer,
+					   buffer_size);
+	}
+
+	return 0;
+}
+
+int xenbus_register_backend(struct xenbus_driver *drv)
+{
+	drv->read_otherend_details = read_frontend_details;
+
+	return xenbus_register_driver_common(drv, &xenbus_backend);
+}
+EXPORT_SYMBOL_GPL(xenbus_register_backend);
+
+/* backend/<typename>/<frontend-uuid>/<name> */
+static int xenbus_probe_backend_unit(const char *dir,
+				     const char *type,
+				     const char *name)
+{
+	char *nodename;
+	int err;
+
+	nodename = kasprintf("%s/%s", dir, name);
+	if (!nodename)
+		return -ENOMEM;
+
+	DPRINTK("%s\n", nodename);
+
+	err = xenbus_probe_node(&xenbus_backend, type, nodename);
+	kfree(nodename);
+	return err;
+}
+
+/* backend/<typename>/<frontend-domid> */
+static int xenbus_probe_backend(const char *type, const char *domid)
+{
+	char *nodename;
+	int err = 0;
+	char **dir;
+	unsigned int i, dir_n = 0;
+
+	DPRINTK("");
+
+	nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, domid);
+	if (!nodename)
+		return -ENOMEM;
+
+	dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
+	if (IS_ERR(dir)) {
+		kfree(nodename);
+		return PTR_ERR(dir);
+	}
+
+	for (i = 0; i < dir_n; i++) {
+		err = xenbus_probe_backend_unit(nodename, type, dir[i]);
+		if (err)
+			break;
+	}
+	kfree(dir);
+	kfree(nodename);
+	return err;
+}
+
+static void backend_changed(struct xenbus_watch *watch,
+			    const char **vec, unsigned int len)
+{
+	DPRINTK("");
+
+	dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
+}
+
+static struct xenbus_watch be_watch = {
+	.node = "backend",
+	.callback = backend_changed,
+};
+
+void xenbus_backend_suspend(int (*fn)(struct device *, void *))
+{
+	DPRINTK("");
+	bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
+}
+
+void xenbus_backend_resume(int (*fn)(struct device *, void *))
+{
+	DPRINTK("");
+	bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
+}
+
+void xenbus_backend_probe_and_watch(void)
+{
+	xenbus_probe_devices(&xenbus_backend);
+	register_xenbus_watch(&be_watch);
+}
+
+void xenbus_backend_bus_register(void)
+{
+	bus_register(&xenbus_backend.bus);
+}
+
+void xenbus_backend_device_register(void)
+{
+	device_register(&xenbus_backend.dev);
+}
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
index 190fa1e794..1c1fc576c0 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
@@ -42,9 +42,15 @@
 #include <linux/fcntl.h>
 #include <linux/kthread.h>
 #include <linux/rwsem.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
 #include <xen/xenbus.h>
 #include "xenbus_comms.h"
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
 /* xenbus_probe.c */
 extern char *kasprintf(const char *fmt, ...);
 
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h
index 6a4e5e4508..807ca388c5 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h
@@ -9,6 +9,10 @@
 
 #include <linux/config.h>
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
 #define ADDR (*(volatile long *) addr)
 
 static __inline__ void synch_set_bit(int nr, volatile void * addr)
diff --git a/linux-2.6-xen-sparse/include/xen/xenbus.h b/linux-2.6-xen-sparse/include/xen/xenbus.h
index 8e259ce777..c7cb7eaa3a 100644
--- a/linux-2.6-xen-sparse/include/xen/xenbus.h
+++ b/linux-2.6-xen-sparse/include/xen/xenbus.h
@@ -38,6 +38,7 @@
 #include <linux/notifier.h>
 #include <linux/mutex.h>
 #include <linux/completion.h>
+#include <linux/init.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/grant_table.h>
 #include <xen/interface/io/xenbus.h>
diff --git a/tools/blktap/drivers/blktapctrl.c b/tools/blktap/drivers/blktapctrl.c
index 1c27e9eae7..0b00bc4bfd 100644
--- a/tools/blktap/drivers/blktapctrl.c
+++ b/tools/blktap/drivers/blktapctrl.c
@@ -204,81 +204,49 @@ static blkif_t *test_path(char *path, char **dev, int *type)
 
 static void add_disktype(blkif_t *blkif, int type)
 {
-	driver_list_entry_t *entry, *ptr, *last;
+	driver_list_entry_t *entry, **pprev;
 
-	if (type > MAX_DISK_TYPES) return;
+	if (type > MAX_DISK_TYPES)
+		return;
 
 	entry = malloc(sizeof(driver_list_entry_t));
 	entry->blkif = blkif;
-	entry->next = NULL;
-	ptr = active_disks[type];
+	entry->next  = NULL;
 
-	if (ptr == NULL) {
-		active_disks[type] = entry;
-		entry->prev = NULL;
-		return;
-	}
-
-	while (ptr != NULL) {
-		last = ptr;
-		ptr = ptr->next;
-	}
+	pprev = &active_disks[type];
+	while (*pprev != NULL)
+		pprev = &(*pprev)->next;
 
-	/*We've found the end of the list*/
-        last->next = entry;
-	entry->prev = last;
-	
-	return;
+	*pprev = entry;
+	entry->pprev = pprev;
 }
 
 static int del_disktype(blkif_t *blkif)
 {
-	driver_list_entry_t *ptr, *cur, *last;
+	driver_list_entry_t *entry, **pprev;
 	int type = blkif->drivertype, count = 0, close = 0;
 
-	if (type > MAX_DISK_TYPES) return 1;
-
-	ptr = active_disks[type];
-	last = NULL;
-	while (ptr != NULL) {
-		count++;
-		if (blkif == ptr->blkif) {
-			cur = ptr;
-			if (ptr->next != NULL) {
-				/*There's more later in the chain*/
-				if (!last) {
-					/*We're first in the list*/
-					active_disks[type] = ptr->next;
-					ptr = ptr->next;
-					ptr->prev = NULL;
-				}
-				else {
-					/*We're sandwiched*/
-					last->next = ptr->next;
-					ptr = ptr->next;
-					ptr->prev = last;
-				}
-				
-			} else if (last) {
-				/*There's more earlier in the chain*/
-				last->next = NULL;
-			} else {
-				/*We're the only entry*/
-				active_disks[type] = NULL;
-				if(dtypes[type]->single_handler == 1) 
-					close = 1;
-			}
-			DPRINTF("DEL_DISKTYPE: Freeing entry\n");
-			free(cur);
-			if (dtypes[type]->single_handler == 0) close = 1;
+	if (type > MAX_DISK_TYPES)
+		return 1;
 
-			return close;
-		}
-		last = ptr;
-		ptr = ptr->next;
+	pprev = &active_disks[type];
+	while ((*pprev != NULL) && ((*pprev)->blkif != blkif))
+		pprev = &(*pprev)->next;
+
+	if ((entry = *pprev) == NULL) {
+		DPRINTF("DEL_DISKTYPE: No match\n");
+		return 1;
 	}
-	DPRINTF("DEL_DISKTYPE: No match\n");
-	return 1;
+
+	*pprev = entry->next;
+	if (entry->next)
+		entry->next->pprev = pprev;
+
+	DPRINTF("DEL_DISKTYPE: Freeing entry\n");
+	free(entry);
+
+	/* Caller should close() if no single controller, or list is empty. */
+	return (!dtypes[type]->single_handler || (active_disks[type] == NULL));
 }
 
 static int write_msg(int fd, int msgtype, void *ptr, void *ptr2)
@@ -592,8 +560,8 @@ int unmap_blktapctrl(blkif_t *blkif)
 	if (del_disktype(blkif)) {
 		close(blkif->fds[WRITE]);
 		close(blkif->fds[READ]);
-
 	}
+
 	return 0;
 }
 
diff --git a/tools/blktap/drivers/tapdisk.c b/tools/blktap/drivers/tapdisk.c
index 7c88027eb3..859687d8b3 100644
--- a/tools/blktap/drivers/tapdisk.c
+++ b/tools/blktap/drivers/tapdisk.c
@@ -79,31 +79,17 @@ static void unmap_disk(struct td_state *s)
 {
 	tapdev_info_t *info = s->ring_info;
 	struct tap_disk *drv = s->drv;
-	fd_list_entry_t *ptr, *prev;
+	fd_list_entry_t *entry;
 
 	drv->td_close(s);
 
 	if (info != NULL && info->mem > 0)
 	        munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE);
 
-	ptr = s->fd_entry;
-	prev = ptr->prev;
-
-	if (prev) {
-		/*There are entries earlier in the list*/
-		prev->next = ptr->next;
-		if (ptr->next) {
-			ptr = ptr->next;
-			ptr->prev = prev;
-		}
-	} else {
-		/*We are the first entry in list*/
-		if (ptr->next) {
-			ptr = ptr->next;
-			fd_start = ptr;
-			ptr->prev = NULL;
-		} else fd_start = NULL;
-	}
+	entry = s->fd_entry;
+	*entry->pprev = entry->next;
+	if (entry->next)
+		entry->next->pprev = entry->pprev;
 
 	close(info->fd);
 
@@ -144,35 +130,29 @@ static inline int LOCAL_FD_SET(fd_set *readfds)
 	return 0;
 }
 
-static inline fd_list_entry_t *add_fd_entry(int tap_fd, int io_fd[MAX_IOFD], struct td_state *s)
+static inline fd_list_entry_t *add_fd_entry(
+	int tap_fd, int io_fd[MAX_IOFD], struct td_state *s)
 {
-	fd_list_entry_t *ptr, *last, *entry;
+	fd_list_entry_t **pprev, *entry;
 	int i;
+
 	DPRINTF("Adding fd_list_entry\n");
 
 	/*Add to linked list*/
 	s->fd_entry = entry = malloc(sizeof(fd_list_entry_t));
 	entry->tap_fd = tap_fd;
-	for (i = 0; i < MAX_IOFD; i++) entry->io_fd[i] = io_fd[i];
+	for (i = 0; i < MAX_IOFD; i++)
+		entry->io_fd[i] = io_fd[i];
 	entry->s = s;
 	entry->next = NULL;
 
-	ptr = fd_start;
-	if (ptr == NULL) {
-		/*We are the first entry*/
-		fd_start = entry;
-		entry->prev = NULL;
-		goto finish;
-	}
+	pprev = &fd_start;
+	while (*pprev != NULL)
+		pprev = &(*pprev)->next;
 
-	while (ptr != NULL) {
-		last = ptr;
-		ptr = ptr->next;
-	}
-	last->next = entry;
-	entry->prev = last;
+	*pprev = entry;
+	entry->pprev = pprev;
 
- finish:
 	return entry;
 }
 
diff --git a/tools/blktap/drivers/tapdisk.h b/tools/blktap/drivers/tapdisk.h
index 1f03156456..238350016b 100644
--- a/tools/blktap/drivers/tapdisk.h
+++ b/tools/blktap/drivers/tapdisk.h
@@ -191,9 +191,8 @@ static disk_info_t *dtypes[] = {
 };
 
 typedef struct driver_list_entry {
-	void *blkif;
-	void *prev;
-	void *next;
+	struct blkif *blkif;
+	struct driver_list_entry **pprev, *next;
 } driver_list_entry_t;
 
 typedef struct fd_list_entry {
@@ -201,8 +200,7 @@ typedef struct fd_list_entry {
 	int  tap_fd;
 	int  io_fd[MAX_IOFD];
 	struct td_state *s;
-	void *prev;
-	void *next;
+	struct fd_list_entry **pprev, *next;
 } fd_list_entry_t;
 
 int qcow_create(const char *filename, uint64_t total_size,
diff --git a/tools/firmware/acpi/acpi_fadt.h b/tools/firmware/acpi/acpi_fadt.h
index d1ecea5588..f30a1dac98 100644
--- a/tools/firmware/acpi/acpi_fadt.h
+++ b/tools/firmware/acpi/acpi_fadt.h
@@ -18,6 +18,8 @@
 #ifndef _FADT_H_
 #define _FADT_H_
 
+#include <xen/hvm/ioreq.h>
+
 //
 // FADT Definitions, see ACPI 2.0 specification for details.
 //
@@ -51,7 +53,9 @@
 //
 // Fixed Feature Flags
 // 
-#define ACPI_FIXED_FEATURE_FLAGS (ACPI_PROC_C1|ACPI_SLP_BUTTON|ACPI_WBINVD|ACPI_PWR_BUTTON|ACPI_FIX_RTC)
+#define ACPI_FIXED_FEATURE_FLAGS (ACPI_PROC_C1 | ACPI_SLP_BUTTON | \
+                                  ACPI_WBINVD | ACPI_PWR_BUTTON | \
+                                  ACPI_FIX_RTC | ACPI_TMR_VAL_EXT)
 
 //
 // PM1A Event Register Block Generic Address Information
@@ -59,7 +63,6 @@
 #define ACPI_PM1A_EVT_BLK_ADDRESS_SPACE_ID  ACPI_SYSTEM_IO
 #define ACPI_PM1A_EVT_BLK_BIT_WIDTH         0x20
 #define ACPI_PM1A_EVT_BLK_BIT_OFFSET        0x00
-#define ACPI_PM1A_EVT_BLK_ADDRESS           0x000000000000c010
 
 //
 // PM1B Event Register Block Generic Address Information
@@ -75,7 +78,6 @@
 #define ACPI_PM1A_CNT_BLK_ADDRESS_SPACE_ID  ACPI_SYSTEM_IO
 #define ACPI_PM1A_CNT_BLK_BIT_WIDTH         0x10
 #define ACPI_PM1A_CNT_BLK_BIT_OFFSET        0x00
-#define ACPI_PM1A_CNT_BLK_ADDRESS           (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04)
 
 //
 // PM1B Control Register Block Generic Address Information
@@ -100,7 +102,6 @@
 #define ACPI_PM_TMR_BLK_ADDRESS_SPACE_ID    ACPI_SYSTEM_IO
 #define ACPI_PM_TMR_BLK_BIT_WIDTH           0x20
 #define ACPI_PM_TMR_BLK_BIT_OFFSET          0x00
-#define ACPI_PM_TMR_BLK_ADDRESS             (ACPI_PM1A_EVT_BLK_ADDRESS + 0x08)
 
 //
 // General Purpose Event 0 Register Block Generic Address
diff --git a/tools/ioemu/vl.c b/tools/ioemu/vl.c
index 185547743a..e331abd1ae 100644
--- a/tools/ioemu/vl.c
+++ b/tools/ioemu/vl.c
@@ -6448,7 +6448,6 @@ int main(int argc, char **argv)
     fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n",
             shared_page_nr, (uint64_t)(page_array[shared_page_nr]));
 
-    /* not yet add for IA64 */
     buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
                                             PROT_READ|PROT_WRITE,
                                             page_array[shared_page_nr - 2]);
@@ -6465,7 +6464,7 @@ int main(int argc, char **argv)
 #elif defined(__ia64__)
   
     if (xc_ia64_get_pfn_list(xc_handle, domid, page_array,
-                             IO_PAGE_START >> PAGE_SHIFT, 1) != 1) {
+                             IO_PAGE_START >> PAGE_SHIFT, 3) != 3) {
         fprintf(logfile, "xc_ia64_get_pfn_list returned error %d\n", errno);
         exit(-1);
     }
@@ -6477,6 +6476,12 @@ int main(int argc, char **argv)
     fprintf(logfile, "shared page at pfn:%lx, mfn: %016lx\n",
             IO_PAGE_START >> PAGE_SHIFT, page_array[0]);
 
+    buffered_io_page =xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE,
+                                       page_array[2]);
+    fprintf(logfile, "Buffered IO page at pfn:%lx, mfn: %016lx\n",
+            BUFFER_IO_PAGE_START >> PAGE_SHIFT, page_array[2]);
+
     if (xc_ia64_get_pfn_list(xc_handle, domid,
                              page_array, 0, nr_pages) != nr_pages) {
         fprintf(logfile, "xc_ia64_get_pfn_list returned error %d\n", errno);
@@ -6496,6 +6501,7 @@ int main(int argc, char **argv)
         fprintf(logfile, "xc_map_foreign_batch returned error %d\n", errno);
         exit(-1);
     }
+    free(page_array);
 #endif
 #else  /* !CONFIG_DM */
 
diff --git a/tools/ioemu/vnc.c b/tools/ioemu/vnc.c
index 9b8bcffa37..631754ca03 100644
--- a/tools/ioemu/vnc.c
+++ b/tools/ioemu/vnc.c
@@ -203,6 +203,8 @@ static void set_bits_in_row(VncState *vs, uint64_t *row,
 	mask = ~(0ULL);
 
     h += y;
+    if (h > vs->ds->height)
+        h = vs->ds->height;
     for (; y < h; y++)
 	row[y] |= mask;
 }
diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index b5e61af64d..129b867ff6 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -31,7 +31,7 @@ GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
 
 -include $(XEN_TARGET_ARCH)/Makefile
 
-CFLAGS   += -Werror
+CFLAGS   += -Werror -Wmissing-prototypes
 CFLAGS   += -fno-strict-aliasing
 CFLAGS   += $(INCLUDES) -I.
 
diff --git a/tools/libxc/ia64/xc_ia64_hvm_build.c b/tools/libxc/ia64/xc_ia64_hvm_build.c
index 2c34b44a1d..0caaf343b3 100644
--- a/tools/libxc/ia64/xc_ia64_hvm_build.c
+++ b/tools/libxc/ia64/xc_ia64_hvm_build.c
@@ -551,8 +551,9 @@ setup_guest(int xc_handle, uint32_t dom, unsigned long memsize,
             char *image, unsigned long image_size, uint32_t vcpus,
             unsigned int store_evtchn, unsigned long *store_mfn)
 {
-    unsigned long page_array[2];
+    unsigned long page_array[3];
     shared_iopage_t *sp;
+    void *ioreq_buffer_page;
     unsigned long dom_memsize = (memsize << 20);
     DECLARE_DOMCTL;
 
@@ -587,7 +588,7 @@ setup_guest(int xc_handle, uint32_t dom, unsigned long memsize,
 
     /* Retrieve special pages like io, xenstore, etc. */
     if (xc_ia64_get_pfn_list(xc_handle, dom, page_array,
-                             IO_PAGE_START>>PAGE_SHIFT, 2) != 2) {
+                             IO_PAGE_START>>PAGE_SHIFT, 3) != 3) {
         PERROR("Could not get the page frame list");
         goto error_out;
     }
@@ -604,7 +605,10 @@ setup_guest(int xc_handle, uint32_t dom, unsigned long memsize,
 
     memset(sp, 0, PAGE_SIZE);
     munmap(sp, PAGE_SIZE);
-
+    ioreq_buffer_page = xc_map_foreign_range(xc_handle, dom,
+                               PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[2]); 
+    memset(ioreq_buffer_page,0,PAGE_SIZE);
+    munmap(ioreq_buffer_page, PAGE_SIZE);
     return 0;
 
 error_out:
diff --git a/tools/libxc/xc_linux_build.c b/tools/libxc/xc_linux_build.c
index 822e55601b..e215d7e198 100644
--- a/tools/libxc/xc_linux_build.c
+++ b/tools/libxc/xc_linux_build.c
@@ -128,7 +128,7 @@ static int probeimageformat(const char *image,
     return 0;
 }
 
-int load_initrd(int xc_handle, domid_t dom,
+static int load_initrd(int xc_handle, domid_t dom,
                 struct initrd_info *initrd,
                 unsigned long physbase,
                 xen_pfn_t *phys_to_mach)
diff --git a/tools/libxc/xc_linux_restore.c b/tools/libxc/xc_linux_restore.c
index 6e323340e3..e4bd09ed19 100644
--- a/tools/libxc/xc_linux_restore.c
+++ b/tools/libxc/xc_linux_restore.c
@@ -57,7 +57,7 @@ read_exact(int fd, void *buf, size_t count)
 ** This function inverts that operation, replacing the pfn values with
 ** the (now known) appropriate mfn values.
 */
-int uncanonicalize_pagetable(unsigned long type, void *page)
+static int uncanonicalize_pagetable(unsigned long type, void *page)
 {
     int i, pte_last;
     unsigned long pfn;
diff --git a/tools/libxc/xc_linux_save.c b/tools/libxc/xc_linux_save.c
index d955072726..7a5e4eaad6 100644
--- a/tools/libxc/xc_linux_save.c
+++ b/tools/libxc/xc_linux_save.c
@@ -413,7 +413,7 @@ static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
 ** which entries do not require canonicalization (in particular, those
 ** entries which map the virtual address reserved for the hypervisor).
 */
-int canonicalize_pagetable(unsigned long type, unsigned long pfn,
+static int canonicalize_pagetable(unsigned long type, unsigned long pfn,
                            const void *spage, void *dpage)
 {
 
diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c
index aea9cd78d8..768cf5c5cf 100644
--- a/tools/libxc/xc_private.c
+++ b/tools/libxc/xc_private.c
@@ -6,6 +6,7 @@
 
 #include <inttypes.h>
 #include "xc_private.h"
+#include "xg_private.h"
 
 int lock_pages(void *addr, size_t len)
 {
@@ -35,23 +36,6 @@ int xc_get_pfn_type_batch(int xc_handle,
     return do_domctl(xc_handle, &domctl);
 }
 
-#define GETPFN_ERR (~0U)
-unsigned int get_pfn_type(int xc_handle,
-                          unsigned long mfn,
-                          uint32_t dom)
-{
-    DECLARE_DOMCTL;
-    domctl.cmd = XEN_DOMCTL_getpageframeinfo;
-    domctl.u.getpageframeinfo.gmfn   = mfn;
-    domctl.domain = (domid_t)dom;
-    if ( do_domctl(xc_handle, &domctl) < 0 )
-    {
-        PERROR("Unexpected failure when getting page frame info!");
-        return GETPFN_ERR;
-    }
-    return domctl.u.getpageframeinfo.type;
-}
-
 int xc_mmuext_op(
     int xc_handle,
     struct mmuext_op *op,
diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py
index 88a8a169bc..8a761d89cf 100644
--- a/tools/python/xen/xend/image.py
+++ b/tools/python/xen/xend/image.py
@@ -471,7 +471,7 @@ class IA64_HVM_ImageHandler(HVMImageHandler):
     def getRequiredAvailableMemory(self, mem_kb):
         page_kb = 16
         # ROM size for guest firmware, ioreq page and xenstore page
-        extra_pages = 1024 + 2
+        extra_pages = 1024 + 3
         return mem_kb + extra_pages * page_kb
 
     def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
@@ -500,9 +500,12 @@ class X86_HVM_ImageHandler(HVMImageHandler):
         # overhead due to getRequiredInitialReservation.
         maxmem_kb = self.getRequiredInitialReservation(maxmem_kb)
 
-        # 1MB per vcpu plus 4Kib/Mib of RAM.  This is higher than 
-        # the minimum that Xen would allocate if no value were given.
-        return max(1024 * self.vm.getVCpuCount() + maxmem_kb / 256,
+        # 256 pages (1MB) per vcpu,
+        # plus 1 page per MiB of RAM for the P2M map,
+        # plus 1 page per MiB of RAM to shadow the resident processes.  
+        # This is higher than the minimum that Xen would allocate if no value 
+        # were given (but the Xen minimum is for safety, not performance).
+        return max(4 * (256 * self.vm.getVCpuCount() + 2 * (maxmem_kb / 1024)),
                    shadow_mem_kb)
 
 
diff --git a/tools/python/xen/xend/server/SrvDaemon.py b/tools/python/xen/xend/server/SrvDaemon.py
index f883e7da85..baba3c437d 100644
--- a/tools/python/xen/xend/server/SrvDaemon.py
+++ b/tools/python/xen/xend/server/SrvDaemon.py
@@ -9,6 +9,7 @@ import os
 import signal
 import sys
 import threading
+import time
 import linecache
 import pwd
 import re
@@ -106,12 +107,14 @@ class Daemon:
         os.close(2)
         if XEND_DEBUG:
             os.open('/dev/null', os.O_RDONLY)
-            os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT)
+            os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT|os.O_APPEND)
             os.dup(1)
         else:
             os.open('/dev/null', os.O_RDWR)
             os.dup(0)
-            os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT)
+            os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT|os.O_APPEND)
+        print >>sys.stderr, ("Xend started at %s." %
+                             time.asctime(time.localtime()))
 
         
     def start(self, trace=0):
diff --git a/tools/xenstat/xentop/xentop.1 b/tools/xenstat/xentop/xentop.1
index c7a856bed1..b925a3795f 100644
--- a/tools/xenstat/xentop/xentop.1
+++ b/tools/xenstat/xentop/xentop.1
@@ -47,6 +47,9 @@ seconds between updates (default 3)
 \fB\-n\fR, \fB\-\-networks\fR
 output network information
 .TP
+\fB\-x\fR, \fB\-\-vbds\fR
+output vbd block device data
+.TP
 \fB\-r\fR, \fB\-\-repeat\-header\fR
 repeat table header before each domain
 .TP
diff --git a/tools/xenstat/xentop/xentop.c b/tools/xenstat/xentop/xentop.c
index 7d3ec59d2e..b772f951fb 100644
--- a/tools/xenstat/xentop/xentop.c
+++ b/tools/xenstat/xentop/xentop.c
@@ -204,7 +204,7 @@ static void usage(const char *program)
 	       "-V, --version        output version information and exit\n"
 	       "-d, --delay=SECONDS  seconds between updates (default 3)\n"
 	       "-n, --networks       output vif network data\n"
-	       "-b, --vbds           output vbd block device data\n"
+	       "-x, --vbds           output vbd block device data\n"
 	       "-r, --repeat-header  repeat table header before each domain\n"
 	       "-v, --vcpus          output vcpu data\n"
 	       "-b, --batch	     output in batch mode, no user input accepted\n"
@@ -976,7 +976,7 @@ int main(int argc, char **argv)
 		{ "help",          no_argument,       NULL, 'h' },
 		{ "version",       no_argument,       NULL, 'V' },
 		{ "networks",      no_argument,       NULL, 'n' },
- 		{ "vbds",          no_argument,       NULL, 'x' },
+		{ "vbds",          no_argument,       NULL, 'x' },
 		{ "repeat-header", no_argument,       NULL, 'r' },
 		{ "vcpus",         no_argument,       NULL, 'v' },
 		{ "delay",         required_argument, NULL, 'd' },
@@ -1065,7 +1065,7 @@ int main(int argc, char **argv)
 					break;
 			} while (1);
 	}
-	
+
 	/* Cleanup occurs in cleanup(), so no work to do here. */
 
 	return 0;
diff --git a/tools/xm-test/lib/XmTestLib/arch.py b/tools/xm-test/lib/XmTestLib/arch.py
index d5a1aa55cb..5625a53546 100644
--- a/tools/xm-test/lib/XmTestLib/arch.py
+++ b/tools/xm-test/lib/XmTestLib/arch.py
@@ -124,6 +124,7 @@ _uname_to_arch_map = {
     "i486"  : "x86",
     "i586"  : "x86",
     "i686"  : "x86",
+    "x86_64": "x86_64",
     "ia64"  : "ia64",
     "ppc"   : "powerpc",
     "ppc64" : "powerpc",
@@ -131,7 +132,7 @@ _uname_to_arch_map = {
 
 # Lookup current platform.
 _arch = _uname_to_arch_map.get(os.uname()[4], "Unknown")
-if _arch == "x86" or _arch == "ia64":
+if _arch == "x86" or _arch == "x86_64" or _arch == "ia64":
     minSafeMem = ia_minSafeMem
     getDefaultKernel = ia_getDefaultKernel
     checkBuffer = ia_checkBuffer
diff --git a/unmodified_drivers/linux-2.6/blkfront/Makefile b/unmodified_drivers/linux-2.6/blkfront/Makefile
new file mode 100644
index 0000000000..64e7acd194
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/blkfront/Makefile
@@ -0,0 +1,3 @@
+ifneq ($(KERNELRELEASE),)
+include $(src)/Kbuild
+endif
diff --git a/unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopmd.h b/unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopmd.h
new file mode 100644
index 0000000000..ebde567575
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopmd.h
@@ -0,0 +1,14 @@
+#ifndef _PGTABLE_NOPMD_H
+#define _PGTABLE_NOPMD_H
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
+#error "This version of Linux should not need compat pgtable-nopmd.h"
+#endif
+
+#define pud_t             pgd_t
+#define pud_offset(d, va)     d
+#define pud_none(pud)         0
+#define pud_present(pud)      1
+#define PTRS_PER_PUD          1
+
+#endif /* _PGTABLE_NOPMD_H */
diff --git a/unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopud.h b/unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopud.h
new file mode 100644
index 0000000000..8b23299dd0
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopud.h
@@ -0,0 +1,14 @@
+#ifndef _PGTABLE_NOPUD_H
+#define _PGTABLE_NOPUD_H
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
+#error "This version of Linux should not need compat pgtable-nopud.h"
+#endif
+
+#define pud_t             pgd_t
+#define pud_offset(d, va)     d
+#define pud_none(pud)         0
+#define pud_present(pud)      1
+#define PTRS_PER_PUD          1
+
+#endif /* _PGTABLE_NOPUD_H */
diff --git a/unmodified_drivers/linux-2.6/compat-include/linux/io.h b/unmodified_drivers/linux-2.6/compat-include/linux/io.h
new file mode 100644
index 0000000000..10499023a5
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/compat-include/linux/io.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_IO_H
+#define _LINUX_IO_H
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+#error "This version of Linux should not need compat linux/io.h"
+#endif
+
+#include <asm/io.h>
+
+#endif
diff --git a/unmodified_drivers/linux-2.6/compat-include/linux/mutex.h b/unmodified_drivers/linux-2.6/compat-include/linux/mutex.h
new file mode 100644
index 0000000000..fcb4a899c7
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/compat-include/linux/mutex.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2006 Cisco Systems.  All rights reserved.
+ *
+ * This file is released under the GPLv2.
+ */
+
+/* mutex compatibility for pre-2.6.16 kernels */
+
+#ifndef __LINUX_MUTEX_H
+#define __LINUX_MUTEX_H
+
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+#error "This version of Linux should not need compat mutex.h"
+#endif
+
+#include <linux/version.h>
+#include <asm/semaphore.h>
+
+#define mutex semaphore
+#define DEFINE_MUTEX(foo) DECLARE_MUTEX(foo)
+#define mutex_init(foo) init_MUTEX(foo)
+#define mutex_lock(foo) down(foo)
+#define mutex_lock_interruptible(foo) down_interruptible(foo)
+/* this function follows the spin_trylock() convention, so        *
+ * it is negated to the down_trylock() return values! Be careful  */
+#define mutex_trylock(foo) !down_trylock(foo)
+#define mutex_unlock(foo) up(foo)
+
+#endif /* __LINUX_MUTEX_H */
diff --git a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h
new file mode 100644
index 0000000000..4978c63610
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h
@@ -0,0 +1,52 @@
+#ifndef COMPAT_INCLUDE_XEN_PLATFORM_COMPAT_H
+#define COMPAT_INCLUDE_XEN_PLATFORM_COMPAT_H
+
+#include <linux/version.h>
+
+#include <linux/spinlock.h>
+
+#if defined(__LINUX_COMPILER_H) && !defined(__always_inline)
+#define __always_inline inline
+#endif
+
+#if defined(__LINUX_SPINLOCK_H) && !defined(DEFINE_SPINLOCK)
+#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
+#endif
+
+#if defined(_LINUX_INIT_H) && !defined(__init)
+#define __init
+#endif
+
+#if defined(__LINUX_CACHE_H) && !defined(__read_mostly)
+#define __read_mostly
+#endif
+
+#if defined(_LINUX_SKBUFF_H) && !defined(NET_IP_ALIGN)
+#define NET_IP_ALIGN 0
+#endif
+
+#if defined(_LINUX_FS_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
+#define nonseekable_open(inode, filp) /* Nothing to do */
+#endif
+
+#if defined(_LINUX_MM_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)
+unsigned long vmalloc_to_pfn(void *addr);
+#endif
+
+#if defined(__LINUX_COMPLETION_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
+unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout);
+#endif
+
+#if defined(_LINUX_SCHED_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
+signed long schedule_timeout_interruptible(signed long timeout);
+#endif
+
+#if defined(_LINUX_SLAB_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
+void *kzalloc(size_t size, int flags);
+#endif
+
+#if defined(_LINUX_BLKDEV_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
+#define end_that_request_last(req, uptodate) end_that_request_last(req)
+#endif
+
+#endif
diff --git a/unmodified_drivers/linux-2.6/netfront/Makefile b/unmodified_drivers/linux-2.6/netfront/Makefile
new file mode 100644
index 0000000000..64e7acd194
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/netfront/Makefile
@@ -0,0 +1,3 @@
+ifneq ($(KERNELRELEASE),)
+include $(src)/Kbuild
+endif
diff --git a/unmodified_drivers/linux-2.6/overrides.mk b/unmodified_drivers/linux-2.6/overrides.mk
index 74ef12c4c9..53a96d87a4 100644
--- a/unmodified_drivers/linux-2.6/overrides.mk
+++ b/unmodified_drivers/linux-2.6/overrides.mk
@@ -9,4 +9,4 @@ EXTRA_CFLAGS += -DCONFIG_XEN_SHADOW_MODE -DCONFIG_XEN_SHADOW_TRANSLATE
 EXTRA_CFLAGS += -DCONFIG_XEN_BLKDEV_GRANT -DXEN_EVTCHN_MASK_OPS
 EXTRA_CFLAGS += -DCONFIG_XEN_NETDEV_GRANT_RX -DCONFIG_XEN_NETDEV_GRANT_TX
 EXTRA_CFLAGS += -D__XEN_INTERFACE_VERSION__=0x00030202
-EXTRA_CFLAGS += -I$(M)/include
+EXTRA_CFLAGS += -I$(M)/include -I$(M)/compat-include -DHAVE_XEN_PLATFORM_COMPAT_H
diff --git a/unmodified_drivers/linux-2.6/platform-pci/Kbuild b/unmodified_drivers/linux-2.6/platform-pci/Kbuild
index dda3d0e7cf..a44e50e94c 100644
--- a/unmodified_drivers/linux-2.6/platform-pci/Kbuild
+++ b/unmodified_drivers/linux-2.6/platform-pci/Kbuild
@@ -4,7 +4,7 @@ obj-m := xen-platform-pci.o
 
 EXTRA_CFLAGS += -I$(M)/platform-pci
 
-xen-platform-pci-objs := evtchn.o platform-pci.o gnttab.o xen_support.o features.o
+xen-platform-pci-objs := evtchn.o platform-pci.o gnttab.o xen_support.o features.o platform-compat.o
 
 # Can we do better ?
 ifeq ($(ARCH),ia64)
diff --git a/unmodified_drivers/linux-2.6/platform-pci/Makefile b/unmodified_drivers/linux-2.6/platform-pci/Makefile
new file mode 100644
index 0000000000..64e7acd194
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/platform-pci/Makefile
@@ -0,0 +1,3 @@
+ifneq ($(KERNELRELEASE),)
+include $(src)/Kbuild
+endif
diff --git a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c
index a38c50c1c4..4bd9592754 100644
--- a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c
+++ b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c
@@ -36,6 +36,10 @@
 #include <xen/features.h>
 #include "platform-pci.h"
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
 void *shared_info_area;
 
 #define MAX_EVTCHN 256
diff --git a/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c b/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c
new file mode 100644
index 0000000000..f3cef11620
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c
@@ -0,0 +1,116 @@
+#include <linux/config.h>
+#include <linux/version.h>
+
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <xen/platform-compat.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7)
+static int system_state = 1;
+EXPORT_SYMBOL(system_state);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8)
+size_t strcspn(const char *s, const char *reject)
+{
+        const char *p;
+        const char *r;
+        size_t count = 0;
+
+        for (p = s; *p != '\0'; ++p) {
+                for (r = reject; *r != '\0'; ++r) {
+                        if (*p == *r)
+                                return count;
+                }
+                ++count;
+        }
+
+        return count;
+}
+EXPORT_SYMBOL(strcspn);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)
+/*
+ * Map a vmalloc()-space virtual address to the physical page frame number.
+ */
+unsigned long vmalloc_to_pfn(void * vmalloc_addr)
+{
+        return page_to_pfn(vmalloc_to_page(vmalloc_addr));
+}
+EXPORT_SYMBOL(vmalloc_to_pfn);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
+unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout)
+{
+        might_sleep();
+
+        spin_lock_irq(&x->wait.lock);
+        if (!x->done) {
+                DECLARE_WAITQUEUE(wait, current);
+
+                wait.flags |= WQ_FLAG_EXCLUSIVE;
+                __add_wait_queue_tail(&x->wait, &wait);
+                do {
+                        __set_current_state(TASK_UNINTERRUPTIBLE);
+                        spin_unlock_irq(&x->wait.lock);
+                        timeout = schedule_timeout(timeout);
+                        spin_lock_irq(&x->wait.lock);
+                        if (!timeout) {
+                                __remove_wait_queue(&x->wait, &wait);
+                                goto out;
+                        }
+                } while (!x->done);
+                __remove_wait_queue(&x->wait, &wait);
+        }
+        x->done--;
+out:
+        spin_unlock_irq(&x->wait.lock);
+        return timeout;
+}
+EXPORT_SYMBOL(wait_for_completion_timeout);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12)
+/*
+    fake do_exit using complete_and_exit
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)
+asmlinkage NORET_TYPE void do_exit(long code)
+#else
+fastcall NORET_TYPE void do_exit(long code)
+#endif
+{
+    complete_and_exit(NULL, code);
+}
+EXPORT_SYMBOL_GPL(do_exit);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
+signed long schedule_timeout_interruptible(signed long timeout)
+{
+	__set_current_state(TASK_INTERRUPTIBLE);
+	return schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(schedule_timeout_interruptible);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
+/**
+ * kzalloc - allocate memory. The memory is set to zero.
+ * @size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ */
+void *kzalloc(size_t size, int flags)
+{
+	void *ret = kmalloc(size, flags);
+	if (ret)
+		memset(ret, 0, size);
+	return ret;
+}
+EXPORT_SYMBOL(kzalloc);
+#endif
diff --git a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
index cb9e8dd7e5..5ff6ba83f7 100644
--- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
+++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
@@ -33,6 +33,7 @@
 #include <asm/irq.h>
 #include <asm/uaccess.h>
 #include <asm/hypervisor.h>
+#include <asm/pgtable.h>
 #include <xen/interface/memory.h>
 #include <xen/features.h>
 #ifdef __ia64__
@@ -41,6 +42,10 @@
 
 #include "platform-pci.h"
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
 #define DRV_NAME    "xen-platform-pci"
 #define DRV_VERSION "0.10"
 #define DRV_RELDATE "03/03/2005"
diff --git a/unmodified_drivers/linux-2.6/platform-pci/xen_support.c b/unmodified_drivers/linux-2.6/platform-pci/xen_support.c
index b1a903b1c7..423d2f2e24 100644
--- a/unmodified_drivers/linux-2.6/platform-pci/xen_support.c
+++ b/unmodified_drivers/linux-2.6/platform-pci/xen_support.c
@@ -26,6 +26,10 @@
 #include <asm/hypervisor.h>
 #include "platform-pci.h"
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
 void xen_machphys_update(unsigned long mfn, unsigned long pfn)
 {
 	BUG();
diff --git a/unmodified_drivers/linux-2.6/xenbus/Makefile b/unmodified_drivers/linux-2.6/xenbus/Makefile
new file mode 100644
index 0000000000..64e7acd194
--- /dev/null
+++ b/unmodified_drivers/linux-2.6/xenbus/Makefile
@@ -0,0 +1,3 @@
+ifneq ($(KERNELRELEASE),)
+include $(src)/Kbuild
+endif
diff --git a/xen/arch/ia64/vmx/mmio.c b/xen/arch/ia64/vmx/mmio.c
index 579785b563..d605e828f0 100644
--- a/xen/arch/ia64/vmx/mmio.c
+++ b/xen/arch/ia64/vmx/mmio.c
@@ -52,6 +52,70 @@ struct mmio_list *lookup_mmio(u64 gpa, struct mmio_list *mio_base)
 #define PIB_OFST_INTA           0x1E0000
 #define PIB_OFST_XTP            0x1E0008
 
+#define HVM_BUFFERED_IO_RANGE_NR 1
+
+struct hvm_buffered_io_range {
+    unsigned long start_addr;
+    unsigned long length;
+};
+
+static struct hvm_buffered_io_range buffered_stdvga_range = {0xA0000, 0x20000};
+static struct hvm_buffered_io_range
+*hvm_buffered_io_ranges[HVM_BUFFERED_IO_RANGE_NR] =
+{
+    &buffered_stdvga_range
+};
+
+int hvm_buffered_io_intercept(ioreq_t *p)
+{
+    struct vcpu *v = current;
+    spinlock_t  *buffered_io_lock;
+    buffered_iopage_t *buffered_iopage =
+        (buffered_iopage_t *)(v->domain->arch.hvm_domain.buffered_io_va);
+    unsigned long tmp_write_pointer = 0;
+    int i;
+
+    /* ignore READ ioreq_t! */
+    if ( p->dir == IOREQ_READ )
+        return 0;
+
+    for ( i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++ ) {
+        if ( p->addr >= hvm_buffered_io_ranges[i]->start_addr &&
+             p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr +
+                                     hvm_buffered_io_ranges[i]->length )
+            break;
+    }
+
+    if ( i == HVM_BUFFERED_IO_RANGE_NR )
+        return 0;
+
+    buffered_io_lock = &v->domain->arch.hvm_domain.buffered_io_lock;
+    spin_lock(buffered_io_lock);
+
+    if ( buffered_iopage->write_pointer - buffered_iopage->read_pointer ==
+         (unsigned long)IOREQ_BUFFER_SLOT_NUM ) {
+        /* the queue is full.
+         * send the iopacket through the normal path.
+         * NOTE: The arithimetic operation could handle the situation for
+         * write_pointer overflow.
+         */
+        spin_unlock(buffered_io_lock);
+        return 0;
+    }
+
+    tmp_write_pointer = buffered_iopage->write_pointer % IOREQ_BUFFER_SLOT_NUM;
+
+    memcpy(&buffered_iopage->ioreq[tmp_write_pointer], p, sizeof(ioreq_t));
+
+    /*make the ioreq_t visible before write_pointer*/
+    wmb();
+    buffered_iopage->write_pointer++;
+
+    spin_unlock(buffered_io_lock);
+
+    return 1;
+}
+
 static void write_ipi (VCPU *vcpu, uint64_t addr, uint64_t value);
 
 static void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int ma)
@@ -156,7 +220,11 @@ static void low_mmio_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir)
     p->df = 0;
 
     p->io_count++;
-
+    if(hvm_buffered_io_intercept(p)){
+        p->state = STATE_IORESP_READY;
+        vmx_io_assist(v);
+        return ;
+    }else 
     vmx_send_assist_req(v);
     if(dir==IOREQ_READ){ //read
         *val=p->u.data;
diff --git a/xen/arch/ia64/vmx/vmx_init.c b/xen/arch/ia64/vmx/vmx_init.c
index 2694149d5b..9d8fbe8ec8 100644
--- a/xen/arch/ia64/vmx/vmx_init.c
+++ b/xen/arch/ia64/vmx/vmx_init.c
@@ -362,8 +362,8 @@ static const io_range_t io_ranges[] = {
 	{PIB_START, PIB_SIZE, GPFN_PIB},
 };
 
-/* Reseve 1 page for shared I/O and 1 page for xenstore.  */
-#define VMX_SYS_PAGES	(2 + (GFW_SIZE >> PAGE_SHIFT))
+/* Reseve 1 page for shared I/O ,1 page for xenstore and 1 page for buffer I/O.  */
+#define VMX_SYS_PAGES	(3 + (GFW_SIZE >> PAGE_SHIFT))
 #define VMX_CONFIG_PAGES(d) ((d)->max_pages - VMX_SYS_PAGES)
 
 static void vmx_build_physmap_table(struct domain *d)
@@ -425,8 +425,12 @@ static void vmx_build_physmap_table(struct domain *d)
 	mfn = page_to_mfn(list_entry(list_ent, struct page_info, list));
 	assign_domain_page(d, STORE_PAGE_START, mfn << PAGE_SHIFT);
 	list_ent = mfn_to_page(mfn)->list.next;
+	ASSERT(list_ent != &d->page_list);
+    
+    mfn = page_to_mfn(list_entry(list_ent, struct page_info, list));
+    assign_domain_page(d, BUFFER_IO_PAGE_START, mfn << PAGE_SHIFT);
+    list_ent = mfn_to_page(mfn)->list.next;
 	ASSERT(list_ent == &d->page_list);
-
 }
 
 void vmx_setup_platform(struct domain *d)
@@ -437,6 +441,10 @@ void vmx_setup_platform(struct domain *d)
 
 	d->arch.vmx_platform.shared_page_va =
 		(unsigned long)__va(__gpa_to_mpa(d, IO_PAGE_START));
+    //For buffered IO requests.
+    spin_lock_init(&d->arch.hvm_domain.buffered_io_lock);
+    d->arch.hvm_domain.buffered_io_va =
+        (unsigned long)__va(__gpa_to_mpa(d, BUFFER_IO_PAGE_START));
 	/* TEMP */
 	d->arch.vmx_platform.pib_base = 0xfee00000UL;
 
diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 31f2793fb9..89cc508d02 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -28,12 +28,14 @@ obj-y += microcode.o
 obj-y += mm.o
 obj-y += mpparse.o
 obj-y += nmi.o
+obj-y += numa.o
 obj-y += physdev.o
 obj-y += rwlock.o
 obj-y += setup.o
 obj-y += shutdown.o
 obj-y += smp.o
 obj-y += smpboot.o
+obj-y += srat.o
 obj-y += string.o
 obj-y += sysctl.o
 obj-y += time.o
diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
index 37623ff5eb..843a9232bf 100644
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -5,6 +5,7 @@ obj-y += hvm.o
 obj-y += i8254.o
 obj-y += i8259.o
 obj-y += rtc.o
+obj-y += pmtimer.o
 obj-y += instrlen.o
 obj-y += intercept.o
 obj-y += io.o
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 47d7ca46c4..f950d05295 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -43,7 +43,7 @@
 #include <asm/mc146818rtc.h>
 #include <asm/spinlock.h>
 #include <asm/hvm/hvm.h>
-#include <asm/hvm/vpit.h>
+#include <asm/hvm/vpt.h>
 #include <asm/hvm/support.h>
 #include <public/sched.h>
 #include <public/hvm/ioreq.h>
@@ -285,6 +285,7 @@ void hvm_setup_platform(struct domain* d)
                pt_timer_fn, v, v->processor);
     pit_init(v, cpu_khz);
     rtc_init(v, RTC_PORT(0), RTC_IRQ);
+    pmtimer_init(v, ACPI_PM_TMR_BLK_ADDRESS); 
 }
 
 void pic_irq_request(void *data, int level)
diff --git a/xen/arch/x86/hvm/i8254.c b/xen/arch/x86/hvm/i8254.c
index 5f27ee25b2..464ddee8f9 100644
--- a/xen/arch/x86/hvm/i8254.c
+++ b/xen/arch/x86/hvm/i8254.c
@@ -38,7 +38,7 @@
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/io.h>
 #include <asm/hvm/support.h>
-#include <asm/hvm/vpit.h>
+#include <asm/hvm/vpt.h>
 #include <asm/current.h>
 
 /* Enable DEBUG_PIT may cause guest calibration inaccuracy */
diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
index 8993572e10..a1ce8ddf33 100644
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -35,7 +35,7 @@
 #include <asm/shadow.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
-#include <asm/hvm/vpit.h>
+#include <asm/hvm/vpt.h>
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vlapic.h>
 
diff --git a/xen/arch/x86/hvm/pmtimer.c b/xen/arch/x86/hvm/pmtimer.c
new file mode 100644
index 0000000000..e0c93536ea
--- /dev/null
+++ b/xen/arch/x86/hvm/pmtimer.c
@@ -0,0 +1,63 @@
+#include <asm/hvm/vpt.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/support.h>
+
+#define TMR_STS (1 << 0)
+static void pmt_update_status(void *opaque)
+{
+   PMTState *s = opaque;
+   s->pm1_status |= TMR_STS;
+
+   /* TODO: When TMR_EN == 1, generate a SCI event */
+
+   set_timer(&s->timer, NOW() + (1000000000ULL << 31) / FREQUENCE_PMTIMER);
+}
+
+static int handle_pmt_io(ioreq_t *p)
+{
+    struct vcpu *v = current;
+    PMTState *s = &v->domain->arch.hvm_domain.pl_time.vpmt;
+    uint64_t curr_gtime;
+
+    if (p->size != 4 ||
+        p->pdata_valid ||
+        p->type != IOREQ_TYPE_PIO){
+        printk("HVM_PMT: wrong PM timer IO\n");
+        return 1;
+    }
+    
+    if (p->dir == 0) { /* write */
+        /* PM_TMR_BLK is read-only */
+        return 1;
+    } else if (p->dir == 1) { /* read */
+        curr_gtime = hvm_get_guest_time(s->vcpu);
+        s->pm1_timer += ((curr_gtime - s->last_gtime) * s->scale) >> 32;
+        p->u.data = s->pm1_timer;
+        s->last_gtime = curr_gtime;
+        return 1;
+    }
+    return 0;
+}
+
+void pmtimer_init(struct vcpu *v, int base)
+{
+    PMTState *s = &v->domain->arch.hvm_domain.pl_time.vpmt;
+
+    s->pm1_timer = 0;
+    s->pm1_status = 0;
+    s->scale = ((uint64_t)FREQUENCE_PMTIMER << 32) / ticks_per_sec(v);
+    s->vcpu = v;
+
+    init_timer(&s->timer, pmt_update_status, s, v->processor);
+    /* ACPI supports a 32-bit power management timer */
+    set_timer(&s->timer, NOW() + (1000000000ULL << 31) / FREQUENCE_PMTIMER);
+    
+    register_portio_handler(base, 4, handle_pmt_io);
+}
+
+void pmtimer_deinit(struct domain *d)
+{
+    PMTState *s = &d->arch.hvm_domain.pl_time.vpmt;
+
+    kill_timer(&s->timer);
+}
diff --git a/xen/arch/x86/hvm/rtc.c b/xen/arch/x86/hvm/rtc.c
index 210168c26b..0f5e11986e 100644
--- a/xen/arch/x86/hvm/rtc.c
+++ b/xen/arch/x86/hvm/rtc.c
@@ -23,7 +23,7 @@
  */
 
 #include <asm/mc146818rtc.h>
-#include <asm/hvm/vpit.h>
+#include <asm/hvm/vpt.h>
 #include <asm/hvm/io.h>
 #include <asm/hvm/support.h>
 #include <asm/current.h>
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 92167d74fb..88c0802425 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -922,6 +922,7 @@ static void svm_relinquish_guest_resources(struct domain *d)
 
     kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
     rtc_deinit(d);
+    pmtimer_deinit(d);
 
     if ( d->arch.hvm_domain.shared_page_va )
         unmap_domain_page_global(
@@ -937,6 +938,7 @@ static void svm_migrate_timers(struct vcpu *v)
     struct periodic_time *pt = 
         &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
     struct RTCState *vrtc = &v->domain->arch.hvm_domain.pl_time.vrtc;
+    struct PMTState *vpmt = &v->domain->arch.hvm_domain.pl_time.vpmt;
 
     if ( pt->enabled )
     {
@@ -947,6 +949,7 @@ static void svm_migrate_timers(struct vcpu *v)
         migrate_timer(&VLAPIC(v)->vlapic_timer, v->processor);
     migrate_timer(&vrtc->second_timer, v->processor);
     migrate_timer(&vrtc->second_timer2, v->processor);
+    migrate_timer(&vpmt->timer, v->processor);
 }
 
 
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 476f8beae9..ac1be73556 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -147,6 +147,7 @@ static void vmx_relinquish_guest_resources(struct domain *d)
 
     kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
     rtc_deinit(d);
+    pmtimer_deinit(d);
 
     if ( d->arch.hvm_domain.shared_page_va )
         unmap_domain_page_global(
@@ -489,6 +490,7 @@ void vmx_migrate_timers(struct vcpu *v)
 {
     struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
     struct RTCState *vrtc = &v->domain->arch.hvm_domain.pl_time.vrtc;
+    struct PMTState *vpmt = &v->domain->arch.hvm_domain.pl_time.vpmt;
 
     if ( pt->enabled )
     {
@@ -499,6 +501,7 @@ void vmx_migrate_timers(struct vcpu *v)
         migrate_timer(&VLAPIC(v)->vlapic_timer, v->processor);
     migrate_timer(&vrtc->second_timer, v->processor);
     migrate_timer(&vrtc->second_timer2, v->processor);
+    migrate_timer(&vpmt->timer, v->processor);
 }
 
 static void vmx_store_cpu_guest_regs(
diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
new file mode 100644
index 0000000000..d332320af6
--- /dev/null
+++ b/xen/arch/x86/numa.c
@@ -0,0 +1,308 @@
+/* 
+ * Generic VM initialization for x86-64 NUMA setups.
+ * Copyright 2002,2003 Andi Kleen, SuSE Labs.
+ * Adapted for Xen: Ryan Harper <ryanh@us.ibm.com>
+ */ 
+
+#include <xen/mm.h>
+#include <xen/string.h>
+#include <xen/init.h>
+#include <xen/ctype.h>
+#include <xen/nodemask.h>
+#include <xen/numa.h>
+#include <xen/keyhandler.h>
+#include <xen/time.h>
+#include <xen/smp.h>
+#include <asm/acpi.h>
+
+static int numa_setup(char *s);
+custom_param("numa", numa_setup);
+
+#ifndef Dprintk
+#define Dprintk(x...)
+#endif
+
+/* from proto.h */
+#define round_up(x,y) ((((x)+(y))-1) & (~((y)-1)))
+
+struct node_data node_data[MAX_NUMNODES];
+
+int memnode_shift;
+u8  memnodemap[NODEMAPSIZE];
+
+unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
+	[0 ... NR_CPUS-1] = NUMA_NO_NODE
+};
+unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+ 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
+
+nodemask_t node_online_map = { { [0] = 1UL } };
+
+/* Default NUMA to off for now. acpi=on required to enable it. */
+int numa_off __initdata = 1;
+
+int acpi_numa __initdata;
+
+/*
+ * Given a shift value, try to populate memnodemap[]
+ * Returns :
+ * 1 if OK
+ * 0 if memnodmap[] too small (of shift too small)
+ * -1 if node overlap or lost ram (shift too big)
+ */
+static int __init
+populate_memnodemap(const struct node *nodes, int numnodes, int shift)
+{
+	int i; 
+	int res = -1;
+	unsigned long addr, end;
+
+	if (shift >= 64)
+		return -1;
+	memset(memnodemap, 0xff, sizeof(memnodemap));
+	for (i = 0; i < numnodes; i++) {
+		addr = nodes[i].start;
+		end = nodes[i].end;
+		if (addr >= end)
+			continue;
+		if ((end >> shift) >= NODEMAPSIZE)
+			return 0;
+		do {
+			if (memnodemap[addr >> shift] != 0xff)
+				return -1;
+			memnodemap[addr >> shift] = i;
+			addr += (1UL << shift);
+		} while (addr < end);
+		res = 1;
+	} 
+	return res;
+}
+
+int __init compute_hash_shift(struct node *nodes, int numnodes)
+{
+	int shift = 20;
+
+	while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0)
+		shift++;
+
+	printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
+		shift);
+
+	if (populate_memnodemap(nodes, numnodes, shift) != 1) {
+		printk(KERN_INFO
+	"Your memory is not aligned you need to rebuild your kernel "
+	"with a bigger NODEMAPSIZE shift=%d\n",
+			shift);
+		return -1;
+	}
+	return shift;
+}
+
+/* initialize NODE_DATA given nodeid and start/end */
+void __init setup_node_bootmem(int nodeid, u64 start, u64 end)
+{ 
+	unsigned long start_pfn, end_pfn;
+
+	start_pfn = start >> PAGE_SHIFT;
+	end_pfn = end >> PAGE_SHIFT;
+
+	NODE_DATA(nodeid)->node_id = nodeid;
+	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
+	NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
+
+	node_set_online(nodeid);
+} 
+
+void __init numa_init_array(void)
+{
+	int rr, i;
+	/* There are unfortunately some poorly designed mainboards around
+	   that only connect memory to a single CPU. This breaks the 1:1 cpu->node
+	   mapping. To avoid this fill in the mapping for all possible
+	   CPUs, as the number of CPUs is not known yet. 
+	   We round robin the existing nodes. */
+	rr = first_node(node_online_map);
+	for (i = 0; i < NR_CPUS; i++) {
+		if (cpu_to_node[i] != NUMA_NO_NODE)
+			continue;
+ 		numa_set_node(i, rr);
+		rr = next_node(rr, node_online_map);
+		if (rr == MAX_NUMNODES)
+			rr = first_node(node_online_map);
+	}
+
+}
+
+#ifdef CONFIG_NUMA_EMU
+static int numa_fake __initdata = 0;
+
+/* Numa emulation */
+static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
+{
+ 	int i;
+ 	struct node nodes[MAX_NUMNODES];
+ 	unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
+
+ 	/* Kludge needed for the hash function */
+ 	if (hweight64(sz) > 1) {
+ 		unsigned long x = 1;
+ 		while ((x << 1) < sz)
+ 			x <<= 1;
+ 		if (x < sz/2)
+ 			printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n");
+ 		sz = x;
+ 	}
+
+ 	memset(&nodes,0,sizeof(nodes));
+ 	for (i = 0; i < numa_fake; i++) {
+ 		nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
+ 		if (i == numa_fake-1)
+ 			sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
+ 		nodes[i].end = nodes[i].start + sz;
+ 		printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64" (%"PRIu64"MB)\n",
+		       i,
+		       nodes[i].start, nodes[i].end,
+		       (nodes[i].end - nodes[i].start) >> 20);
+		node_set_online(i);
+ 	}
+ 	memnode_shift = compute_hash_shift(nodes, numa_fake);
+ 	if (memnode_shift < 0) {
+ 		memnode_shift = 0;
+ 		printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
+ 		return -1;
+ 	}
+ 	for_each_online_node(i)
+ 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+ 	numa_init_array();
+ 	return 0;
+}
+#endif
+
+void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
+{ 
+	int i;
+
+#ifdef CONFIG_NUMA_EMU
+	if (numa_fake && !numa_emulation(start_pfn, end_pfn))
+		return;
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+	if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
+					  end_pfn << PAGE_SHIFT))
+		return;
+#endif
+
+	printk(KERN_INFO "%s\n",
+	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
+
+	printk(KERN_INFO "Faking a node at %016lx-%016lx\n", 
+	       start_pfn << PAGE_SHIFT,
+	       end_pfn << PAGE_SHIFT); 
+	/* setup dummy node covering all memory */ 
+	memnode_shift = 63; 
+	memnodemap[0] = 0;
+	nodes_clear(node_online_map);
+	node_set_online(0);
+	for (i = 0; i < NR_CPUS; i++)
+		numa_set_node(i, 0);
+	node_to_cpumask[0] = cpumask_of_cpu(0);
+	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
+}
+
+__cpuinit void numa_add_cpu(int cpu)
+{
+	set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
+} 
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+	cpu_to_node[cpu] = node;
+}
+
+/* [numa=off] */
+static __init int numa_setup(char *opt) 
+{ 
+	if (!strncmp(opt,"off",3))
+		numa_off = 1;
+	if (!strncmp(opt,"on",2))
+		numa_off = 0;
+#ifdef CONFIG_NUMA_EMU
+	if(!strncmp(opt, "fake=", 5)) {
+		numa_off = 0;
+		numa_fake = simple_strtoul(opt+5,NULL,0); ;
+		if (numa_fake >= MAX_NUMNODES)
+			numa_fake = MAX_NUMNODES;
+	}
+#endif
+#ifdef CONFIG_ACPI_NUMA
+	if (!strncmp(opt,"noacpi",6)) {
+		numa_off = 0;
+		acpi_numa = -1;
+	}
+#endif
+	return 1;
+} 
+
+/*
+ * Setup early cpu_to_node.
+ *
+ * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
+ * and apicid_to_node[] tables have valid entries for a CPU.
+ * This means we skip cpu_to_node[] initialisation for NUMA
+ * emulation and faking node case (when running a kernel compiled
+ * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
+ * is already initialized in a round robin manner at numa_init_array,
+ * prior to this call, and this initialization is good enough
+ * for the fake NUMA cases.
+ */
+void __init init_cpu_to_node(void)
+{
+	int i;
+ 	for (i = 0; i < NR_CPUS; i++) {
+		u8 apicid = x86_cpu_to_apicid[i];
+		if (apicid == BAD_APICID)
+			continue;
+		if (apicid_to_node[apicid] == NUMA_NO_NODE)
+			continue;
+		numa_set_node(i,apicid_to_node[apicid]);
+	}
+}
+
+EXPORT_SYMBOL(cpu_to_node);
+EXPORT_SYMBOL(node_to_cpumask);
+EXPORT_SYMBOL(memnode_shift);
+EXPORT_SYMBOL(memnodemap);
+EXPORT_SYMBOL(node_data);
+
+static void dump_numa(unsigned char key)
+{
+	s_time_t now = NOW();
+	int i;
+
+	printk("'%c' pressed -> dumping numa info (now-0x%X:%08X)\n", key,
+		  (u32)(now>>32), (u32)now);
+
+	for_each_online_node(i) {
+		unsigned long pa = (NODE_DATA(i)->node_start_pfn + 1)<< PAGE_SHIFT;
+		printk("idx%d -> NODE%d start->%lu size->%lu\n",
+			  i, NODE_DATA(i)->node_id,
+			  NODE_DATA(i)->node_start_pfn,
+			  NODE_DATA(i)->node_spanned_pages);
+		/* sanity check phys_to_nid() */
+		printk("phys_to_nid(%lx) -> %d should be %d\n", pa, phys_to_nid(pa),
+			  NODE_DATA(i)->node_id);
+	}
+	for_each_online_cpu(i)
+		printk("CPU%d -> NODE%d\n", i, cpu_to_node[i]);
+}
+
+static __init int register_numa_trigger(void)
+{
+	register_keyhandler('u', dump_numa, "dump numa info");
+	return 0;
+}
+__initcall(register_numa_trigger);
+
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 2c8b638944..15c42b133c 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -16,6 +16,7 @@
 #include <xen/percpu.h>
 #include <xen/hypercall.h>
 #include <xen/keyhandler.h>
+#include <xen/numa.h>
 #include <public/version.h>
 #include <asm/bitops.h>
 #include <asm/smp.h>
@@ -29,6 +30,7 @@
 
 extern void dmi_scan_machine(void);
 extern void generic_apic_probe(void);
+extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
 
 /*
  * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
@@ -257,6 +259,20 @@ static void __init init_idle_domain(void)
     setup_idle_pagetable();
 }
 
+static void srat_detect_node(int cpu)
+{
+    unsigned node;
+    u8 apicid = x86_cpu_to_apicid[cpu];
+
+    node = apicid_to_node[apicid];
+    if ( node == NUMA_NO_NODE )
+        node = 0;
+    numa_set_node(cpu, node);
+
+    if ( acpi_numa > 0 )
+        printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
+}
+
 void __init __start_xen(multiboot_info_t *mbi)
 {
     char __cmdline[] = "", *cmdline = __cmdline;
@@ -485,6 +501,12 @@ void __init __start_xen(multiboot_info_t *mbi)
 
     init_frametable();
 
+    acpi_boot_table_init();
+
+    acpi_numa_init();
+
+    numa_initmem_init(0, max_page);
+
     end_boot_allocator();
 
     /* Initialise the Xen heap, skipping RAM holes. */
@@ -536,9 +558,10 @@ void __init __start_xen(multiboot_info_t *mbi)
 
     generic_apic_probe();
 
-    acpi_boot_table_init();
     acpi_boot_init();
 
+    init_cpu_to_node();
+
     if ( smp_found_config )
         get_smp_config();
 
@@ -589,6 +612,11 @@ void __init __start_xen(multiboot_info_t *mbi)
             break;
         if ( !cpu_online(i) )
             __cpu_up(i);
+
+        /* Set up cpu_to_node[]. */
+        srat_detect_node(i);
+        /* Set up node_to_cpumask based on cpu_to_node[]. */
+        numa_add_cpu(i);        
     }
 
     printk("Brought up %ld CPUs\n", (long)num_online_cpus());
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index eb2d21111c..f7d8712563 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -43,6 +43,7 @@
 #include <xen/delay.h>
 #include <xen/softirq.h>
 #include <xen/serial.h>
+#include <xen/numa.h>
 #include <asm/current.h>
 #include <asm/mc146818rtc.h>
 #include <asm/desc.h>
@@ -628,7 +629,7 @@ u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICI
 static void map_cpu_to_logical_apicid(void)
 {
 	int cpu = smp_processor_id();
-	int apicid = logical_smp_processor_id();
+	int apicid = hard_smp_processor_id();
 
 	cpu_2_logical_apicid[cpu] = apicid;
 	map_cpu_to_node(cpu, apicid_to_node(apicid));
diff --git a/xen/arch/x86/srat.c b/xen/arch/x86/srat.c
new file mode 100644
index 0000000000..ea462e222b
--- /dev/null
+++ b/xen/arch/x86/srat.c
@@ -0,0 +1,315 @@
+/*
+ * ACPI 3.0 based NUMA setup
+ * Copyright 2004 Andi Kleen, SuSE Labs.
+ *
+ * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
+ *
+ * Called from acpi_numa_init while reading the SRAT and SLIT tables.
+ * Assumes all memory regions belonging to a single proximity domain
+ * are in one chunk. Holes between them will be included in the node.
+ * 
+ * Adapted for Xen: Ryan Harper <ryanh@us.ibm.com>
+ */
+
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <xen/inttypes.h>
+#include <xen/nodemask.h>
+#include <xen/acpi.h>
+#include <xen/numa.h>
+#include <asm/page.h>
+
+static struct acpi_table_slit *acpi_slit;
+
+static nodemask_t nodes_parsed __initdata;
+static nodemask_t nodes_found __initdata;
+static struct node nodes[MAX_NUMNODES] __initdata;
+static u8 pxm2node[256] = { [0 ... 255] = 0xff };
+
+/* Too small nodes confuse the VM badly. Usually they result
+   from BIOS bugs. */
+#define NODE_MIN_SIZE (4*1024*1024)
+
+static int node_to_pxm(int n);
+
+int pxm_to_node(int pxm)
+{
+	if ((unsigned)pxm >= 256)
+		return -1;
+	/* Extend 0xff to (int)-1 */
+	return (signed char)pxm2node[pxm];
+}
+
+static __init int setup_node(int pxm)
+{
+	unsigned node = pxm2node[pxm];
+	if (node == 0xff) {
+		if (nodes_weight(nodes_found) >= MAX_NUMNODES)
+			return -1;
+		node = first_unset_node(nodes_found); 
+		node_set(node, nodes_found);
+		pxm2node[pxm] = node;
+	}
+	return pxm2node[pxm];
+}
+
+static __init int conflicting_nodes(u64 start, u64 end)
+{
+	int i;
+	for_each_node_mask(i, nodes_parsed) {
+		struct node *nd = &nodes[i];
+		if (nd->start == nd->end)
+			continue;
+		if (nd->end > start && nd->start < end)
+			return i;
+		if (nd->end == end && nd->start == start)
+			return i;
+	}
+	return -1;
+}
+
+static __init void cutoff_node(int i, u64 start, u64 end)
+{
+	struct node *nd = &nodes[i];
+	if (nd->start < start) {
+		nd->start = start;
+		if (nd->end < nd->start)
+			nd->start = nd->end;
+	}
+	if (nd->end > end) {
+		nd->end = end;
+		if (nd->start > nd->end)
+			nd->start = nd->end;
+	}
+}
+
+static __init void bad_srat(void)
+{
+	int i;
+	printk(KERN_ERR "SRAT: SRAT not used.\n");
+	acpi_numa = -1;
+	for (i = 0; i < MAX_LOCAL_APIC; i++)
+		apicid_to_node[i] = NUMA_NO_NODE;
+}
+
+static __init inline int srat_disabled(void)
+{
+	return numa_off || acpi_numa < 0;
+}
+
+/*
+ * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
+ * up the NUMA heuristics which wants the local node to have a smaller
+ * distance than the others.
+ * Do some quick checks here and only use the SLIT if it passes.
+ */
+static __init int slit_valid(struct acpi_table_slit *slit)
+{
+	int i, j;
+	int d = slit->localities;
+	for (i = 0; i < d; i++) {
+		for (j = 0; j < d; j++)  {
+			u8 val = slit->entry[d*i + j];
+			if (i == j) {
+				if (val != 10)
+					return 0;
+			} else if (val <= 10)
+				return 0;
+		}
+	}
+	return 1;
+}
+
+/* Callback for SLIT parsing */
+void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
+{
+	if (!slit_valid(slit)) {
+		printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n");
+		return;
+	}
+	acpi_slit = slit;
+}
+
+/* Callback for Proximity Domain -> LAPIC mapping */
+void __init
+acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
+{
+	int pxm, node;
+	if (srat_disabled())
+		return;
+	if (pa->header.length != sizeof(struct acpi_table_processor_affinity)) {		bad_srat();
+		return;
+	}
+	if (pa->flags.enabled == 0)
+		return;
+	pxm = pa->proximity_domain;
+	node = setup_node(pxm);
+	if (node < 0) {
+		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
+		bad_srat();
+		return;
+	}
+	apicid_to_node[pa->apic_id] = node;
+	acpi_numa = 1;
+	printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
+	       pxm, pa->apic_id, node);
+}
+
+/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
+void __init
+acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
+{
+	struct node *nd;
+	u64 start, end;
+	int node, pxm;
+	int i;
+
+	if (srat_disabled())
+		return;
+	if (ma->header.length != sizeof(struct acpi_table_memory_affinity)) {
+		bad_srat();
+		return;
+	}
+	if (ma->flags.enabled == 0)
+		return;
+	start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32);
+	end = start + (ma->length_lo | ((u64)ma->length_hi << 32));
+	pxm = ma->proximity_domain;
+	node = setup_node(pxm);
+	if (node < 0) {
+		printk(KERN_ERR "SRAT: Too many proximity domains.\n");
+		bad_srat();
+		return;
+	}
+	/* It is fine to add this area to the nodes data it will be used later*/
+	if (ma->flags.hot_pluggable == 1)
+		printk(KERN_INFO "SRAT: hot plug zone found %"PRIx64" - %"PRIx64" \n",
+				start, end);
+	i = conflicting_nodes(start, end);
+	if (i == node) {
+		printk(KERN_WARNING
+		"SRAT: Warning: PXM %d (%"PRIx64"-%"PRIx64") overlaps with itself (%"
+		PRIx64"-%"PRIx64")\n", pxm, start, end, nodes[i].start, nodes[i].end);
+	} else if (i >= 0) {
+		printk(KERN_ERR
+		       "SRAT: PXM %d (%"PRIx64"-%"PRIx64") overlaps with PXM %d (%"
+		       PRIx64"-%"PRIx64")\n", pxm, start, end, node_to_pxm(i),
+			   nodes[i].start, nodes[i].end);
+		bad_srat();
+		return;
+	}
+	nd = &nodes[node];
+	if (!node_test_and_set(node, nodes_parsed)) {
+		nd->start = start;
+		nd->end = end;
+	} else {
+		if (start < nd->start)
+			nd->start = start;
+		if (nd->end < end)
+			nd->end = end;
+	}
+	printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIx64"-%"PRIx64"\n", node, pxm,
+	       nd->start, nd->end);
+}
+
+/* Sanity check to catch more bad SRATs (they are amazingly common).
+   Make sure the PXMs cover all memory. */
+static int nodes_cover_memory(void)
+{
+	int i;
+	u64 pxmram, e820ram;
+
+	pxmram = 0;
+	for_each_node_mask(i, nodes_parsed) {
+		u64 s = nodes[i].start >> PAGE_SHIFT;
+		u64 e = nodes[i].end >> PAGE_SHIFT;
+		pxmram += e - s;
+	}
+
+	e820ram = max_page;
+	/* We seem to lose 3 pages somewhere. Allow a bit of slack. */
+	if ((long)(e820ram - pxmram) >= 1*1024*1024) {
+		printk(KERN_ERR "SRAT: PXMs only cover %"PRIu64"MB of your %"
+			PRIu64"MB e820 RAM. Not used.\n",
+			(pxmram << PAGE_SHIFT) >> 20,
+			(e820ram << PAGE_SHIFT) >> 20);
+		return 0;
+	}
+	return 1;
+}
+
+static void unparse_node(int node)
+{
+	int i;
+	node_clear(node, nodes_parsed);
+	for (i = 0; i < MAX_LOCAL_APIC; i++) {
+		if (apicid_to_node[i] == node)
+			apicid_to_node[i] = NUMA_NO_NODE;
+	}
+}
+
+void __init acpi_numa_arch_fixup(void) {}
+
+/* Use the information discovered above to actually set up the nodes. */
+int __init acpi_scan_nodes(u64 start, u64 end)
+{
+	int i;
+
+	/* First clean up the node list */
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		cutoff_node(i, start, end);
+		if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE)
+			unparse_node(i);
+	}
+
+	if (acpi_numa <= 0)
+		return -1;
+
+	if (!nodes_cover_memory()) {
+		bad_srat();
+		return -1;
+	}
+
+	memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
+	if (memnode_shift < 0) {
+		printk(KERN_ERR
+		     "SRAT: No NUMA node hash function found. Contact maintainer\n");
+		bad_srat();
+		return -1;
+	}
+
+	/* Finally register nodes */
+	for_each_node_mask(i, nodes_parsed)
+		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+	for (i = 0; i < NR_CPUS; i++) { 
+		if (cpu_to_node[i] == NUMA_NO_NODE)
+			continue;
+		if (!node_isset(cpu_to_node[i], nodes_parsed))
+			numa_set_node(i, NUMA_NO_NODE);
+	}
+	numa_init_array();
+	return 0;
+}
+
+static int node_to_pxm(int n)
+{
+       int i;
+       if (pxm2node[n] == n)
+               return n;
+       for (i = 0; i < 256; i++)
+               if (pxm2node[i] == n)
+                       return i;
+       return 0;
+}
+
+int __node_distance(int a, int b)
+{
+	int index;
+
+	if (!acpi_slit)
+		return a == b ? 10 : 20;
+	index = acpi_slit->localities * node_to_pxm(a);
+	return acpi_slit->entry[index + node_to_pxm(b)];
+}
+
+EXPORT_SYMBOL(__node_distance);
diff --git a/xen/common/memory.c b/xen/common/memory.c
index c2827fa59f..9ab62a48ec 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -41,6 +41,8 @@ increase_reservation(
     struct page_info *page;
     unsigned long i;
     xen_pfn_t mfn;
+    /* use domain's first processor for locality parameter */
+    unsigned int cpu = d->vcpu[0]->processor;
 
     if ( !guest_handle_is_null(extent_list) &&
          !guest_handle_okay(extent_list, nr_extents) )
@@ -58,8 +60,8 @@ increase_reservation(
             return i;
         }
 
-        if ( unlikely((page = alloc_domheap_pages(
-            d, extent_order, memflags)) == NULL) )
+        if ( unlikely((page = __alloc_domheap_pages( d, cpu, 
+            extent_order, memflags )) == NULL) ) 
         {
             DPRINTK("Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
@@ -92,6 +94,8 @@ populate_physmap(
     unsigned long i, j;
     xen_pfn_t gpfn;
     xen_pfn_t mfn;
+    /* use domain's first processor for locality parameter */
+    unsigned int cpu = d->vcpu[0]->processor;
 
     if ( !guest_handle_okay(extent_list, nr_extents) )
         return 0;
@@ -111,8 +115,8 @@ populate_physmap(
         if ( unlikely(__copy_from_guest_offset(&gpfn, extent_list, i, 1)) )
             goto out;
 
-        if ( unlikely((page = alloc_domheap_pages(
-            d, extent_order, memflags)) == NULL) )
+        if ( unlikely((page = __alloc_domheap_pages( d, cpu, 
+            extent_order, memflags )) == NULL) ) 
         {
             DPRINTK("Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
@@ -294,7 +298,7 @@ memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
     unsigned long in_chunk_order, out_chunk_order;
     xen_pfn_t     gpfn, gmfn, mfn;
     unsigned long i, j, k;
-    unsigned int  memflags = 0;
+    unsigned int  memflags = 0, cpu;
     long          rc = 0;
     struct domain *d;
     struct page_info *page;
@@ -368,6 +372,9 @@ memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
     }
     d = current->domain;
 
+    /* use domain's first processor for locality parameter */
+    cpu = d->vcpu[0]->processor;
+
     for ( i = 0; i < (exch.in.nr_extents >> in_chunk_order); i++ )
     {
         if ( hypercall_preempt_check() )
@@ -413,8 +420,8 @@ memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
         /* Allocate a chunk's worth of anonymous output pages. */
         for ( j = 0; j < (1UL << out_chunk_order); j++ )
         {
-            page = alloc_domheap_pages(
-                NULL, exch.out.extent_order, memflags);
+            page = __alloc_domheap_pages( NULL, cpu, 
+                  exch.out.extent_order, memflags);
             if ( unlikely(page == NULL) )
             {
                 rc = -ENOMEM;
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index fbbe837780..f4a1adc274 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -4,6 +4,7 @@
  * Simple buddy heap allocator for Xen.
  * 
  * Copyright (c) 2002-2004 K A Fraser
+ * Copyright (c) 2006 IBM Ryan Harper <ryanh@us.ibm.com>
  * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -33,6 +34,8 @@
 #include <xen/domain_page.h>
 #include <xen/keyhandler.h>
 #include <xen/perfc.h>
+#include <xen/numa.h>
+#include <xen/nodemask.h>
 #include <asm/page.h>
 
 /*
@@ -247,22 +250,23 @@ unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align)
 #define pfn_dom_zone_type(_pfn)                                 \
     (((_pfn) <= MAX_DMADOM_PFN) ? MEMZONE_DMADOM : MEMZONE_DOM)
 
-static struct list_head heap[NR_ZONES][MAX_ORDER+1];
+static struct list_head heap[NR_ZONES][MAX_NUMNODES][MAX_ORDER+1];
 
-static unsigned long avail[NR_ZONES];
+static unsigned long avail[NR_ZONES][MAX_NUMNODES];
 
 static DEFINE_SPINLOCK(heap_lock);
 
 void end_boot_allocator(void)
 {
-    unsigned long i, j;
+    unsigned long i, j, k;
     int curr_free = 0, next_free = 0;
 
     memset(avail, 0, sizeof(avail));
 
     for ( i = 0; i < NR_ZONES; i++ )
-        for ( j = 0; j <= MAX_ORDER; j++ )
-            INIT_LIST_HEAD(&heap[i][j]);
+        for ( j = 0; j < MAX_NUMNODES; j++ )
+            for ( k = 0; k <= MAX_ORDER; k++ )
+                INIT_LIST_HEAD(&heap[i][j][k]);
 
     /* Pages that are free now go to the domain sub-allocator. */
     for ( i = 0; i < max_page; i++ )
@@ -272,29 +276,59 @@ void end_boot_allocator(void)
         if ( next_free )
             map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
         if ( curr_free )
-            free_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 0);
+            init_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 1);
     }
 }
 
-/* Hand the specified arbitrary page range to the specified heap zone. */
+/* 
+ * Hand the specified arbitrary page range to the specified heap zone
+ * checking the node_id of the previous page.  If they differ and the
+ * latter is not on a MAX_ORDER boundary, then we reserve the page by
+ * not freeing it to the buddy allocator.
+ */
+#define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
 void init_heap_pages(
     unsigned int zone, struct page_info *pg, unsigned long nr_pages)
 {
+    unsigned int nid_curr,nid_prev;
     unsigned long i;
 
     ASSERT(zone < NR_ZONES);
 
+    if ( likely(page_to_mfn(pg) != 0) )
+        nid_prev = phys_to_nid(page_to_maddr(pg-1));
+    else
+        nid_prev = phys_to_nid(page_to_maddr(pg));
+
     for ( i = 0; i < nr_pages; i++ )
-        free_heap_pages(zone, pg+i, 0);
+    {
+        nid_curr = phys_to_nid(page_to_maddr(pg+i));
+
+        /*
+         * free pages of the same node, or if they differ, but are on a
+         * MAX_ORDER alignement boundary (which already get reserved)
+         */
+         if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
+                                         MAX_ORDER_ALIGNED) )
+             free_heap_pages(zone, pg+i, 0);
+         else
+             printk("Reserving non-aligned node boundary @ mfn %lu\n",
+                    page_to_mfn(pg+i));
+
+        nid_prev = nid_curr;
+    }
 }
 
-
 /* Allocate 2^@order contiguous pages. */
-struct page_info *alloc_heap_pages(unsigned int zone, unsigned int order)
+struct page_info *alloc_heap_pages(unsigned int zone, unsigned int cpu,
+                                   unsigned int order)
 {
-    int i;
+    unsigned int i,j, node = cpu_to_node(cpu), num_nodes = num_online_nodes();
+    unsigned int request = (1UL << order);
     struct page_info *pg;
 
+    ASSERT(node >= 0);
+    ASSERT(node < num_nodes);
     ASSERT(zone < NR_ZONES);
 
     if ( unlikely(order > MAX_ORDER) )
@@ -302,29 +336,46 @@ struct page_info *alloc_heap_pages(unsigned int zone, unsigned int order)
 
     spin_lock(&heap_lock);
 
-    /* Find smallest order which can satisfy the request. */
-    for ( i = order; i <= MAX_ORDER; i++ )
-        if ( !list_empty(&heap[zone][i]) )
-            goto found;
+    /* start with requested node, but exhaust all node memory
+     * in requested zone before failing, only calc new node
+     * value if we fail to find memory in target node, this avoids
+     * needless computation on fast-path */
+    for ( i = 0; i < num_nodes; i++ )
+    {
+        /* check if target node can support the allocation */
+        if ( avail[zone][node] >= request )
+        {
+            /* Find smallest order which can satisfy the request. */
+            for ( j = order; j <= MAX_ORDER; j++ )
+            {
+                if ( !list_empty(&heap[zone][node][j]) )
+                    goto found;
+            }
+        }
+        /* pick next node, wrapping around if needed */
+        if ( ++node == num_nodes )
+            node = 0;
+    }
 
     /* No suitable memory blocks. Fail the request. */
     spin_unlock(&heap_lock);
     return NULL;
 
  found: 
-    pg = list_entry(heap[zone][i].next, struct page_info, list);
+    pg = list_entry(heap[zone][node][j].next, struct page_info, list);
     list_del(&pg->list);
 
     /* We may have to halve the chunk a number of times. */
-    while ( i != order )
+    while ( j != order )
     {
-        PFN_ORDER(pg) = --i;
-        list_add_tail(&pg->list, &heap[zone][i]);
-        pg += 1 << i;
+        PFN_ORDER(pg) = --j;
+        list_add_tail(&pg->list, &heap[zone][node][j]);
+        pg += 1 << j;
     }
     
-    map_alloc(page_to_mfn(pg), 1 << order);
-    avail[zone] -= 1 << order;
+    map_alloc(page_to_mfn(pg), request);
+    ASSERT(avail[zone][node] >= request);
+    avail[zone][node] -= request;
 
     spin_unlock(&heap_lock);
 
@@ -337,14 +388,17 @@ void free_heap_pages(
     unsigned int zone, struct page_info *pg, unsigned int order)
 {
     unsigned long mask;
+    int node = phys_to_nid(page_to_maddr(pg));
 
     ASSERT(zone < NR_ZONES);
     ASSERT(order <= MAX_ORDER);
+    ASSERT(node >= 0);
+    ASSERT(node < num_online_nodes());
 
     spin_lock(&heap_lock);
 
     map_free(page_to_mfn(pg), 1 << order);
-    avail[zone] += 1 << order;
+    avail[zone][node] += 1 << order;
     
     /* Merge chunks as far as possible. */
     while ( order < MAX_ORDER )
@@ -370,10 +424,13 @@ void free_heap_pages(
         }
         
         order++;
+
+        /* after merging, pg should be in the same node */
+        ASSERT(phys_to_nid(page_to_maddr(pg)) == node );
     }
 
     PFN_ORDER(pg) = order;
-    list_add_tail(&pg->list, &heap[zone][order]);
+    list_add_tail(&pg->list, &heap[zone][node][order]);
 
     spin_unlock(&heap_lock);
 }
@@ -466,7 +523,7 @@ void *alloc_xenheap_pages(unsigned int order)
     int i;
 
     local_irq_save(flags);
-    pg = alloc_heap_pages(MEMZONE_XEN, order);
+    pg = alloc_heap_pages(MEMZONE_XEN, smp_processor_id(), order);
     local_irq_restore(flags);
 
     if ( unlikely(pg == NULL) )
@@ -580,8 +637,9 @@ int assign_pages(
 }
 
 
-struct page_info *alloc_domheap_pages(
-    struct domain *d, unsigned int order, unsigned int memflags)
+struct page_info *__alloc_domheap_pages(
+    struct domain *d, unsigned int cpu, unsigned int order, 
+    unsigned int memflags)
 {
     struct page_info *pg = NULL;
     cpumask_t mask;
@@ -591,17 +649,17 @@ struct page_info *alloc_domheap_pages(
 
     if ( !(memflags & MEMF_dma) )
     {
-        pg = alloc_heap_pages(MEMZONE_DOM, order);
+        pg = alloc_heap_pages(MEMZONE_DOM, cpu, order);
         /* Failure? Then check if we can fall back to the DMA pool. */
         if ( unlikely(pg == NULL) &&
              ((order > MAX_ORDER) ||
-              (avail[MEMZONE_DMADOM] <
+              (avail_heap_pages(MEMZONE_DMADOM,-1) <
                (lowmem_emergency_pool_pages + (1UL << order)))) )
             return NULL;
     }
 
     if ( pg == NULL )
-        if ( (pg = alloc_heap_pages(MEMZONE_DMADOM, order)) == NULL )
+        if ( (pg = alloc_heap_pages(MEMZONE_DMADOM, cpu, order)) == NULL )
             return NULL;
 
     mask = pg->u.free.cpumask;
@@ -640,6 +698,11 @@ struct page_info *alloc_domheap_pages(
     return pg;
 }
 
+inline struct page_info *alloc_domheap_pages(
+    struct domain *d, unsigned int order, unsigned int flags)
+{
+    return __alloc_domheap_pages(d, smp_processor_id(), order, flags);
+}
 
 void free_domheap_pages(struct page_info *pg, unsigned int order)
 {
@@ -714,13 +777,27 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
 }
 
 
+unsigned long avail_heap_pages(int zone, int node)
+{
+    int i,j, num_nodes = num_online_nodes();
+    unsigned long free_pages = 0;
+   
+    for (i=0; i<NR_ZONES; i++)
+        if ( (zone == -1) || (zone == i) )
+            for (j=0; j < num_nodes; j++)
+                if ( (node == -1) || (node == j) )
+                    free_pages += avail[i][j];            
+
+    return free_pages;
+}
+
 unsigned long avail_domheap_pages(void)
 {
     unsigned long avail_nrm, avail_dma;
+    
+    avail_nrm = avail_heap_pages(MEMZONE_DOM,-1);
 
-    avail_nrm = avail[MEMZONE_DOM];
-
-    avail_dma = avail[MEMZONE_DMADOM];
+    avail_dma = avail_heap_pages(MEMZONE_DMADOM,-1);
     if ( avail_dma > lowmem_emergency_pool_pages )
         avail_dma -= lowmem_emergency_pool_pages;
     else
@@ -729,6 +806,10 @@ unsigned long avail_domheap_pages(void)
     return avail_nrm + avail_dma;
 }
 
+unsigned long avail_nodeheap_pages(int node)
+{
+    return avail_heap_pages(-1, node);
+}
 
 static void pagealloc_keyhandler(unsigned char key)
 {
@@ -736,9 +817,9 @@ static void pagealloc_keyhandler(unsigned char key)
     printk("    Xen heap: %lukB free\n"
            "    DMA heap: %lukB free\n"
            "    Dom heap: %lukB free\n",
-           avail[MEMZONE_XEN]<<(PAGE_SHIFT-10),
-           avail[MEMZONE_DMADOM]<<(PAGE_SHIFT-10),
-           avail[MEMZONE_DOM]<<(PAGE_SHIFT-10));
+           avail_heap_pages(MEMZONE_XEN, -1) << (PAGE_SHIFT-10), 
+           avail_heap_pages(MEMZONE_DMADOM, -1) <<(PAGE_SHIFT-10), 
+           avail_heap_pages(MEMZONE_DOM, -1) <<(PAGE_SHIFT-10));
 }
 
 
@@ -806,6 +887,46 @@ unsigned long avail_scrub_pages(void)
     return scrub_pages;
 }
 
+static unsigned long count_bucket(struct list_head* l, int order)
+{
+    unsigned long total_pages = 0;
+    int pages = 1 << order;
+    struct page_info *pg;
+
+    list_for_each_entry(pg, l, list)
+        total_pages += pages;
+
+    return total_pages;
+}
+
+static void dump_heap(unsigned char key)
+{
+    s_time_t       now = NOW();
+    int i,j,k;
+    unsigned long total;
+
+    printk("'%c' pressed -> dumping heap info (now-0x%X:%08X)\n", key,
+           (u32)(now>>32), (u32)now);
+
+    for (i=0; i<NR_ZONES; i++ )
+        for (j=0;j<MAX_NUMNODES;j++)
+            for (k=0;k<=MAX_ORDER;k++)
+                if ( !list_empty(&heap[i][j][k]) )
+                {
+                    total = count_bucket(&heap[i][j][k], k);
+                    printk("heap[%d][%d][%d]-> %lu pages\n",
+                            i, j, k, total);
+                }
+}
+
+static __init int register_heap_trigger(void)
+{
+    register_keyhandler('H', dump_heap, "dump heap info");
+    return 0;
+}
+__initcall(register_heap_trigger);
+
+
 static __init int page_scrub_init(void)
 {
     open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
diff --git a/xen/drivers/acpi/Makefile b/xen/drivers/acpi/Makefile
index 68dafe3a52..08844a529d 100644
--- a/xen/drivers/acpi/Makefile
+++ b/xen/drivers/acpi/Makefile
@@ -1 +1,2 @@
 obj-y += tables.o
+obj-y += numa.o
diff --git a/xen/drivers/acpi/numa.c b/xen/drivers/acpi/numa.c
new file mode 100644
index 0000000000..ecf426ece4
--- /dev/null
+++ b/xen/drivers/acpi/numa.c
@@ -0,0 +1,216 @@
+/*
+ *  acpi_numa.c - ACPI NUMA support
+ *
+ *  Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ */
+#if 0
+#include <linux/module.h>
+#include <linux/kernel.h>
+#endif
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/acpi.h>
+#include <xen/numa.h>
+#include <acpi/acpi_bus.h>
+#include <acpi/acmacros.h>
+#include <asm/page.h> /* __va() */
+
+#define ACPI_NUMA	0x80000000
+#define _COMPONENT	ACPI_NUMA
+ACPI_MODULE_NAME("numa")
+
+extern int __init acpi_table_parse_madt_family(enum acpi_table_id id,
+					       unsigned long madt_size,
+					       int entry_id,
+					       acpi_madt_entry_handler handler,
+					       unsigned int max_entries);
+
+void __init acpi_table_print_srat_entry(acpi_table_entry_header * header)
+{
+
+	ACPI_FUNCTION_NAME("acpi_table_print_srat_entry");
+
+	if (!header)
+		return;
+
+	switch (header->type) {
+
+	case ACPI_SRAT_PROCESSOR_AFFINITY:
+#ifdef ACPI_DEBUG_OUTPUT
+		{
+			struct acpi_table_processor_affinity *p =
+			    (struct acpi_table_processor_affinity *)header;
+			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+					  "SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n",
+					  p->apic_id, p->lsapic_eid,
+					  p->proximity_domain,
+					  p->flags.
+					  enabled ? "enabled" : "disabled"));
+		}
+#endif				/* ACPI_DEBUG_OUTPUT */
+		break;
+
+	case ACPI_SRAT_MEMORY_AFFINITY:
+#ifdef ACPI_DEBUG_OUTPUT
+		{
+			struct acpi_table_memory_affinity *p =
+			    (struct acpi_table_memory_affinity *)header;
+			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+					  "SRAT Memory (0x%08x%08x length 0x%08x%08x type 0x%x) in proximity domain %d %s%s\n",
+					  p->base_addr_hi, p->base_addr_lo,
+					  p->length_hi, p->length_lo,
+					  p->memory_type, p->proximity_domain,
+					  p->flags.
+					  enabled ? "enabled" : "disabled",
+					  p->flags.
+					  hot_pluggable ? " hot-pluggable" :
+					  ""));
+		}
+#endif				/* ACPI_DEBUG_OUTPUT */
+		break;
+
+	default:
+		printk(KERN_WARNING PREFIX
+		       "Found unsupported SRAT entry (type = 0x%x)\n",
+		       header->type);
+		break;
+	}
+}
+
+static int __init acpi_parse_slit(unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_slit *slit;
+	u32 localities;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	slit = (struct acpi_table_slit *)__va(phys_addr);
+
+	/* downcast just for %llu vs %lu for i386/ia64  */
+	localities = (u32) slit->localities;
+
+	acpi_numa_slit_init(slit);
+
+	return 0;
+}
+
+static int __init
+acpi_parse_processor_affinity(acpi_table_entry_header * header,
+			      const unsigned long end)
+{
+	struct acpi_table_processor_affinity *processor_affinity;
+
+	processor_affinity = (struct acpi_table_processor_affinity *)header;
+	if (!processor_affinity)
+		return -EINVAL;
+
+	acpi_table_print_srat_entry(header);
+
+	/* let architecture-dependent part to do it */
+	acpi_numa_processor_affinity_init(processor_affinity);
+
+	return 0;
+}
+
+static int __init
+acpi_parse_memory_affinity(acpi_table_entry_header * header,
+			   const unsigned long end)
+{
+	struct acpi_table_memory_affinity *memory_affinity;
+
+	memory_affinity = (struct acpi_table_memory_affinity *)header;
+	if (!memory_affinity)
+		return -EINVAL;
+
+	acpi_table_print_srat_entry(header);
+
+	/* let architecture-dependent part to do it */
+	acpi_numa_memory_affinity_init(memory_affinity);
+
+	return 0;
+}
+
+static int __init acpi_parse_srat(unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_srat *srat;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	srat = (struct acpi_table_srat *)__va(phys_addr);
+
+	return 0;
+}
+
+int __init
+acpi_table_parse_srat(enum acpi_srat_entry_id id,
+		      acpi_madt_entry_handler handler, unsigned int max_entries)
+{
+	return acpi_table_parse_madt_family(ACPI_SRAT,
+					    sizeof(struct acpi_table_srat), id,
+					    handler, max_entries);
+}
+
+int __init acpi_numa_init(void)
+{
+	int result;
+
+	/* SRAT: Static Resource Affinity Table */
+	result = acpi_table_parse(ACPI_SRAT, acpi_parse_srat);
+
+	if (result > 0) {
+		result = acpi_table_parse_srat(ACPI_SRAT_PROCESSOR_AFFINITY,
+					       acpi_parse_processor_affinity,
+					       NR_CPUS);
+		result = acpi_table_parse_srat(ACPI_SRAT_MEMORY_AFFINITY, acpi_parse_memory_affinity, NR_NODE_MEMBLKS);	// IA64 specific
+	}
+
+	/* SLIT: System Locality Information Table */
+	result = acpi_table_parse(ACPI_SLIT, acpi_parse_slit);
+
+	acpi_numa_arch_fixup();
+	return 0;
+}
+
+#if 0
+int acpi_get_pxm(acpi_handle h)
+{
+	unsigned long pxm;
+	acpi_status status;
+	acpi_handle handle;
+	acpi_handle phandle = h;
+
+	do {
+		handle = phandle;
+		status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
+		if (ACPI_SUCCESS(status))
+			return (int)pxm;
+		status = acpi_get_parent(handle, &phandle);
+	} while (ACPI_SUCCESS(status));
+	return -1;
+}
+
+EXPORT_SYMBOL(acpi_get_pxm);
+#endif
diff --git a/xen/include/asm-ia64/vmx_platform.h b/xen/include/asm-ia64/vmx_platform.h
index 33c4003cf3..07d05a68c6 100644
--- a/xen/include/asm-ia64/vmx_platform.h
+++ b/xen/include/asm-ia64/vmx_platform.h
@@ -24,6 +24,8 @@
 #include <asm/hvm/vioapic.h>
 struct mmio_list;
 typedef struct virtual_platform_def {
+    unsigned long          buffered_io_va;
+    spinlock_t             buffered_io_lock;
     unsigned long       shared_page_va;
     unsigned long       pib_base;
     unsigned char       xtp;
diff --git a/xen/include/asm-x86/acpi.h b/xen/include/asm-x86/acpi.h
index 51c4b8e293..227f76325c 100644
--- a/xen/include/asm-x86/acpi.h
+++ b/xen/include/asm-x86/acpi.h
@@ -157,6 +157,9 @@ static inline void check_acpi_pci(void) { }
 
 static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
 static inline int acpi_irq_balance_set(char *str) { return 0; }
+extern int acpi_scan_nodes(u64 start, u64 end);
+extern int acpi_numa;
+#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
 
 #ifdef CONFIG_ACPI_SLEEP
 
@@ -173,5 +176,6 @@ extern void acpi_reserve_bootmem(void);
 #endif /*CONFIG_ACPI_SLEEP*/
 
 extern u8 x86_acpiid_to_apicid[];
+#define MAX_LOCAL_APIC 256
 
 #endif /*_ASM_ACPI_H*/
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index e2ef90700c..879bdbf80b 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -24,6 +24,11 @@
 #define CONFIG_X86_IO_APIC 1
 #define CONFIG_HPET_TIMER 1
 #define CONFIG_X86_MCE_P4THERMAL 1
+#define CONFIG_ACPI_NUMA 1
+#define CONFIG_NUMA 1
+#define CONFIG_ACPI_SRAT 1
+#define CONFIG_DISCONTIGMEM 1
+#define CONFIG_NUMA_EMU 1
 
 /* Intel P4 currently has largest cache line (L2 line size is 128 bytes). */
 #define CONFIG_X86_L1_CACHE_SHIFT 7
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 6561519cb1..0ebec779c1 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -23,7 +23,7 @@
 #define __ASM_X86_HVM_DOMAIN_H__
 
 #include <asm/hvm/vpic.h>
-#include <asm/hvm/vpit.h>
+#include <asm/hvm/vpt.h>
 #include <asm/hvm/vlapic.h>
 #include <asm/hvm/vioapic.h>
 #include <public/hvm/params.h>
diff --git a/xen/include/asm-x86/hvm/vpit.h b/xen/include/asm-x86/hvm/vpt.h
index 83b1af2622..ada8936af7 100644
--- a/xen/include/asm-x86/hvm/vpit.h
+++ b/xen/include/asm-x86/hvm/vpt.h
@@ -1,5 +1,5 @@
 /*
- * vpit.h: Virtual PIT definitions
+ * vpt.h: Virtual Platform Timer definitions
  *
  * Copyright (c) 2004, Intel Corporation.
  *
@@ -17,8 +17,8 @@
  * Place - Suite 330, Boston, MA 02111-1307 USA.
  */
 
-#ifndef __ASM_X86_HVM_VPIT_H__
-#define __ASM_X86_HVM_VPIT_H__
+#ifndef __ASM_X86_HVM_VPT_H__
+#define __ASM_X86_HVM_VPT_H__
 
 #include <xen/config.h>
 #include <xen/init.h>
@@ -70,7 +70,17 @@ typedef struct RTCState {
     struct vcpu      *vcpu;
     struct periodic_time *pt;
 } RTCState;
-   
+
+#define FREQUENCE_PMTIMER  3579545
+typedef struct PMTState {
+    uint32_t pm1_timer;
+    uint32_t pm1_status;
+    uint64_t last_gtime;
+    struct timer timer;
+    uint64_t scale;
+    struct vcpu *vcpu;
+} PMTState;
+
 /*
  * Abstract layer of periodic time, one short time.
  */
@@ -95,7 +105,7 @@ struct pl_time {    /* platform time */
     struct periodic_time periodic_tm;
     struct PITState      vpit;
     struct RTCState      vrtc;
-    /* TODO: ACPI time */
+    struct PMTState      vpmt;
 };
 
 static __inline__ s_time_t get_scheduled(
@@ -132,8 +142,10 @@ extern void destroy_periodic_time(struct periodic_time *pt);
 void pit_init(struct vcpu *v, unsigned long cpu_khz);
 void rtc_init(struct vcpu *v, int base, int irq);
 void rtc_deinit(struct domain *d);
+void pmtimer_init(struct vcpu *v, int base);
+void pmtimer_deinit(struct domain *d);
 int is_rtc_periodic_irq(void *opaque);
 void pt_timer_fn(void *data);
 void pit_time_fired(struct vcpu *v, void *priv);
 
-#endif /* __ASM_X86_HVM_VPIT_H__ */
+#endif /* __ASM_X86_HVM_VPT_H__ */
diff --git a/xen/include/asm-x86/mach-generic/mach_apic.h b/xen/include/asm-x86/mach-generic/mach_apic.h
index 1d3ed4dc67..1e0a6019d6 100644
--- a/xen/include/asm-x86/mach-generic/mach_apic.h
+++ b/xen/include/asm-x86/mach-generic/mach_apic.h
@@ -22,11 +22,7 @@ static inline void enable_apic_mode(void)
 	return;
 }
 
-/* No sane NUMA support right now. We should parse ACPI SRAT. */
-static inline int apicid_to_node(int logical_apicid)
-{
-	return 0;
-}
+#define apicid_to_node(apicid) ((int)apicid_to_node[(u8)apicid])
 
 extern u8 bios_cpu_apicid[];
 static inline int cpu_present_to_apicid(int mps_cpu)
diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
new file mode 100644
index 0000000000..caa6491c96
--- /dev/null
+++ b/xen/include/asm-x86/numa.h
@@ -0,0 +1,78 @@
+#ifndef _ASM_X8664_NUMA_H 
+#define _ASM_X8664_NUMA_H 1
+
+#include <xen/cpumask.h>
+
+#define NODES_SHIFT 6
+
+extern unsigned char cpu_to_node[];
+extern cpumask_t     node_to_cpumask[];
+
+#define cpu_to_node(cpu)		(cpu_to_node[cpu])
+#define parent_node(node)		(node)
+#define node_to_first_cpu(node)  (__ffs(node_to_cpumask[node]))
+#define node_to_cpumask(node)    (node_to_cpumask[node])
+
+struct node { 
+	u64 start,end; 
+};
+
+extern int compute_hash_shift(struct node *nodes, int numnodes);
+extern int pxm_to_node(int nid);
+
+#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
+#define VIRTUAL_BUG_ON(x) 
+#define NODEMAPSIZE 0xfff
+
+extern void numa_add_cpu(int cpu);
+extern void numa_init_array(void);
+extern int numa_off;
+
+extern void numa_set_node(int cpu, int node);
+
+extern void setup_node_bootmem(int nodeid, u64 start, u64 end);
+extern unsigned char apicid_to_node[256];
+#ifdef CONFIG_NUMA
+extern void __init init_cpu_to_node(void);
+
+static inline void clear_node_cpumask(int cpu)
+{
+	clear_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
+}
+
+/* Simple perfect hash to map physical addresses to node numbers */
+extern int memnode_shift; 
+extern u8  memnodemap[NODEMAPSIZE]; 
+
+struct node_data {
+    unsigned long node_start_pfn;
+    unsigned long node_spanned_pages;
+    unsigned int  node_id;
+};
+
+extern struct node_data node_data[];
+
+static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) 
+{ 
+	unsigned nid; 
+	VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE);
+	nid = memnodemap[addr >> memnode_shift]; 
+	VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); 
+	return nid; 
+} 
+
+#define NODE_DATA(nid)		(&(node_data[nid]))
+
+#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
+#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
+				 NODE_DATA(nid)->node_spanned_pages)
+
+
+#else
+#define init_cpu_to_node() do {} while (0)
+#define clear_node_cpumask(cpu) do {} while (0)
+#endif
+
+#define NUMA_NO_NODE 0xff
+
+#endif
diff --git a/xen/include/public/arch-ia64.h b/xen/include/public/arch-ia64.h
index fd05ff9233..d7b35b4524 100644
--- a/xen/include/public/arch-ia64.h
+++ b/xen/include/public/arch-ia64.h
@@ -68,6 +68,9 @@ typedef unsigned long xen_ulong_t;
 #define STORE_PAGE_START (IO_PAGE_START + IO_PAGE_SIZE)
 #define STORE_PAGE_SIZE	 PAGE_SIZE
 
+#define BUFFER_IO_PAGE_START (STORE_PAGE_START+PAGE_SIZE)
+#define BUFFER_IO_PAGE_SIZE PAGE_SIZE
+
 #define IO_SAPIC_START   0xfec00000UL
 #define IO_SAPIC_SIZE    0x100000
 
diff --git a/xen/include/public/hvm/ioreq.h b/xen/include/public/hvm/ioreq.h
index 8e92b004b1..992505e1c5 100644
--- a/xen/include/public/hvm/ioreq.h
+++ b/xen/include/public/hvm/ioreq.h
@@ -86,6 +86,10 @@ struct buffered_iopage {
 };            /* sizeof this structure must be in one page */
 typedef struct buffered_iopage buffered_iopage_t;
 
+#define ACPI_PM1A_EVT_BLK_ADDRESS           0x000000000000c010
+#define ACPI_PM1A_CNT_BLK_ADDRESS           (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04)
+#define ACPI_PM_TMR_BLK_ADDRESS             (ACPI_PM1A_EVT_BLK_ADDRESS + 0x08)
+
 #endif /* _IOREQ_H_ */
 
 /*
diff --git a/xen/include/xen/config.h b/xen/include/xen/config.h
index e3f94d5843..f79472da77 100644
--- a/xen/include/xen/config.h
+++ b/xen/include/xen/config.h
@@ -50,5 +50,7 @@
 #endif /* !__ASSEMBLY__ */
 
 #define fastcall
+#define __cpuinitdata
+#define __cpuinit
 
 #endif /* __XEN_CONFIG_H__ */
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 8c9713971b..4d05f6917f 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -45,7 +45,8 @@ void end_boot_allocator(void);
 /* Generic allocator. These functions are *not* interrupt-safe. */
 void init_heap_pages(
     unsigned int zone, struct page_info *pg, unsigned long nr_pages);
-struct page_info *alloc_heap_pages(unsigned int zone, unsigned int order);
+struct page_info *alloc_heap_pages(
+    unsigned int zone, unsigned int cpu, unsigned int order);
 void free_heap_pages(
     unsigned int zone, struct page_info *pg, unsigned int order);
 void scrub_heap_pages(void);
@@ -61,8 +62,12 @@ void free_xenheap_pages(void *v, unsigned int order);
 void init_domheap_pages(paddr_t ps, paddr_t pe);
 struct page_info *alloc_domheap_pages(
     struct domain *d, unsigned int order, unsigned int memflags);
+struct page_info *__alloc_domheap_pages(
+    struct domain *d, unsigned int cpu, unsigned int order, 
+    unsigned int memflags);
 void free_domheap_pages(struct page_info *pg, unsigned int order);
 unsigned long avail_domheap_pages(void);
+unsigned long avail_heap_pages(int zone, int node);
 #define alloc_domheap_page(d) (alloc_domheap_pages(d,0,0))
 #define free_domheap_page(p)  (free_domheap_pages(p,0))
 
diff --git a/xen/include/xen/nodemask.h b/xen/include/xen/nodemask.h
new file mode 100644
index 0000000000..30ed6f4524
--- /dev/null
+++ b/xen/include/xen/nodemask.h
@@ -0,0 +1,338 @@
+#ifndef __LINUX_NODEMASK_H
+#define __LINUX_NODEMASK_H
+
+/*
+ * Nodemasks provide a bitmap suitable for representing the
+ * set of Node's in a system, one bit position per Node number.
+ *
+ * See detailed comments in the file linux/bitmap.h describing the
+ * data type on which these nodemasks are based.
+ *
+ * For details of nodemask_scnprintf() and nodemask_parse(),
+ * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c.
+ *
+ * The available nodemask operations are:
+ *
+ * void node_set(node, mask)		turn on bit 'node' in mask
+ * void node_clear(node, mask)		turn off bit 'node' in mask
+ * void nodes_setall(mask)		set all bits
+ * void nodes_clear(mask)		clear all bits
+ * int node_isset(node, mask)		true iff bit 'node' set in mask
+ * int node_test_and_set(node, mask)	test and set bit 'node' in mask
+ *
+ * void nodes_and(dst, src1, src2)	dst = src1 & src2  [intersection]
+ * void nodes_or(dst, src1, src2)	dst = src1 | src2  [union]
+ * void nodes_xor(dst, src1, src2)	dst = src1 ^ src2
+ * void nodes_andnot(dst, src1, src2)	dst = src1 & ~src2
+ * void nodes_complement(dst, src)	dst = ~src
+ *
+ * int nodes_equal(mask1, mask2)	Does mask1 == mask2?
+ * int nodes_intersects(mask1, mask2)	Do mask1 and mask2 intersect?
+ * int nodes_subset(mask1, mask2)	Is mask1 a subset of mask2?
+ * int nodes_empty(mask)		Is mask empty (no bits sets)?
+ * int nodes_full(mask)			Is mask full (all bits sets)?
+ * int nodes_weight(mask)		Hamming weight - number of set bits
+ *
+ * void nodes_shift_right(dst, src, n)	Shift right
+ * void nodes_shift_left(dst, src, n)	Shift left
+ *
+ * int first_node(mask)			Number lowest set bit, or MAX_NUMNODES
+ * int next_node(node, mask)		Next node past 'node', or MAX_NUMNODES
+ * int first_unset_node(mask)		First node not set in mask, or 
+ *					MAX_NUMNODES.
+ *
+ * nodemask_t nodemask_of_node(node)	Return nodemask with bit 'node' set
+ * NODE_MASK_ALL			Initializer - all bits set
+ * NODE_MASK_NONE			Initializer - no bits set
+ * unsigned long *nodes_addr(mask)	Array of unsigned long's in mask
+ *
+ * int nodemask_scnprintf(buf, len, mask) Format nodemask for printing
+ * int nodemask_parse(ubuf, ulen, mask)	Parse ascii string as nodemask
+ *
+ * for_each_node_mask(node, mask)	for-loop node over mask
+ *
+ * int num_online_nodes()		Number of online Nodes
+ * int num_possible_nodes()		Number of all possible Nodes
+ *
+ * int node_online(node)		Is some node online?
+ * int node_possible(node)		Is some node possible?
+ *
+ * int any_online_node(mask)		First online node in mask
+ *
+ * node_set_online(node)		set bit 'node' in node_online_map
+ * node_set_offline(node)		clear bit 'node' in node_online_map
+ *
+ * for_each_node(node)			for-loop node over node_possible_map
+ * for_each_online_node(node)		for-loop node over node_online_map
+ *
+ * Subtlety:
+ * 1) The 'type-checked' form of node_isset() causes gcc (3.3.2, anyway)
+ *    to generate slightly worse code.  So use a simple one-line #define
+ *    for node_isset(), instead of wrapping an inline inside a macro, the
+ *    way we do the other calls.
+ */
+
+#include <xen/kernel.h>
+#include <xen/bitmap.h>
+#include <xen/numa.h>
+
+typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
+extern nodemask_t _unused_nodemask_arg_;
+
+#define node_set(node, dst) __node_set((node), &(dst))
+static inline void __node_set(int node, volatile nodemask_t *dstp)
+{
+	set_bit(node, dstp->bits);
+}
+
+#define node_clear(node, dst) __node_clear((node), &(dst))
+static inline void __node_clear(int node, volatile nodemask_t *dstp)
+{
+	clear_bit(node, dstp->bits);
+}
+
+#define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES)
+static inline void __nodes_setall(nodemask_t *dstp, int nbits)
+{
+	bitmap_fill(dstp->bits, nbits);
+}
+
+#define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES)
+static inline void __nodes_clear(nodemask_t *dstp, int nbits)
+{
+	bitmap_zero(dstp->bits, nbits);
+}
+
+/* No static inline type checking - see Subtlety (1) above. */
+#define node_isset(node, nodemask) test_bit((node), (nodemask).bits)
+
+#define node_test_and_set(node, nodemask) \
+			__node_test_and_set((node), &(nodemask))
+static inline int __node_test_and_set(int node, nodemask_t *addr)
+{
+	return test_and_set_bit(node, addr->bits);
+}
+
+#define nodes_and(dst, src1, src2) \
+			__nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES)
+static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p,
+					const nodemask_t *src2p, int nbits)
+{
+	bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define nodes_or(dst, src1, src2) \
+			__nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES)
+static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p,
+					const nodemask_t *src2p, int nbits)
+{
+	bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define nodes_xor(dst, src1, src2) \
+			__nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES)
+static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p,
+					const nodemask_t *src2p, int nbits)
+{
+	bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define nodes_andnot(dst, src1, src2) \
+			__nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES)
+static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p,
+					const nodemask_t *src2p, int nbits)
+{
+	bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define nodes_complement(dst, src) \
+			__nodes_complement(&(dst), &(src), MAX_NUMNODES)
+static inline void __nodes_complement(nodemask_t *dstp,
+					const nodemask_t *srcp, int nbits)
+{
+	bitmap_complement(dstp->bits, srcp->bits, nbits);
+}
+
+#define nodes_equal(src1, src2) \
+			__nodes_equal(&(src1), &(src2), MAX_NUMNODES)
+static inline int __nodes_equal(const nodemask_t *src1p,
+					const nodemask_t *src2p, int nbits)
+{
+	return bitmap_equal(src1p->bits, src2p->bits, nbits);
+}
+
+#define nodes_intersects(src1, src2) \
+			__nodes_intersects(&(src1), &(src2), MAX_NUMNODES)
+static inline int __nodes_intersects(const nodemask_t *src1p,
+					const nodemask_t *src2p, int nbits)
+{
+	return bitmap_intersects(src1p->bits, src2p->bits, nbits);
+}
+
+#define nodes_subset(src1, src2) \
+			__nodes_subset(&(src1), &(src2), MAX_NUMNODES)
+static inline int __nodes_subset(const nodemask_t *src1p,
+					const nodemask_t *src2p, int nbits)
+{
+	return bitmap_subset(src1p->bits, src2p->bits, nbits);
+}
+
+#define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES)
+static inline int __nodes_empty(const nodemask_t *srcp, int nbits)
+{
+	return bitmap_empty(srcp->bits, nbits);
+}
+
+#define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES)
+static inline int __nodes_full(const nodemask_t *srcp, int nbits)
+{
+	return bitmap_full(srcp->bits, nbits);
+}
+
+#define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES)
+static inline int __nodes_weight(const nodemask_t *srcp, int nbits)
+{
+	return bitmap_weight(srcp->bits, nbits);
+}
+
+#define nodes_shift_right(dst, src, n) \
+			__nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES)
+static inline void __nodes_shift_right(nodemask_t *dstp,
+					const nodemask_t *srcp, int n, int nbits)
+{
+	bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
+}
+
+#define nodes_shift_left(dst, src, n) \
+			__nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES)
+static inline void __nodes_shift_left(nodemask_t *dstp,
+					const nodemask_t *srcp, int n, int nbits)
+{
+	bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
+}
+
+/* FIXME: better would be to fix all architectures to never return
+          > MAX_NUMNODES, then the silly min_ts could be dropped. */
+
+#define first_node(src) __first_node(&(src))
+static inline int __first_node(const nodemask_t *srcp)
+{
+	return min_t(int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES));
+}
+
+#define next_node(n, src) __next_node((n), &(src))
+static inline int __next_node(int n, const nodemask_t *srcp)
+{
+	return min_t(int,MAX_NUMNODES,find_next_bit(srcp->bits, MAX_NUMNODES, n+1));
+}
+
+#define nodemask_of_node(node)						\
+({									\
+	typeof(_unused_nodemask_arg_) m;				\
+	if (sizeof(m) == sizeof(unsigned long)) {			\
+		m.bits[0] = 1UL<<(node);				\
+	} else {							\
+		nodes_clear(m);						\
+		node_set((node), m);					\
+	}								\
+	m;								\
+})
+
+#define first_unset_node(mask) __first_unset_node(&(mask))
+static inline int __first_unset_node(const nodemask_t *maskp)
+{
+	return min_t(int,MAX_NUMNODES,
+			find_first_zero_bit(maskp->bits, MAX_NUMNODES));
+}
+
+#define NODE_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES)
+
+#if MAX_NUMNODES <= BITS_PER_LONG
+
+#define NODE_MASK_ALL							\
+((nodemask_t) { {							\
+	[BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD		\
+} })
+
+#else
+
+#define NODE_MASK_ALL							\
+((nodemask_t) { {							\
+	[0 ... BITS_TO_LONGS(MAX_NUMNODES)-2] = ~0UL,			\
+	[BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD		\
+} })
+
+#endif
+
+#define NODE_MASK_NONE							\
+((nodemask_t) { {							\
+	[0 ... BITS_TO_LONGS(MAX_NUMNODES)-1] =  0UL			\
+} })
+
+#define nodes_addr(src) ((src).bits)
+
+#if 0
+#define nodemask_scnprintf(buf, len, src) \
+			__nodemask_scnprintf((buf), (len), &(src), MAX_NUMNODES)
+static inline int __nodemask_scnprintf(char *buf, int len,
+					const nodemask_t *srcp, int nbits)
+{
+	return bitmap_scnprintf(buf, len, srcp->bits, nbits);
+}
+
+#define nodemask_parse(ubuf, ulen, dst) \
+			__nodemask_parse((ubuf), (ulen), &(dst), MAX_NUMNODES)
+static inline int __nodemask_parse(const char __user *buf, int len,
+					nodemask_t *dstp, int nbits)
+{
+	return bitmap_parse(buf, len, dstp->bits, nbits);
+}
+#endif
+
+#if MAX_NUMNODES > 1
+#define for_each_node_mask(node, mask)			\
+	for ((node) = first_node(mask);			\
+		(node) < MAX_NUMNODES;			\
+		(node) = next_node((node), (mask)))
+#else /* MAX_NUMNODES == 1 */
+#define for_each_node_mask(node, mask)			\
+	if (!nodes_empty(mask))				\
+		for ((node) = 0; (node) < 1; (node)++)
+#endif /* MAX_NUMNODES */
+
+/*
+ * The following particular system nodemasks and operations
+ * on them manage all possible and online nodes.
+ */
+
+extern nodemask_t node_online_map;
+extern nodemask_t node_possible_map;
+
+#if MAX_NUMNODES > 1
+#define num_online_nodes()	nodes_weight(node_online_map)
+#define num_possible_nodes()	nodes_weight(node_possible_map)
+#define node_online(node)	node_isset((node), node_online_map)
+#define node_possible(node)	node_isset((node), node_possible_map)
+#else
+#define num_online_nodes()	1
+#define num_possible_nodes()	1
+#define node_online(node)	((node) == 0)
+#define node_possible(node)	((node) == 0)
+#endif
+
+#define any_online_node(mask)			\
+({						\
+	int node;				\
+	for_each_node_mask(node, (mask))	\
+		if (node_online(node))		\
+			break;			\
+	node;					\
+})
+
+#define node_set_online(node)	   set_bit((node), node_online_map.bits)
+#define node_set_offline(node)	   clear_bit((node), node_online_map.bits)
+
+#define for_each_node(node)	   for_each_node_mask((node), node_possible_map)
+#define for_each_online_node(node) for_each_node_mask((node), node_online_map)
+
+#endif /* __LINUX_NODEMASK_H */
diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
new file mode 100644
index 0000000000..9585fc9c48
--- /dev/null
+++ b/xen/include/xen/numa.h
@@ -0,0 +1,13 @@
+#ifndef _XEN_NUMA_H
+#define _XEN_NUMA_H
+
+#include <xen/config.h>
+#include <asm/numa.h>
+
+#ifndef NODES_SHIFT
+#define NODES_SHIFT     0
+#endif
+
+#define MAX_NUMNODES    (1 << NODES_SHIFT)
+
+#endif /* _XEN_NUMA_H */