author     iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2003-02-24 16:59:11 +0000
committer  iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2003-02-24 16:59:11 +0000
commit     0b109673722dd825609e7cc51ca124693f0b8240 (patch)
tree       ba95c49e5ebe85976c365b232c8f48634784cca1 /xen
parent     a48212cb65e09669ed243581556529681cebba0a (diff)
bitkeeper revision 1.94 (3e5a4f5fzVaxemjfCt0N0OH8PYPiuw)
Rename xen-2.4.16 to just "xen" to reflect that it bears no relation to the Linux kernel version.
Diffstat (limited to 'xen')
-rw-r--r--  xen/Makefile  41
-rw-r--r--  xen/README  145
-rw-r--r--  xen/Rules.mk  36
-rw-r--r--  xen/arch/i386/Makefile  17
-rw-r--r--  xen/arch/i386/Rules.mk  14
-rw-r--r--  xen/arch/i386/acpitable.c  549
-rw-r--r--  xen/arch/i386/acpitable.h  260
-rw-r--r--  xen/arch/i386/apic.c  845
-rw-r--r--  xen/arch/i386/boot/boot.S  239
-rw-r--r--  xen/arch/i386/delay.c  29
-rw-r--r--  xen/arch/i386/entry.S  534
-rw-r--r--  xen/arch/i386/extable.c  62
-rw-r--r--  xen/arch/i386/i387.c  56
-rw-r--r--  xen/arch/i386/i8259.c  481
-rw-r--r--  xen/arch/i386/idle0_task.c  20
-rw-r--r--  xen/arch/i386/io_apic.c  1683
-rw-r--r--  xen/arch/i386/ioremap.c  106
-rw-r--r--  xen/arch/i386/irq.c  895
-rw-r--r--  xen/arch/i386/mm.c  141
-rw-r--r--  xen/arch/i386/mpparse.c  944
-rw-r--r--  xen/arch/i386/pci-dma.c  37
-rw-r--r--  xen/arch/i386/pci-i386.c  391
-rw-r--r--  xen/arch/i386/pci-i386.h  69
-rw-r--r--  xen/arch/i386/pci-irq.c  795
-rw-r--r--  xen/arch/i386/pci-pc.c  1494
-rw-r--r--  xen/arch/i386/process.c  418
-rw-r--r--  xen/arch/i386/rwlock.c  33
-rw-r--r--  xen/arch/i386/setup.c  375
-rw-r--r--  xen/arch/i386/smp.c  578
-rw-r--r--  xen/arch/i386/smpboot.c  960
-rw-r--r--  xen/arch/i386/time.c  434
-rw-r--r--  xen/arch/i386/trampoline.S  54
-rw-r--r--  xen/arch/i386/traps.c  696
-rw-r--r--  xen/arch/i386/usercopy.c  190
-rw-r--r--  xen/arch/i386/xeno.lds  87
-rw-r--r--  xen/common/Makefile  8
-rw-r--r--  xen/common/ac_timer.c  335
-rw-r--r--  xen/common/block.c  22
-rw-r--r--  xen/common/brlock.c  69
-rw-r--r--  xen/common/dom0_ops.c  150
-rw-r--r--  xen/common/domain.c  590
-rw-r--r--  xen/common/domain_page.c  67
-rw-r--r--  xen/common/event.c  33
-rw-r--r--  xen/common/kernel.c  519
-rw-r--r--  xen/common/keyhandler.c  130
-rw-r--r--  xen/common/lib.c  526
-rw-r--r--  xen/common/memory.c  820
-rw-r--r--  xen/common/network.c  475
-rw-r--r--  xen/common/page_alloc.c  288
-rw-r--r--  xen/common/perfc.c  81
-rw-r--r--  xen/common/resource.c  332
-rw-r--r--  xen/common/schedule.c  371
-rw-r--r--  xen/common/slab.c  1945
-rw-r--r--  xen/common/softirq.c  332
-rw-r--r--  xen/common/timer.c  603
-rw-r--r--  xen/common/vsprintf.c  713
-rw-r--r--  xen/drivers/Makefile  16
-rw-r--r--  xen/drivers/block/Makefile  8
-rw-r--r--  xen/drivers/block/blkpg.c  315
-rw-r--r--  xen/drivers/block/elevator.c  224
-rw-r--r--  xen/drivers/block/genhd.c  219
-rw-r--r--  xen/drivers/block/ll_rw_blk.c  1461
-rw-r--r--  xen/drivers/block/xen_block.c  402
-rw-r--r--  xen/drivers/char/Makefile  8
-rw-r--r--  xen/drivers/char/xen_kbd.c  191
-rw-r--r--  xen/drivers/char/xen_serial.c  103
-rw-r--r--  xen/drivers/ide/Makefile  8
-rw-r--r--  xen/drivers/ide/ide-disk.c  1555
-rw-r--r--  xen/drivers/ide/ide-dma.c  913
-rw-r--r--  xen/drivers/ide/ide-features.c  384
-rw-r--r--  xen/drivers/ide/ide-geometry.c  222
-rw-r--r--  xen/drivers/ide/ide-pci.c  1016
-rw-r--r--  xen/drivers/ide/ide-probe.c  1066
-rw-r--r--  xen/drivers/ide/ide-taskfile.c  1733
-rw-r--r--  xen/drivers/ide/ide-xeno.c  41
-rw-r--r--  xen/drivers/ide/ide.c  4197
-rw-r--r--  xen/drivers/ide/ide_modes.h  236
-rw-r--r--  xen/drivers/ide/piix.c  542
-rw-r--r--  xen/drivers/net/3c59x.c  2989
-rw-r--r--  xen/drivers/net/Makefile  13
-rw-r--r--  xen/drivers/net/Space.c  44
-rw-r--r--  xen/drivers/net/e1000/LICENSE  339
-rw-r--r--  xen/drivers/net/e1000/Makefile  39
-rw-r--r--  xen/drivers/net/e1000/e1000.h  208
-rw-r--r--  xen/drivers/net/e1000/e1000_ethtool.c  611
-rw-r--r--  xen/drivers/net/e1000/e1000_hw.c  3610
-rw-r--r--  xen/drivers/net/e1000/e1000_hw.h  1789
-rw-r--r--  xen/drivers/net/e1000/e1000_main.c  2279
-rw-r--r--  xen/drivers/net/e1000/e1000_osdep.h  112
-rw-r--r--  xen/drivers/net/e1000/e1000_param.c  655
-rw-r--r--  xen/drivers/net/ne/8390.c  1158
-rw-r--r--  xen/drivers/net/ne/8390.h  197
-rw-r--r--  xen/drivers/net/ne/Makefile  8
-rw-r--r--  xen/drivers/net/ne/ne.c  685
-rw-r--r--  xen/drivers/net/net_init.c  732
-rw-r--r--  xen/drivers/net/setup.c  173
-rw-r--r--  xen/drivers/net/tg3.c  6884
-rw-r--r--  xen/drivers/net/tg3.h  1893
-rw-r--r--  xen/drivers/pci/Makefile  44
-rw-r--r--  xen/drivers/pci/compat.c  65
-rw-r--r--  xen/drivers/pci/gen-devlist.c  130
-rw-r--r--  xen/drivers/pci/names.c  135
-rw-r--r--  xen/drivers/pci/pci.c  2217
-rw-r--r--  xen/drivers/pci/pci.ids  6778
-rw-r--r--  xen/drivers/pci/proc.c  572
-rw-r--r--  xen/drivers/pci/quirks.c  666
-rw-r--r--  xen/drivers/pci/setup-bus.c  400
-rw-r--r--  xen/drivers/pci/setup-irq.c  71
-rw-r--r--  xen/drivers/pci/setup-res.c  241
-rw-r--r--  xen/drivers/pci/syscall.c  144
-rw-r--r--  xen/drivers/scsi/Makefile  11
-rw-r--r--  xen/drivers/scsi/aacraid/Makefile  17
-rw-r--r--  xen/drivers/scsi/aacraid/README  42
-rw-r--r--  xen/drivers/scsi/aacraid/TODO  4
-rw-r--r--  xen/drivers/scsi/aacraid/aachba.c  1685
-rw-r--r--  xen/drivers/scsi/aacraid/aacraid.h  1420
-rw-r--r--  xen/drivers/scsi/aacraid/commctrl.c  438
-rw-r--r--  xen/drivers/scsi/aacraid/comminit.c  350
-rw-r--r--  xen/drivers/scsi/aacraid/commsup.c  1028
-rw-r--r--  xen/drivers/scsi/aacraid/dpcsup.c  207
-rw-r--r--  xen/drivers/scsi/aacraid/linit.c  794
-rw-r--r--  xen/drivers/scsi/aacraid/rx.c  457
-rw-r--r--  xen/drivers/scsi/aacraid/sa.c  406
-rw-r--r--  xen/drivers/scsi/constants.c  1005
-rw-r--r--  xen/drivers/scsi/constants.h  6
-rw-r--r--  xen/drivers/scsi/hosts.c  316
-rw-r--r--  xen/drivers/scsi/hosts.h  591
-rw-r--r--  xen/drivers/scsi/scsi.c  2999
-rw-r--r--  xen/drivers/scsi/scsi.h  896
-rw-r--r--  xen/drivers/scsi/scsi_dma.c  455
-rw-r--r--  xen/drivers/scsi/scsi_error.c  2063
-rw-r--r--  xen/drivers/scsi/scsi_ioctl.c  538
-rw-r--r--  xen/drivers/scsi/scsi_lib.c  1201
-rw-r--r--  xen/drivers/scsi/scsi_merge.c  1181
-rw-r--r--  xen/drivers/scsi/scsi_module.c.inc  71
-rw-r--r--  xen/drivers/scsi/scsi_obsolete.h  106
-rw-r--r--  xen/drivers/scsi/scsi_proc.c  329
-rw-r--r--  xen/drivers/scsi/scsi_queue.c  151
-rw-r--r--  xen/drivers/scsi/scsi_scan.c  906
-rw-r--r--  xen/drivers/scsi/scsi_syms.c  105
-rw-r--r--  xen/drivers/scsi/scsicam.c  236
-rw-r--r--  xen/drivers/scsi/sd.c  1512
-rw-r--r--  xen/drivers/scsi/sd.h  66
-rw-r--r--  xen/include/asm-i386/apic.h  96
-rw-r--r--  xen/include/asm-i386/apicdef.h  378
-rw-r--r--  xen/include/asm-i386/atomic.h  204
-rw-r--r--  xen/include/asm-i386/bitops.h  368
-rw-r--r--  xen/include/asm-i386/byteorder.h  47
-rw-r--r--  xen/include/asm-i386/cache.h  13
-rw-r--r--  xen/include/asm-i386/cpufeature.h  76
-rw-r--r--  xen/include/asm-i386/current.h  15
-rw-r--r--  xen/include/asm-i386/debugreg.h  64
-rw-r--r--  xen/include/asm-i386/delay.h  14
-rw-r--r--  xen/include/asm-i386/desc.h  32
-rw-r--r--  xen/include/asm-i386/dma.h  301
-rw-r--r--  xen/include/asm-i386/domain_page.h  43
-rw-r--r--  xen/include/asm-i386/elf.h  233
-rw-r--r--  xen/include/asm-i386/fixmap.h  107
-rw-r--r--  xen/include/asm-i386/flushtlb.h  48
-rw-r--r--  xen/include/asm-i386/hardirq.h  88
-rw-r--r--  xen/include/asm-i386/hdreg.h  12
-rw-r--r--  xen/include/asm-i386/i387.h  39
-rw-r--r--  xen/include/asm-i386/ide.h  128
-rw-r--r--  xen/include/asm-i386/io.h  253
-rw-r--r--  xen/include/asm-i386/io_apic.h  148
-rw-r--r--  xen/include/asm-i386/ioctl.h  75
-rw-r--r--  xen/include/asm-i386/irq.h  203
-rw-r--r--  xen/include/asm-i386/mc146818rtc.h  113
-rw-r--r--  xen/include/asm-i386/mpspec.h  224
-rw-r--r--  xen/include/asm-i386/msr.h  104
-rw-r--r--  xen/include/asm-i386/page.h  175
-rw-r--r--  xen/include/asm-i386/param.h  24
-rw-r--r--  xen/include/asm-i386/pci.h  286
-rw-r--r--  xen/include/asm-i386/pgalloc.h  117
-rw-r--r--  xen/include/asm-i386/processor.h  501
-rw-r--r--  xen/include/asm-i386/ptrace.h  86
-rw-r--r--  xen/include/asm-i386/rwlock.h  83
-rw-r--r--  xen/include/asm-i386/scatterlist.h  16
-rw-r--r--  xen/include/asm-i386/smp.h  92
-rw-r--r--  xen/include/asm-i386/smpboot.h  121
-rw-r--r--  xen/include/asm-i386/softirq.h  48
-rw-r--r--  xen/include/asm-i386/spinlock.h  206
-rw-r--r--  xen/include/asm-i386/system.h  354
-rw-r--r--  xen/include/asm-i386/time.h  49
-rw-r--r--  xen/include/asm-i386/timex.h  58
-rw-r--r--  xen/include/asm-i386/types.h  50
-rw-r--r--  xen/include/asm-i386/uaccess.h  600
-rw-r--r--  xen/include/asm-i386/unaligned.h  37
-rw-r--r--  xen/include/hypervisor-ifs/block.h  78
-rw-r--r--  xen/include/hypervisor-ifs/hypervisor-if.h  209
-rw-r--r--  xen/include/hypervisor-ifs/network.h  131
-rw-r--r--  xen/include/scsi/scsi.h  237
-rw-r--r--  xen/include/scsi/scsi_ioctl.h  51
-rw-r--r--  xen/include/scsi/scsicam.h  19
-rw-r--r--  xen/include/scsi/sg.h  330
-rw-r--r--  xen/include/stdarg.h  138
-rw-r--r--  xen/include/xeno/ac_timer.h  65
-rw-r--r--  xen/include/xeno/blk.h  409
-rw-r--r--  xen/include/xeno/blkdev.h  371
-rw-r--r--  xen/include/xeno/blkpg.h  64
-rw-r--r--  xen/include/xeno/block.h  11
-rw-r--r--  xen/include/xeno/brlock.h  220
-rw-r--r--  xen/include/xeno/byteorder/big_endian.h  68
-rw-r--r--  xen/include/xeno/byteorder/generic.h  180
-rw-r--r--  xen/include/xeno/byteorder/little_endian.h  68
-rw-r--r--  xen/include/xeno/byteorder/pdp_endian.h  88
-rw-r--r--  xen/include/xeno/byteorder/swab.h  190
-rw-r--r--  xen/include/xeno/byteorder/swabb.h  137
-rw-r--r--  xen/include/xeno/cache.h  37
-rw-r--r--  xen/include/xeno/config.h  136
-rw-r--r--  xen/include/xeno/ctype.h  54
-rw-r--r--  xen/include/xeno/delay.h  10
-rw-r--r--  xen/include/xeno/dom0_ops.h  63
-rw-r--r--  xen/include/xeno/elevator.h  104
-rw-r--r--  xen/include/xeno/errno.h  132
-rw-r--r--  xen/include/xeno/etherdevice.h  68
-rw-r--r--  xen/include/xeno/ethtool.h  361
-rw-r--r--  xen/include/xeno/event.h  101
-rw-r--r--  xen/include/xeno/genhd.h  313
-rw-r--r--  xen/include/xeno/hdreg.h  662
-rw-r--r--  xen/include/xeno/hdsmart.h  124
-rw-r--r--  xen/include/xeno/ide.h  1105
-rw-r--r--  xen/include/xeno/if.h  141
-rw-r--r--  xen/include/xeno/if_ether.h  100
-rw-r--r--  xen/include/xeno/if_packet.h  102
-rw-r--r--  xen/include/xeno/if_vlan.h  256
-rw-r--r--  xen/include/xeno/in.h  191
-rw-r--r--  xen/include/xeno/init.h  170
-rw-r--r--  xen/include/xeno/interrupt.h  258
-rw-r--r--  xen/include/xeno/ioctl.h  7
-rw-r--r--  xen/include/xeno/ioport.h  121
-rw-r--r--  xen/include/xeno/irq.h  63
-rw-r--r--  xen/include/xeno/irq_cpustat.h  34
-rw-r--r--  xen/include/xeno/kdev_t.h  123
-rw-r--r--  xen/include/xeno/kernel.h  37
-rw-r--r--  xen/include/xeno/keyhandler.h  16
-rw-r--r--  xen/include/xeno/lib.h  51
-rw-r--r--  xen/include/xeno/list.h  160
-rw-r--r--  xen/include/xeno/major.h  199
-rw-r--r--  xen/include/xeno/mii.h  165
-rw-r--r--  xen/include/xeno/mm.h  142
-rw-r--r--  xen/include/xeno/module.h  417
-rw-r--r--  xen/include/xeno/multiboot.h  81
-rw-r--r--  xen/include/xeno/netdevice.h  604
-rw-r--r--  xen/include/xeno/notifier.h  64
-rw-r--r--  xen/include/xeno/pci.h  807
-rw-r--r--  xen/include/xeno/pci_ids.h  1856
-rw-r--r--  xen/include/xeno/perfc.h  43
-rw-r--r--  xen/include/xeno/perfc_defn.h  4
-rw-r--r--  xen/include/xeno/prefetch.h  60
-rw-r--r--  xen/include/xeno/reboot.h  51
-rw-r--r--  xen/include/xeno/sched.h  224
-rw-r--r--  xen/include/xeno/skbuff.h  434
-rw-r--r--  xen/include/xeno/slab.h  84
-rw-r--r--  xen/include/xeno/smp.h  88
-rw-r--r--  xen/include/xeno/socket.h  136
-rw-r--r--  xen/include/xeno/sockios.h  132
-rw-r--r--  xen/include/xeno/spinlock.h  142
-rw-r--r--  xen/include/xeno/time.h  98
-rw-r--r--  xen/include/xeno/timer.h  81
-rw-r--r--  xen/include/xeno/timex.h  291
-rw-r--r--  xen/include/xeno/tqueue.h  125
-rw-r--r--  xen/include/xeno/types.h  50
-rw-r--r--  xen/include/xeno/vif.h  94
-rw-r--r--  xen/net/Makefile  8
-rw-r--r--  xen/net/dev.c  2019
-rw-r--r--  xen/net/dev_mcast.c  276
-rw-r--r--  xen/net/devinit.c  109
-rw-r--r--  xen/net/eth.c  252
-rw-r--r--  xen/net/skbuff.c  501
-rw-r--r--  xen/tools/Makefile  6
-rw-r--r--  xen/tools/elf-reloc.c  118
272 files changed, 118704 insertions, 0 deletions
diff --git a/xen/Makefile b/xen/Makefile
new file mode 100644
index 0000000000..3bd4299075
--- /dev/null
+++ b/xen/Makefile
@@ -0,0 +1,41 @@
+
+export BASEDIR := $(shell pwd)
+
+include Rules.mk
+
+default: $(TARGET)
+ gzip -f -9 < $(TARGET) > $(TARGET).gz
+# objdump -D -S image >image.s
+
+install: $(TARGET)
+ gzip -f -9 < $(TARGET) > $(TARGET).gz
+ cp $(TARGET).gz ../../install/images/image
+
+clean: delete-links
+ $(MAKE) -C tools clean
+ $(MAKE) -C common clean
+ $(MAKE) -C net clean
+ $(MAKE) -C drivers clean
+ $(MAKE) -C arch/$(ARCH) clean
+ rm -f *.o $(TARGET)* *~ core
+
+$(TARGET): make-links
+ $(MAKE) -C tools
+ $(MAKE) -C common
+ $(MAKE) -C net
+ $(MAKE) -C drivers
+ $(MAKE) -C arch/$(ARCH)
+
+make-links:
+ ln -sf xeno include/linux
+ ln -sf asm-$(ARCH) include/asm
+
+delete-links:
+ rm -f include/linux include/asm
+
+SUBDIRS = arch common drivers net
+TAGS:
+ etags `find include/asm-$(ARCH) -name '*.h'`
+ find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs etags -a
+ find $(SUBDIRS) -name '*.[ch]' | xargs etags -a
+
diff --git a/xen/README b/xen/README
new file mode 100644
index 0000000000..3518b8254a
--- /dev/null
+++ b/xen/README
@@ -0,0 +1,145 @@
+
+*****************************************************
+ Xeno Hypervisor (18/7/02)
+
+1) Tree layout
+Looks rather like a simplified Linux :-)
+Headers are in include/xeno and include/asm-<arch>.
+At build time we create symlinks:
+ include/linux -> include/xeno
+ include/asm -> include/asm-<arch>
+In this way, Linux device drivers should need less tweaking of
+their #include lines.
+
+For source files, mapping between hypervisor and Linux is:
+ Linux Hypervisor
+ ----- ----------
+ kernel/init/mm/lib -> common
+ net/* -> net/*
+ drivers/* -> drivers/*
+ arch/* -> arch/*
+
+Note that the use of #include <asm/...> and #include <linux/...> can
+lead to confusion, as such files will often exist on the system include
+path, even if a version doesn't exist within the hypervisor tree.
+Unfortunately '-nostdinc' cannot be specified to the compiler, as that
+prevents us using stdarg.h in the compiler's own header directory.
+
+We try to modify things in drivers/* as little as possible, so we can
+easily take updates from Linux. arch/* is basically straight from
+Linux, with fingers in Linux-specific pies hacked off. common/* has
+a lot of Linux code in it, but certain subsystems (task maintenance,
+low-level memory handling) have been replaced. net/* contains enough
+Linux-like gloop to get network drivers to work with little/no
+modification.
+
+2) Building
+'make': Builds ELF executable called 'image' in base directory
+'make install': gzip-compresses 'image' and copies it to TFTP server
+'make clean': removes *all* build and target files
+
+
+*****************************************************
+Random thoughts and stuff from here down...
+
+Todo list
+---------
+* Hypervisor need only directly map its own memory pool
+ (maybe 128MB, tops). That would need 0x08000000....
+ This would allow 512MB Linux with plenty room for vmalloc'ed areas.
+* Network device -- port drivers to hypervisor, implement virtual
+ driver for xeno-linux. Looks like Ethernet.
+ -- Hypervisor needs to do (at a minimum):
+ - packet filtering on tx (unicast IP only)
+ - packet demux on rx (unicast IP only)
+ - provide DHCP [maybe do something simpler?]
+ and ARP [at least for hypervisor IP address]
+
+
+Segment descriptor tables
+-------------------------
+We want to allow guest OSes to specify GDT and LDT tables using their
+own pages of memory (just like with page tables). So allow the following:
+ * new_table_entry(ptr, val)
+ [Allows insertion of a code, data, or LDT descriptor into given
+ location. Can simply be checked then poked, with no need to look at
+ page type.]
+ * new_GDT() -- relevant virtual pages are resolved to frames. Either
+ (i) page not present; or (ii) page is only mapped read-only and checks
+ out okay (then marked as special page). Old table is resolved first,
+ and the pages are unmarked (no longer special type).
+ * new_LDT() -- same as for new_GDT(), with same special page type.
+
+Page table updates must be hooked, so we look for updates to virtual page
+addresses in the GDT/LDT range. If a mapping is made not-present, the old
+physpage has its type_count decremented. If a mapping is made present, ensure
+it is read-only, check the page, and set the special type.
+
+Merge set_{LDT,GDT} into update_baseptrs, by passing four args:
+ update_baseptrs(mask, ptab, gdttab, ldttab);
+Update of ptab requires update of gtab (or set to internal default).
+Update of gtab requires update of ltab (or set to internal default). (A
+descriptor-checking sketch follows this file's diff.)
+
+
+The hypervisor page cache
+-------------------------
+This will allow guest OSes to make use of spare pages in the system, but
+allow them to be immediately used for any new domains or memory requests.
+The idea is that, when a page is laundered and falls off Linux's clean_LRU
+list, rather than being freed it becomes a candidate for passing down into
+the hypervisor. In return, xeno-linux may ask for one of its previously-
+cached pages back:
+ (page, new_id) = cache_query(page, old_id);
+If the requested page couldn't be kept, a blank page is returned.
+When would Linux make the query? Whenever it wants a page back without
+the delay of going to disc. Also, whenever a page would otherwise be
+flushed to disc.
+
+To try and add to the cache: (blank_page, new_id) = cache_query(page, NULL);
+ [NULL means "give me a blank page"].
+To try and retrieve from the cache: (page, new_id) = cache_query(x_page, id)
+ [we may request that x_page just be discarded, and therefore not impinge
+ on this domain's cache quota]. (A toy model follows this file's diff.)
+
+
+Booting secondary processors
+----------------------------
+
+start_of_day (i386/setup.c)
+smp_boot_cpus (i386/smpboot.c)
+ * initialises boot CPU data
+ * parses APIC tables
+ * for each cpu:
+ do_boot_cpu (i386/smpboot.c)
+ * forks a new idle process
+ * points initial stack inside new task struct
+ * points initial EIP at a trampoline in very low memory
+ * frobs remote APIC....
+
+On other processor:
+ * trampoline sets GDT and IDT
+ * jumps to the main boot address with a magic register value
+ * after setting proper page and descriptor tables, jumps to...
+ initialize_secondary (i386/smpboot.c)
+ * simply reads ESP/EIP out of the (new) idle task
+ * this causes a jump to...
+ start_secondary (i386/smpboot.c)
+ * reset all processor state
+ * barrier, then write bitmasks to signal back to boot cpu
+ * then barrel into...
+ cpu_idle (i386/process.c)
+ [THIS IS PROBABLY REASONABLE -- BOOT CPU SHOULD KICK
+ SECONDARIES TO GET WORK DONE]
+
+
+SMP capabilities
+----------------
+
+Current intention is to allow hypervisor to schedule on all processors in
+SMP boxen, but to tie each domain to a single processor. This simplifies
+many SMP intricacies both in terms of correctness and efficiency (e.g.
+TLB flushing, network packet delivery, ...).
+
+Clients can still make use of SMP by installing multiple domains on a single
+machine, and treating it as a fast cluster (at the very least, the
+hypervisor will have fast routing of locally-destined packets).
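
The segment-descriptor proposal in the README above hinges on being able to
check a guest-supplied descriptor before poking it into a GDT/LDT page. The
following toy, user-space illustration uses the architectural x86 segment
descriptor layout, but the policy shown (refuse present system/gate
descriptors and ring-0 entries) is only a plausible guess at what
new_table_entry() would enforce; none of this code appears in the commit.

    #include <stdio.h>
    #include <stdint.h>

    /* Return non-zero if a guest may install descriptor 'd'. */
    static int descriptor_is_safe(uint64_t d)
    {
        uint32_t hi = (uint32_t)(d >> 32);    /* flags live in the high word */

        if (!(hi & (1u << 15)))    /* P=0: not-present entries are harmless */
            return 1;
        if (!(hi & (1u << 12)))    /* S=0: system segment or gate -- refuse */
            return 0;
        if (((hi >> 13) & 3) == 0) /* DPL=0 would outrank the hypervisor    */
            return 0;
        return 1;                  /* ring 1-3 code/data looks fine         */
    }

    int main(void)
    {
        uint64_t ring1_code = 0x00cfba000000ffffULL; /* flat ring-1 code */
        uint64_t ring0_code = 0x00cf9a000000ffffULL; /* flat ring-0 code */
        printf("ring1 ok=%d, ring0 ok=%d\n",
               descriptor_is_safe(ring1_code), descriptor_is_safe(ring0_code));
        return 0;
    }

Compiled stand-alone, this prints "ring1 ok=1, ring0 ok=0".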
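
The page-cache section likewise names a cache_query() call without pinning
down its shape. A minimal user-space model of the described protocol --
offer a clean page down, get a blank page and an id back, later try to
redeem the id -- might behave as below; all names are hypothetical.

    #include <stdio.h>

    #define SLOTS 4
    static int cache[SLOTS];            /* toy "pages"; 0 means empty slot */

    /* id == 0: offer 'page' to the cache, receive a blank page and a new
     * id. id != 0: try to redeem a previously cached page; the slot may
     * have been reclaimed, in which case a blank (0) page comes back. */
    static int cache_query(int page, int id, int *new_id)
    {
        if (id == 0) {
            for (int i = 0; i < SLOTS; i++) {
                if (cache[i] == 0) {
                    cache[i] = page;
                    *new_id = i + 1;
                    return 0;           /* blank page in exchange */
                }
            }
            *new_id = 0;
            return page;                /* cache full: keep your page */
        }
        int got = cache[id - 1];        /* may already be 0 (reclaimed) */
        cache[id - 1] = 0;
        *new_id = 0;
        return got;
    }

    int main(void)
    {
        int id;
        int blank = cache_query(42, 0, &id);
        printf("offered page 42: got blank=%d, id=%d\n", blank, id);
        int page = cache_query(0, id, &id);
        printf("redeemed: page=%d\n", page);
        return 0;
    }

Offering page 42 returns a blank page plus an id; redeeming the id later may
return the page, or a blank page if the slot was reclaimed in the meantime.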
diff --git a/xen/Rules.mk b/xen/Rules.mk
new file mode 100644
index 0000000000..13a57ed550
--- /dev/null
+++ b/xen/Rules.mk
@@ -0,0 +1,36 @@
+
+ARCH := i386
+
+TARGET := $(BASEDIR)/image
+HDRS := $(wildcard $(BASEDIR)/include/xeno/*.h)
+HDRS += $(wildcard $(BASEDIR)/include/scsi/*.h)
+HDRS += $(wildcard $(BASEDIR)/include/hypervisor-ifs/*.h)
+HDRS += $(wildcard $(BASEDIR)/include/asm-$(ARCH)/*.h)
+
+C_SRCS := $(wildcard *.c)
+S_SRCS := $(wildcard *.S)
+OBJS := $(patsubst %.S,%.o,$(S_SRCS))
+OBJS += $(patsubst %.c,%.o,$(C_SRCS))
+
+# Note that link order matters!
+ALL_OBJS := $(BASEDIR)/common/common.o
+ALL_OBJS += $(BASEDIR)/net/network.o
+ALL_OBJS += $(BASEDIR)/drivers/char/driver.o
+ALL_OBJS += $(BASEDIR)/drivers/pci/driver.o
+ALL_OBJS += $(BASEDIR)/drivers/net/driver.o
+ALL_OBJS += $(BASEDIR)/drivers/block/driver.o
+ALL_OBJS += $(BASEDIR)/drivers/ide/driver.o
+#ALL_OBJS += $(BASEDIR)/drivers/scsi/driver.o
+ALL_OBJS += $(BASEDIR)/arch/$(ARCH)/arch.o
+
+HOSTCC = gcc
+HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
+
+include $(BASEDIR)/arch/$(ARCH)/Rules.mk
+
+%.o: %.c $(HDRS) Makefile
+ $(CC) -g $(CFLAGS) -c $< -o $@
+
+%.o: %.S $(HDRS) Makefile
+ $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
+
diff --git a/xen/arch/i386/Makefile b/xen/arch/i386/Makefile
new file mode 100644
index 0000000000..6778324206
--- /dev/null
+++ b/xen/arch/i386/Makefile
@@ -0,0 +1,17 @@
+
+include $(BASEDIR)/Rules.mk
+
+# What happens here? We link monitor object files together, starting
+# at MONITOR_BASE (a very high address). But the bootloader cannot put
+# things there, so we initially load at LOAD_BASE. A hacky little
+# tool called `elf-reloc' is used to modify segment offsets from
+# MONITOR_BASE-relative to LOAD_BASE-relative.
+# (NB. Linux gets round this by turning its image into raw binary, then
+# wrapping that with a low-memory bootstrapper.)
+default: boot/boot.o $(OBJS)
+ $(LD) -r -o arch.o $(OBJS)
+ $(LD) $(LDFLAGS) boot/boot.o $(ALL_OBJS) -o $(TARGET)
+ $(BASEDIR)/tools/elf-reloc $(MONITOR_BASE) $(LOAD_BASE) $(TARGET)
+
+clean:
+ rm -f *.o *~ core boot/*.o boot/*~ boot/core
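
The comment in this Makefile explains the MONITOR_BASE/LOAD_BASE trick;
the bundled tools/elf-reloc (counted in the diffstat above) performs the
fixup. As a rough reconstruction of what such a tool has to do -- and
explicitly not the actual elf-reloc.c -- one could rebase the physical
load address of each loadable segment while leaving the virtual (link)
addresses alone:

    #include <elf.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(int argc, char **argv)
    {
        if (argc != 4) {
            fprintf(stderr, "usage: %s <link_base> <load_base> <image>\n",
                    argv[0]);
            return 1;
        }

        unsigned long link_base = strtoul(argv[1], NULL, 0);
        unsigned long load_base = strtoul(argv[2], NULL, 0);

        FILE *f = fopen(argv[3], "r+b");
        if (f == NULL) { perror("fopen"); return 1; }

        Elf32_Ehdr eh;
        if (fread(&eh, sizeof(eh), 1, f) != 1) { perror("fread"); return 1; }

        /* Rebase the physical address of every loadable segment. */
        for (int i = 0; i < eh.e_phnum; i++) {
            long pos = (long)eh.e_phoff + (long)i * eh.e_phentsize;
            Elf32_Phdr ph;

            fseek(f, pos, SEEK_SET);
            if (fread(&ph, sizeof(ph), 1, f) != 1) { perror("fread"); return 1; }
            if (ph.p_type != PT_LOAD)
                continue;
            ph.p_paddr = ph.p_vaddr - link_base + load_base;
            fseek(f, pos, SEEK_SET);
            if (fwrite(&ph, sizeof(ph), 1, f) != 1) { perror("fwrite"); return 1; }
        }

        fclose(f);
        return 0;
    }

Invoked as in the rule above, this would run as
elf-reloc 0xFC500000 0x00100000 image.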
diff --git a/xen/arch/i386/Rules.mk b/xen/arch/i386/Rules.mk
new file mode 100644
index 0000000000..8a672e228a
--- /dev/null
+++ b/xen/arch/i386/Rules.mk
@@ -0,0 +1,14 @@
+########################################
+# x86-specific definitions
+
+CC := gcc
+LD := ld
+# Linker should relocate monitor to this address
+MONITOR_BASE := 0xFC500000
+# Bootloader should load monitor to this real address
+LOAD_BASE := 0x00100000
+CFLAGS := -nostdinc -fno-builtin -O3 -Wall -DMONITOR_BASE=$(MONITOR_BASE)
+CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__ -DNDEBUG
+LDFLAGS := -T xeno.lds -N
+
+
diff --git a/xen/arch/i386/acpitable.c b/xen/arch/i386/acpitable.c
new file mode 100644
index 0000000000..1078db3eee
--- /dev/null
+++ b/xen/arch/i386/acpitable.c
@@ -0,0 +1,549 @@
+/*
+ * acpitable.c - IA32-specific ACPI boot-time initialization (Revision: 1)
+ *
+ * Copyright (C) 1999 Andrew Henroid
+ * Copyright (C) 2001 Richard Schaal
+ * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
+ * Copyright (C) 2001 Arjan van de Ven <arjanv@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * $Id: acpitable.c,v 1.7 2001/11/04 12:21:18 fenrus Exp $
+ */
+#include <xeno/config.h>
+#include <xeno/kernel.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+/*#include <xeno/stddef.h>*/
+#include <xeno/slab.h>
+#include <xeno/pci.h>
+#include <asm/mpspec.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+/*#include <asm/pgtable.h>*/
+
+#include "acpitable.h"
+
+static acpi_table_handler acpi_boot_ops[ACPI_TABLE_COUNT];
+
+
+static unsigned char __init
+acpi_checksum(void *buffer, int length)
+{
+ int i;
+ unsigned char *bytebuffer;
+ unsigned char sum = 0;
+
+ if (!buffer || length <= 0)
+ return 0;
+
+ bytebuffer = (unsigned char *) buffer;
+
+ for (i = 0; i < length; i++)
+ sum += *(bytebuffer++);
+
+ return sum;
+}
+
+static void __init
+acpi_print_table_header(acpi_table_header * header)
+{
+ if (!header)
+ return;
+
+ printk(KERN_INFO "ACPI table found: %.4s v%d [%.6s %.8s %d.%d]\n",
+ header->signature, header->revision, header->oem_id,
+ header->oem_table_id, header->oem_revision >> 16,
+ header->oem_revision & 0xffff);
+
+ return;
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_tb_scan_memory_for_rsdp
+ *
+ * PARAMETERS: address - Starting pointer for search
+ * length - Maximum length to search
+ *
+ * RETURN: Pointer to the RSDP if found and valid, otherwise NULL.
+ *
+ * DESCRIPTION: Search a block of memory for the RSDP signature
+ *
+ ******************************************************************************/
+
+static void *__init
+acpi_tb_scan_memory_for_rsdp(void *address, int length)
+{
+ u32 offset;
+
+ if (length <= 0)
+ return NULL;
+
+ /* Search from given start addr for the requested length */
+
+ offset = 0;
+
+ while (offset < length) {
+ /* The signature must match and the checksum must be correct */
+ if (strncmp(address, RSDP_SIG, sizeof(RSDP_SIG) - 1) == 0 &&
+ acpi_checksum(address, RSDP_CHECKSUM_LENGTH) == 0) {
+ /* If so, we have found the RSDP */
+ printk(KERN_INFO "ACPI: RSDP located at physical address %p\n",
+ address);
+ return address;
+ }
+ offset += RSDP_SCAN_STEP;
+ address += RSDP_SCAN_STEP;
+ }
+
+ /* Searched entire block, no RSDP was found */
+ printk(KERN_INFO "ACPI: Searched entire block, no RSDP was found.\n");
+ return NULL;
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_find_root_pointer
+ *
+ * PARAMETERS: none
+ *
+ * RETURN: physical address of the RSDP
+ *
+ * DESCRIPTION: Search the lower 1MB of memory for the root system descriptor
+ * pointer structure. If it is found, set *RSDP to point to it.
+ *
+ * NOTE: The RSDP must be either in the first 1KB of the Extended
+ * BIOS Data Area or between E0000 and FFFFF (ACPI 1.0 section
+ * 5.2.2; assertion #421).
+ *
+ ******************************************************************************/
+
+static struct acpi_table_rsdp * __init
+acpi_find_root_pointer(void)
+{
+ struct acpi_table_rsdp * rsdp;
+
+ /*
+ * Physical address is given
+ */
+ /*
+ * Region 1) Search EBDA (low memory) paragraphs
+ */
+ rsdp = acpi_tb_scan_memory_for_rsdp(__va(LO_RSDP_WINDOW_BASE),
+ LO_RSDP_WINDOW_SIZE);
+
+ if (rsdp)
+ return rsdp;
+
+ /*
+ * Region 2) Search upper memory: 16-byte boundaries in E0000h-FFFFFh
+ */
+ rsdp = acpi_tb_scan_memory_for_rsdp(__va(HI_RSDP_WINDOW_BASE),
+ HI_RSDP_WINDOW_SIZE);
+
+
+
+ if (rsdp)
+ return rsdp;
+
+ printk(KERN_ERR "ACPI: System description tables not found\n");
+ return NULL;
+}
+
+
+/*
+ * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
+ * to map the target physical address. The problem is that set_fixmap()
+ * provides a single page, and it is possible that the page is not
+ * sufficient.
+ * By using this area, we can map up to MAX_IO_APICS pages temporarily,
+ * i.e. until the next __va_range() call.
+ *
+ * Important Safety Note: The fixed I/O APIC page numbers are *subtracted*
+ * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
+ * count idx down while incrementing the phys address.
+ */
+static __init char *
+__va_range(unsigned long phys, unsigned long size)
+{
+ unsigned long base, offset, mapped_size;
+ int idx;
+
+ offset = phys & (PAGE_SIZE - 1);
+ mapped_size = PAGE_SIZE - offset;
+ set_fixmap(FIX_IO_APIC_BASE_END, phys);
+ base = fix_to_virt(FIX_IO_APIC_BASE_END);
+ dprintk("__va_range(0x%lx, 0x%lx): idx=%d mapped at %lx\n", phys, size,
+ FIX_IO_APIC_BASE_END, base);
+
+ /*
+ * Most cases can be covered by the below.
+ */
+ idx = FIX_IO_APIC_BASE_END;
+ while (mapped_size < size) {
+ if (--idx < FIX_IO_APIC_BASE_0)
+ return NULL; /* cannot handle this */
+ phys += PAGE_SIZE;
+ set_fixmap(idx, phys);
+ mapped_size += PAGE_SIZE;
+ }
+
+ return ((char *) base + offset);
+}
+
+static int __init acpi_tables_init(void)
+{
+ int result = -ENODEV;
+ acpi_table_header *header = NULL;
+ struct acpi_table_rsdp *rsdp = NULL;
+ struct acpi_table_rsdt *rsdt = NULL;
+ struct acpi_table_rsdt saved_rsdt;
+ int tables = 0;
+ int type = 0;
+ int i = 0;
+
+
+ rsdp = (struct acpi_table_rsdp *) acpi_find_root_pointer();
+
+ if (!rsdp)
+ return -ENODEV;
+
+ printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision,
+ rsdp->oem_id);
+
+ if (strncmp(rsdp->signature, RSDP_SIG,strlen(RSDP_SIG))) {
+ printk(KERN_WARNING "RSDP table signature incorrect\n");
+ return -EINVAL;
+ }
+
+ rsdt = (struct acpi_table_rsdt *)
+ __va_range(rsdp->rsdt_address, sizeof(struct acpi_table_rsdt));
+
+ if (!rsdt) {
+ printk(KERN_WARNING "ACPI: Invalid root system description tables (RSDT)\n");
+ return -ENODEV;
+ }
+
+ header = & rsdt->header;
+ acpi_print_table_header(header);
+
+ if (strncmp(header->signature, RSDT_SIG, strlen(RSDT_SIG))) {
+ printk(KERN_WARNING "ACPI: RSDT signature incorrect\n");
+ return -ENODEV;
+ }
+
+ /*
+ * The number of tables is computed by taking the
+ * size of all entries (total size of RSDT minus
+ * header size) divided by the size of each entry
+ * (4-byte table pointers).
+ */
+ tables = (header->length - sizeof(acpi_table_header)) / 4;
+
+ memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt));
+
+ if (saved_rsdt.header.length > sizeof(saved_rsdt)) {
+ printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n", saved_rsdt.header.length);
+ return -ENODEV;
+ }
+
+ for (i = 0; i < tables; i++) {
+ /* Map in header, then map in full table length. */
+ header = (acpi_table_header *)
+ __va_range(saved_rsdt.entry[i],
+ sizeof(acpi_table_header));
+ if (!header)
+ break;
+ header = (acpi_table_header *)
+ __va_range(saved_rsdt.entry[i], header->length);
+ if (!header)
+ break;
+
+ acpi_print_table_header(header);
+
+ if (acpi_checksum(header,header->length)) {
+ printk(KERN_WARNING "ACPI %s has invalid checksum\n",
+ acpi_table_signatures[i]);
+ continue;
+ }
+
+ for (type = 0; type < ACPI_TABLE_COUNT; type++)
+ if (!strncmp((char *) &header->signature,
+ acpi_table_signatures[type],strlen(acpi_table_signatures[type])))
+ break;
+
+ if (type >= ACPI_TABLE_COUNT) {
+ printk(KERN_WARNING "ACPI: Unsupported table %.4s\n",
+ header->signature);
+ continue;
+ }
+
+
+ if (!acpi_boot_ops[type])
+ continue;
+
+ result = acpi_boot_ops[type](header,
+ (unsigned long)
+ saved_rsdt.entry[i]);
+ }
+
+ return result;
+}
+
+static int total_cpus __initdata = 0;
+int have_acpi_tables;
+
+extern void __init MP_processor_info(struct mpc_config_processor *);
+
+static void __init
+acpi_parse_lapic(struct acpi_table_lapic *local_apic)
+{
+ struct mpc_config_processor proc_entry;
+ int ix = 0;
+
+ if (!local_apic)
+ return;
+
+ printk(KERN_INFO "LAPIC (acpi_id[0x%04x] id[0x%x] enabled[%d])\n",
+ local_apic->acpi_id, local_apic->id, local_apic->flags.enabled);
+
+ printk(KERN_INFO "CPU %d (0x%02x00)", total_cpus, local_apic->id);
+
+ if (local_apic->flags.enabled) {
+ printk(" enabled");
+ ix = local_apic->id;
+ if (ix >= MAX_APICS) {
+ printk(KERN_WARNING
+ "Processor #%d INVALID - (Max ID: %d).\n", ix,
+ MAX_APICS);
+ return;
+ }
+ /*
+ * Fill in the info we want to save. Not concerned about
+ * the processor ID. Processor features aren't present in
+ * the table.
+ */
+ proc_entry.mpc_type = MP_PROCESSOR;
+ proc_entry.mpc_apicid = local_apic->id;
+ proc_entry.mpc_cpuflag = CPU_ENABLED;
+ if (proc_entry.mpc_apicid == boot_cpu_physical_apicid) {
+ printk(" (BSP)");
+ proc_entry.mpc_cpuflag |= CPU_BOOTPROCESSOR;
+ }
+ proc_entry.mpc_cpufeature =
+ (boot_cpu_data.x86 << 8) |
+ (boot_cpu_data.x86_model << 4) |
+ boot_cpu_data.x86_mask;
+ proc_entry.mpc_featureflag = boot_cpu_data.x86_capability[0];
+ proc_entry.mpc_reserved[0] = 0;
+ proc_entry.mpc_reserved[1] = 0;
+ proc_entry.mpc_apicver = 0x10; /* integrated APIC */
+ MP_processor_info(&proc_entry);
+ } else {
+ printk(" disabled");
+ }
+ printk("\n");
+
+ total_cpus++;
+ return;
+}
+
+static void __init
+acpi_parse_ioapic(struct acpi_table_ioapic *ioapic)
+{
+
+ if (!ioapic)
+ return;
+
+ printk(KERN_INFO
+ "IOAPIC (id[0x%x] address[0x%x] global_irq_base[0x%x])\n",
+ ioapic->id, ioapic->address, ioapic->global_irq_base);
+
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk(KERN_WARNING
+ "Max # of I/O APICs (%d) exceeded (found %d).\n",
+ MAX_IO_APICS, nr_ioapics);
+/* panic("Recompile kernel with bigger MAX_IO_APICS!\n"); */
+ }
+}
+
+
+/* Interrupt source overrides inform the machine about exceptions
+ to the normal "PIC" mode interrupt routing */
+
+static void __init
+acpi_parse_int_src_ovr(struct acpi_table_int_src_ovr *intsrc)
+{
+ if (!intsrc)
+ return;
+
+ printk(KERN_INFO
+ "INT_SRC_OVR (bus[%d] irq[0x%x] global_irq[0x%x] polarity[0x%x] trigger[0x%x])\n",
+ intsrc->bus, intsrc->bus_irq, intsrc->global_irq,
+ intsrc->flags.polarity, intsrc->flags.trigger);
+}
+
+/*
+ * At this point, we look at the interrupt assignment entries in the MPS
+ * table.
+ */
+
+static void __init acpi_parse_nmi_src(struct acpi_table_nmi_src *nmisrc)
+{
+ if (!nmisrc)
+ return;
+
+ printk(KERN_INFO
+ "NMI_SRC (polarity[0x%x] trigger[0x%x] global_irq[0x%x])\n",
+ nmisrc->flags.polarity, nmisrc->flags.trigger,
+ nmisrc->global_irq);
+
+}
+static void __init
+acpi_parse_lapic_nmi(struct acpi_table_lapic_nmi *localnmi)
+{
+ if (!localnmi)
+ return;
+
+ printk(KERN_INFO
+ "LAPIC_NMI (acpi_id[0x%04x] polarity[0x%x] trigger[0x%x] lint[0x%x])\n",
+ localnmi->acpi_id, localnmi->flags.polarity,
+ localnmi->flags.trigger, localnmi->lint);
+}
+static void __init
+acpi_parse_lapic_addr_ovr(struct acpi_table_lapic_addr_ovr *lapic_addr_ovr)
+{
+ if (!lapic_addr_ovr)
+ return;
+
+ printk(KERN_INFO "LAPIC_ADDR_OVR (address[0x%lx])\n",
+ (unsigned long) lapic_addr_ovr->address);
+
+}
+
+static void __init
+acpi_parse_plat_int_src(struct acpi_table_plat_int_src *plintsrc)
+{
+ if (!plintsrc)
+ return;
+
+ printk(KERN_INFO
+ "PLAT_INT_SRC (polarity[0x%x] trigger[0x%x] type[0x%x] id[0x%04x] eid[0x%x] iosapic_vector[0x%x] global_irq[0x%x]\n",
+ plintsrc->flags.polarity, plintsrc->flags.trigger,
+ plintsrc->type, plintsrc->id, plintsrc->eid,
+ plintsrc->iosapic_vector, plintsrc->global_irq);
+}
+static int __init
+acpi_parse_madt(acpi_table_header * header, unsigned long phys)
+{
+
+ struct acpi_table_madt *madt;
+ acpi_madt_entry_header *entry_header;
+ int table_size;
+
+ madt = (struct acpi_table_madt *) __va_range(phys, header->length);
+
+ if (!madt)
+ return -EINVAL;
+
+ table_size = (int) (header->length - sizeof(*madt));
+ entry_header =
+ (acpi_madt_entry_header *) ((void *) madt + sizeof(*madt));
+
+ while (entry_header && (table_size > 0)) {
+ switch (entry_header->type) {
+ case ACPI_MADT_LAPIC:
+ acpi_parse_lapic((struct acpi_table_lapic *)
+ entry_header);
+ break;
+ case ACPI_MADT_IOAPIC:
+ acpi_parse_ioapic((struct acpi_table_ioapic *)
+ entry_header);
+ break;
+ case ACPI_MADT_INT_SRC_OVR:
+ acpi_parse_int_src_ovr((struct acpi_table_int_src_ovr *)
+ entry_header);
+ break;
+ case ACPI_MADT_NMI_SRC:
+ acpi_parse_nmi_src((struct acpi_table_nmi_src *)
+ entry_header);
+ break;
+ case ACPI_MADT_LAPIC_NMI:
+ acpi_parse_lapic_nmi((struct acpi_table_lapic_nmi *)
+ entry_header);
+ break;
+ case ACPI_MADT_LAPIC_ADDR_OVR:
+ acpi_parse_lapic_addr_ovr((struct
+ acpi_table_lapic_addr_ovr *)
+ entry_header);
+ break;
+ case ACPI_MADT_PLAT_INT_SRC:
+ acpi_parse_plat_int_src((struct acpi_table_plat_int_src
+ *) entry_header);
+ break;
+ default:
+ printk(KERN_WARNING
+ "Unsupported MADT entry type 0x%x\n",
+ entry_header->type);
+ break;
+ }
+ table_size -= entry_header->length;
+ entry_header =
+ (acpi_madt_entry_header *) ((void *) entry_header +
+ entry_header->length);
+ }
+
+ if (!total_cpus) {
+ printk("ACPI: No Processors found in the APCI table.\n");
+ return -EINVAL;
+ }
+
+ printk(KERN_INFO "%d CPUs total\n", total_cpus);
+
+ if (madt->lapic_address)
+ mp_lapic_addr = madt->lapic_address;
+ else
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+ printk(KERN_INFO "Local APIC address %x\n", madt->lapic_address);
+
+ return 0;
+}
+
+extern int opt_noacpi;
+
+/*
+ * Configure the processor info using MADT in the ACPI tables. If we fail to
+ * configure that, then we use the MPS tables.
+ */
+void __init
+config_acpi_tables(void)
+{
+ memset(&acpi_boot_ops, 0, sizeof(acpi_boot_ops));
+ acpi_boot_ops[ACPI_APIC] = acpi_parse_madt;
+
+ if (!opt_noacpi && !acpi_tables_init()) {
+ have_acpi_tables = 1;
+ printk("Enabling the CPU's according to the ACPI table\n");
+ }
+}
diff --git a/xen/arch/i386/acpitable.h b/xen/arch/i386/acpitable.h
new file mode 100644
index 0000000000..ddf1c84a65
--- /dev/null
+++ b/xen/arch/i386/acpitable.h
@@ -0,0 +1,260 @@
+/*
+ * acpitable.c - IA32-specific ACPI boot-time initialization (Revision: 1)
+ *
+ * Copyright (C) 1999 Andrew Henroid
+ * Copyright (C) 2001 Richard Schaal
+ * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
+ * Copyright (C) 2001 Arjan van de Ven <arjanv@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * $Id: acpitable.h,v 1.3 2001/11/03 22:41:34 fenrus Exp $
+ */
+
+/*
+ * The following code is cut-and-pasted from drivers/acpi. Some of the code
+ * there may not be updated or delivered yet.
+ * To avoid conflicts when CONFIG_ACPI is defined, the following codes are
+ * modified so that they are self-contained in this file.
+ * -- jun
+ */
+
+#ifndef _HEADER_ACPITABLE_H_
+#define _HEADER_ACPITABLE_H_
+
+#define dprintk printk
+typedef unsigned int ACPI_TBLPTR;
+
+typedef struct { /* ACPI common table header */
+ char signature[4]; /* identifies type of table */
+ u32 length; /* length of table,
+ in bytes, including header */
+ u8 revision; /* specification minor version # */
+ u8 checksum; /* to make sum of entire table == 0 */
+ char oem_id[6]; /* OEM identification */
+ char oem_table_id[8]; /* OEM table identification */
+ u32 oem_revision; /* OEM revision number */
+ char asl_compiler_id[4]; /* ASL compiler vendor ID */
+ u32 asl_compiler_revision; /* ASL compiler revision number */
+} acpi_table_header __attribute__ ((packed));
+
+enum {
+ ACPI_APIC = 0,
+ ACPI_BOOT,
+ ACPI_DBGP,
+ ACPI_DSDT,
+ ACPI_ECDT,
+ ACPI_ETDT,
+ ACPI_FACP,
+ ACPI_FACS,
+ ACPI_OEMX,
+ ACPI_PSDT,
+ ACPI_SBST,
+ ACPI_SLIT,
+ ACPI_SPCR,
+ ACPI_SRAT,
+ ACPI_SSDT,
+ ACPI_SPMI,
+ ACPI_XSDT,
+ ACPI_TABLE_COUNT
+};
+
+static char *acpi_table_signatures[ACPI_TABLE_COUNT] = {
+ "APIC",
+ "BOOT",
+ "DBGP",
+ "DSDT",
+ "ECDT",
+ "ETDT",
+ "FACP",
+ "FACS",
+ "OEM",
+ "PSDT",
+ "SBST",
+ "SLIT",
+ "SPCR",
+ "SRAT",
+ "SSDT",
+ "SPMI",
+ "XSDT"
+};
+
+struct acpi_table_madt {
+ acpi_table_header header;
+ u32 lapic_address;
+ struct {
+ u32 pcat_compat:1;
+ u32 reserved:31;
+ } flags __attribute__ ((packed));
+} __attribute__ ((packed));
+
+enum {
+ ACPI_MADT_LAPIC = 0,
+ ACPI_MADT_IOAPIC,
+ ACPI_MADT_INT_SRC_OVR,
+ ACPI_MADT_NMI_SRC,
+ ACPI_MADT_LAPIC_NMI,
+ ACPI_MADT_LAPIC_ADDR_OVR,
+ ACPI_MADT_IOSAPIC,
+ ACPI_MADT_LSAPIC,
+ ACPI_MADT_PLAT_INT_SRC,
+ ACPI_MADT_ENTRY_COUNT
+};
+
+#define RSDP_SIG "RSD PTR "
+#define RSDT_SIG "RSDT"
+
+#define ACPI_DEBUG_PRINT(pl)
+
+#define ACPI_MEMORY_MODE 0x01
+#define ACPI_LOGICAL_ADDRESSING 0x00
+#define ACPI_PHYSICAL_ADDRESSING 0x01
+
+#define LO_RSDP_WINDOW_BASE 0 /* Physical Address */
+#define HI_RSDP_WINDOW_BASE 0xE0000 /* Physical Address */
+#define LO_RSDP_WINDOW_SIZE 0x400
+#define HI_RSDP_WINDOW_SIZE 0x20000
+#define RSDP_SCAN_STEP 16
+#define RSDP_CHECKSUM_LENGTH 20
+
+typedef int (*acpi_table_handler) (acpi_table_header * header, unsigned long);
+
+struct acpi_table_rsdp {
+ char signature[8];
+ u8 checksum;
+ char oem_id[6];
+ u8 revision;
+ u32 rsdt_address;
+} __attribute__ ((packed));
+
+struct acpi_table_rsdt {
+ acpi_table_header header;
+ u32 entry[ACPI_TABLE_COUNT];
+} __attribute__ ((packed));
+
+typedef struct {
+ u8 type;
+ u8 length;
+} acpi_madt_entry_header __attribute__ ((packed));
+
+typedef struct {
+ u16 polarity:2;
+ u16 trigger:2;
+ u16 reserved:12;
+} acpi_madt_int_flags __attribute__ ((packed));
+
+struct acpi_table_lapic {
+ acpi_madt_entry_header header;
+ u8 acpi_id;
+ u8 id;
+ struct {
+ u32 enabled:1;
+ u32 reserved:31;
+ } flags __attribute__ ((packed));
+} __attribute__ ((packed));
+
+struct acpi_table_ioapic {
+ acpi_madt_entry_header header;
+ u8 id;
+ u8 reserved;
+ u32 address;
+ u32 global_irq_base;
+} __attribute__ ((packed));
+
+struct acpi_table_int_src_ovr {
+ acpi_madt_entry_header header;
+ u8 bus;
+ u8 bus_irq;
+ u32 global_irq;
+ acpi_madt_int_flags flags;
+} __attribute__ ((packed));
+
+struct acpi_table_nmi_src {
+ acpi_madt_entry_header header;
+ acpi_madt_int_flags flags;
+ u32 global_irq;
+} __attribute__ ((packed));
+
+struct acpi_table_lapic_nmi {
+ acpi_madt_entry_header header;
+ u8 acpi_id;
+ acpi_madt_int_flags flags;
+ u8 lint;
+} __attribute__ ((packed));
+
+struct acpi_table_lapic_addr_ovr {
+ acpi_madt_entry_header header;
+ u8 reserved[2];
+ u64 address;
+} __attribute__ ((packed));
+
+struct acpi_table_iosapic {
+ acpi_madt_entry_header header;
+ u8 id;
+ u8 reserved;
+ u32 global_irq_base;
+ u64 address;
+} __attribute__ ((packed));
+
+struct acpi_table_lsapic {
+ acpi_madt_entry_header header;
+ u8 acpi_id;
+ u8 id;
+ u8 eid;
+ u8 reserved[3];
+ struct {
+ u32 enabled:1;
+ u32 reserved:31;
+ } flags;
+} __attribute__ ((packed));
+
+struct acpi_table_plat_int_src {
+ acpi_madt_entry_header header;
+ acpi_madt_int_flags flags;
+ u8 type;
+ u8 id;
+ u8 eid;
+ u8 iosapic_vector;
+ u32 global_irq;
+ u32 reserved;
+} __attribute__ ((packed));
+
+/*
+ * ACPI Table Descriptor. One per ACPI table
+ */
+typedef struct acpi_table_desc {
+ struct acpi_table_desc *prev;
+ struct acpi_table_desc *next;
+ struct acpi_table_desc *installed_desc;
+ acpi_table_header *pointer;
+ void *base_pointer;
+ u8 *aml_pointer;
+ u64 physical_address;
+ u32 aml_length;
+ u32 length;
+ u32 count;
+ u16 table_id;
+ u8 type;
+ u8 allocation;
+ u8 loaded_into_namespace;
+
+} acpi_table_desc __attribute__ ((packed));
+
+#endif
diff --git a/xen/arch/i386/apic.c b/xen/arch/i386/apic.c
new file mode 100644
index 0000000000..9b999df951
--- /dev/null
+++ b/xen/arch/i386/apic.c
@@ -0,0 +1,845 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: apic.c
+ * Author:
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: programmable APIC timer interface for accurate timers
+ * modified version of Linux' apic.c
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+
+/*
+ * Local APIC handling, local APIC timers
+ *
+ * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively.
+ */
+
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/sched.h>
+#include <xeno/irq.h>
+#include <xeno/delay.h>
+#include <asm/mc146818rtc.h>
+#include <asm/msr.h>
+#include <xeno/errno.h>
+#include <asm/atomic.h>
+#include <xeno/smp.h>
+#include <xeno/interrupt.h>
+#include <asm/mpspec.h>
+#include <asm/pgalloc.h>
+#include <asm/hardirq.h>
+
+#include <xeno/ac_timer.h>
+
+#undef APIC_TIME_TRACE
+#ifdef APIC_TIME_TRACE
+#define TRC(_x) _x
+#else
+#define TRC(_x)
+#endif
+
+
+/* Using APIC to generate smp_local_timer_interrupt? */
+int using_apic_timer = 0;
+
+int get_maxlvt(void)
+{
+ unsigned int v, ver, maxlvt;
+
+ v = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(v);
+ /* 82489DXs do not report # of LVT entries. */
+ maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
+ return maxlvt;
+}
+
+static void clear_local_APIC(void)
+{
+ int maxlvt;
+ unsigned long v;
+
+ maxlvt = get_maxlvt();
+
+ /*
+ * Careful: we have to set masks only first to deassert
+ * any level-triggered sources.
+ */
+ v = apic_read(APIC_LVTT);
+ apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+ v = apic_read(APIC_LVT1);
+ apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
+ if (maxlvt >= 3) {
+ v = apic_read(APIC_LVTERR);
+ apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
+ }
+ if (maxlvt >= 4) {
+ v = apic_read(APIC_LVTPC);
+ apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
+ }
+
+ /*
+ * Clean APIC state for other OSs:
+ */
+ apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
+ apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
+ if (maxlvt >= 3)
+ apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
+ if (maxlvt >= 4)
+ apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
+}
+
+void __init connect_bsp_APIC(void)
+{
+ if (pic_mode) {
+ /*
+ * Do not trust the local APIC being empty at bootup.
+ */
+ clear_local_APIC();
+ /*
+ * PIC mode, enable APIC mode in the IMCR, i.e.
+ * connect BSP's local APIC to INT and NMI lines.
+ */
+ printk("leaving PIC mode, enabling APIC mode.\n");
+ outb(0x70, 0x22);
+ outb(0x01, 0x23);
+ }
+}
+
+void disconnect_bsp_APIC(void)
+{
+ if (pic_mode) {
+ /*
+ * Put the board back into PIC mode (has an effect
+ * only on certain older boards). Note that APIC
+ * interrupts, including IPIs, won't work beyond
+ * this point! The only exception are INIT IPIs.
+ */
+ printk("disabling APIC mode, entering PIC mode.\n");
+ outb(0x70, 0x22);
+ outb(0x00, 0x23);
+ }
+}
+
+void disable_local_APIC(void)
+{
+ unsigned long value;
+
+ clear_local_APIC();
+
+ /*
+ * Disable APIC (implies clearing of registers
+ * for 82489DX!).
+ */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_SPIV_APIC_ENABLED;
+ apic_write_around(APIC_SPIV, value);
+}
+
+/*
+ * This is to verify that we're looking at a real local APIC.
+ * Check these against your board if the CPUs aren't getting
+ * started for no apparent reason.
+ */
+int __init verify_local_APIC(void)
+{
+ unsigned int reg0, reg1;
+
+ /*
+ * The version register is read-only in a real APIC.
+ */
+ reg0 = apic_read(APIC_LVR);
+ Dprintk("Getting VERSION: %x\n", reg0);
+ apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
+ reg1 = apic_read(APIC_LVR);
+ Dprintk("Getting VERSION: %x\n", reg1);
+
+ /*
+ * The two version reads above should print the same
+ * numbers. If the second one is different, then we
+ * poke at a non-APIC.
+ */
+ if (reg1 != reg0)
+ return 0;
+
+ /*
+ * Check if the version looks reasonable.
+ */
+ reg1 = GET_APIC_VERSION(reg0);
+ if (reg1 == 0x00 || reg1 == 0xff)
+ return 0;
+ reg1 = get_maxlvt();
+ if (reg1 < 0x02 || reg1 == 0xff)
+ return 0;
+
+ /*
+ * The ID register is read/write in a real APIC.
+ */
+ reg0 = apic_read(APIC_ID);
+ Dprintk("Getting ID: %x\n", reg0);
+ apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
+ reg1 = apic_read(APIC_ID);
+ Dprintk("Getting ID: %x\n", reg1);
+ apic_write(APIC_ID, reg0);
+ if (reg1 != (reg0 ^ APIC_ID_MASK))
+ return 0;
+
+ /*
+ * The next two are just to see if we have sane values.
+ * They're only really relevant if we're in Virtual Wire
+ * compatibility mode, but most boxes aren't anymore.
+ */
+ reg0 = apic_read(APIC_LVT0);
+ Dprintk("Getting LVT0: %x\n", reg0);
+ reg1 = apic_read(APIC_LVT1);
+ Dprintk("Getting LVT1: %x\n", reg1);
+
+ return 1;
+}
+
+void __init sync_Arb_IDs(void)
+{
+ /* Wait for idle. */
+ apic_wait_icr_idle();
+
+ Dprintk("Synchronizing Arb IDs.\n");
+ apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
+ | APIC_DM_INIT);
+}
+
+extern void __error_in_apic_c (void);
+
+/*
+ * WAS: An initial setup of the virtual wire mode.
+ * NOW: We don't bother doing anything. All we need at this point
+ * is to receive timer ticks, so that 'jiffies' is incremented.
+ * If we're SMP, then we can assume BIOS did setup for us.
+ * If we're UP, then the APIC should be disabled (it is at reset).
+ * If we're UP and APIC is enabled, then BIOS is clever and has
+ * probably done initial interrupt routing for us.
+ */
+void __init init_bsp_APIC(void)
+{
+}
+
+void __init setup_local_APIC (void)
+{
+ unsigned long value, ver, maxlvt;
+
+ value = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(value);
+
+ if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
+ __error_in_apic_c();
+
+ /* Double-check whether this APIC is really registered. */
+ if (!test_bit(GET_APIC_ID(apic_read(APIC_ID)), &phys_cpu_present_map))
+ BUG();
+
+ /*
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+
+ /*
+ * In clustered apic mode, the firmware does this for us.
+ * Put the APIC into flat delivery mode.
+ * Must be "all ones" explicitly for 82489DX.
+ */
+ apic_write_around(APIC_DFR, 0xffffffff);
+
+ /*
+ * Set up the logical destination ID.
+ */
+ value = apic_read(APIC_LDR);
+ value &= ~APIC_LDR_MASK;
+ value |= (1<<(smp_processor_id()+24));
+ apic_write_around(APIC_LDR, value);
+
+ /*
+ * Set Task Priority to 'accept all'. We never change this
+ * later on.
+ */
+ value = apic_read(APIC_TASKPRI);
+ value &= ~APIC_TPRI_MASK;
+ apic_write_around(APIC_TASKPRI, value);
+
+ /*
+ * Now that we are all set up, enable the APIC
+ */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ /*
+ * Enable APIC
+ */
+ value |= APIC_SPIV_APIC_ENABLED;
+
+ /* Enable focus processor (bit==0) */
+ value &= ~APIC_SPIV_FOCUS_DISABLED;
+
+ /* Set spurious IRQ vector */
+ value |= SPURIOUS_APIC_VECTOR;
+ apic_write_around(APIC_SPIV, value);
+
+ /*
+ * Set up LVT0, LVT1:
+ *
+ * set up through-local-APIC on the BP's LINT0. This is not
+ * strictly necessary in pure symmetric-IO mode, but sometimes
+ * we delegate interrupts to the 8259A.
+ */
+ /*
+ * TODO: set up through-local-APIC from through-I/O-APIC? --macro
+ */
+ value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
+ if (!smp_processor_id()) {
+ value = APIC_DM_EXTINT;
+ printk("enabled ExtINT on CPU#%d\n", smp_processor_id());
+ } else {
+ value = APIC_DM_EXTINT | APIC_LVT_MASKED;
+ printk("masked ExtINT on CPU#%d\n", smp_processor_id());
+ }
+ apic_write_around(APIC_LVT0, value);
+
+ /*
+ * only the BP should see the LINT1 NMI signal, obviously.
+ */
+ if (!smp_processor_id())
+ value = APIC_DM_NMI;
+ else
+ value = APIC_DM_NMI | APIC_LVT_MASKED;
+ if (!APIC_INTEGRATED(ver)) /* 82489DX */
+ value |= APIC_LVT_LEVEL_TRIGGER;
+ apic_write_around(APIC_LVT1, value);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ maxlvt = get_maxlvt();
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ value = apic_read(APIC_ESR);
+ printk("ESR value before enabling vector: %08lx\n", value);
+
+ value = ERROR_APIC_VECTOR; /* enables sending errors */
+ apic_write_around(APIC_LVTERR, value);
+ /* spec says clear errors after enabling vector. */
+ if (maxlvt > 3)
+ apic_write(APIC_ESR, 0);
+ value = apic_read(APIC_ESR);
+ printk("ESR value after enabling vector: %08lx\n", value);
+ } else {
+ printk("No ESR for 82489DX.\n");
+ }
+}
+
+
+static inline void apic_pm_init1(void) { }
+static inline void apic_pm_init2(void) { }
+
+
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ */
+
+static int __init detect_init_APIC (void)
+{
+ u32 h, l, features;
+ extern void get_cpu_vendor(struct cpuinfo_x86*);
+
+ /* Workaround for us being called before identify_cpu(). */
+ get_cpu_vendor(&boot_cpu_data);
+
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1)
+ break;
+ goto no_apic;
+ case X86_VENDOR_INTEL:
+ if (boot_cpu_data.x86 == 6 ||
+ (boot_cpu_data.x86 == 15 && cpu_has_apic) ||
+ (boot_cpu_data.x86 == 5 && cpu_has_apic))
+ break;
+ goto no_apic;
+ default:
+ goto no_apic;
+ }
+
+ if (!cpu_has_apic) {
+ /*
+ * Some BIOSes disable the local APIC in the
+ * APIC_BASE MSR. This can only be done in
+ * software for Intel P6 and AMD K7 (Model > 1).
+ */
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+ printk("Local APIC disabled by BIOS -- reenabling.\n");
+ l &= ~MSR_IA32_APICBASE_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+ wrmsr(MSR_IA32_APICBASE, l, h);
+ }
+ }
+
+ /* The APIC feature bit should now be enabled in `cpuid' */
+ features = cpuid_edx(1);
+ if (!(features & (1 << X86_FEATURE_APIC))) {
+ printk("Could not enable APIC!\n");
+ return -1;
+ }
+
+ set_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability);
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+ boot_cpu_physical_apicid = 0;
+
+ printk("Found and enabled local APIC!\n");
+ apic_pm_init1();
+ return 0;
+
+ no_apic:
+ printk("No local APIC present or hardware disabled\n");
+ return -1;
+}
+
+void __init init_apic_mappings(void)
+{
+ unsigned long apic_phys = 0;
+
+ /*
+ * If no local APIC can be found then set up a fake all zeroes page to
+ * simulate the local APIC and another one for the IO-APIC.
+ */
+ if (!smp_found_config && detect_init_APIC()) {
+ apic_phys = get_free_page(GFP_KERNEL);
+ apic_phys = __pa(apic_phys);
+ } else
+ apic_phys = mp_lapic_addr;
+
+ set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+ Dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);
+
+ /*
+ * Fetch the APIC ID of the BSP in case we have a
+ * default configuration (or the MP table is broken).
+ */
+ if (boot_cpu_physical_apicid == -1U)
+ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+
+#ifdef CONFIG_X86_IO_APIC
+ {
+ unsigned long ioapic_phys = 0, idx = FIX_IO_APIC_BASE_0;
+ int i;
+
+ for (i = 0; i < nr_ioapics; i++) {
+ if (smp_found_config)
+ ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+ set_fixmap_nocache(idx, ioapic_phys);
+ Dprintk("mapped IOAPIC to %08lx (%08lx)\n",
+ __fix_to_virt(idx), ioapic_phys);
+ idx++;
+ }
+ }
+#endif
+}
+
+/*****************************************************************************
+ * APIC calibration
+ *
+ * The APIC is programmed in bus cycles.
+ * Timeout values should be specified in real time units.
+ * The "cheapest" time source is the cyclecounter.
+ *
+ * Thus, we need a mapping from: bus cycles <- cycle counter <- system time
+ *
+ * The calibration is currently a bit shoddy since it requires the external
+ * timer chip to generate periodic timer interrupts.
+ *****************************************************************************/
+
+/* used for system time scaling */
+static unsigned int bus_freq;
+static u32 bus_cycle; /* length of one bus cycle in pico-seconds */
+static u32 bus_scale; /* scaling factor to convert ns to bus cycles */
+u64 cpu_freq;
+
+/*
+ * The timer chip is already set up at HZ interrupts per second here,
+ * but we do not accept timer interrupts yet. We only allow the BP
+ * to calibrate.
+ */
+static unsigned int __init get_8254_timer_count(void)
+{
+ /*extern spinlock_t i8253_lock;*/
+ /*unsigned long flags;*/
+ unsigned int count;
+ /*spin_lock_irqsave(&i8253_lock, flags);*/
+ outb_p(0x00, 0x43);
+ count = inb_p(0x40);
+ count |= inb_p(0x40) << 8;
+ /*spin_unlock_irqrestore(&i8253_lock, flags);*/
+ return count;
+}
+
+void __init wait_8254_wraparound(void)
+{
+ unsigned int curr_count, prev_count=~0;
+ int delta;
+ curr_count = get_8254_timer_count();
+ do {
+ prev_count = curr_count;
+ curr_count = get_8254_timer_count();
+ delta = curr_count-prev_count;
+ /*
+ * This limit for delta seems arbitrary, but it isn't, it's slightly
+ * above the level of error a buggy Mercury/Neptune chipset timer can
+ * cause.
+ */
+ } while (delta < 300);
+}
+
+/*
+ * This function sets up the local APIC timer, with a timeout of
+ * 'clocks' APIC bus clock. During calibration we actually call
+ * this function with a very large value and read the current time after
+ * a well defined period of time as expired.
+ *
+ * Calibration is only performed once, for CPU0!
+ *
+ * We do reads before writes even if unnecessary, to get around the
+ * P5 APIC double write bug.
+ */
+#define APIC_DIVISOR 1
+static void __setup_APIC_LVTT(unsigned int clocks)
+{
+ unsigned int lvtt1_value, tmp_value;
+ lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV)|LOCAL_TIMER_VECTOR;
+ apic_write_around(APIC_LVTT, lvtt1_value);
+ tmp_value = apic_read(APIC_TDCR);
+ apic_write_around(APIC_TDCR, (tmp_value | APIC_TDR_DIV_1));
+ apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+}
+
+/*
+ * This is done for every CPU from setup_APIC_clocks() below.
+ * We set up each local APIC with a zero timeout value for now.
+ * Unlike Linux, we don't have to wait for slices etc.
+ */
+void setup_APIC_timer(void * data)
+{
+ unsigned long flags;
+ __save_flags(flags);
+ __sti();
+ printk("cpu: %d: setup timer.", smp_processor_id());
+ __setup_APIC_LVTT(0);
+ printk("done\n");
+ __restore_flags(flags);
+}
+
+/*
+ * In this function we calibrate APIC bus clocks to the external timer.
+ *
+ * As a result we have the bus speed and CPU speed in Hz.
+ *
+ * We want to do the calibration only once (for CPU0). CPUs connected by the
+ * same APIC bus have the very same bus frequency.
+ *
+ * This is a bit shoddy, since we use the very same periodic timer interrupt
+ * that we are trying to eliminate in order to calibrate the APIC.
+ */
+
+int __init calibrate_APIC_clock(void)
+{
+ unsigned long long t1 = 0, t2 = 0;
+ long tt1, tt2;
+ long result;
+ int i;
+ const int LOOPS = HZ/10;
+
+ printk("calibrating APIC timer for CPU%d...\n", smp_processor_id());
+
+ /* Put whatever arbitrary (but long enough) timeout
+ * value into the APIC clock, we just want to get the
+ * counter running for calibration. */
+ __setup_APIC_LVTT(1000000000);
+
+ /* The timer chip counts down to zero. Let's wait
+ * for a wraparound to start exact measurement:
+ * (the current tick might have been already half done) */
+ wait_8254_wraparound();
+
+ /* We wrapped around just now. Let's start: */
+ rdtscll(t1);
+ tt1 = apic_read(APIC_TMCCT);
+
+ /* Let's wait LOOPS wraparounds: */
+ for (i = 0; i < LOOPS; i++)
+ wait_8254_wraparound();
+
+ tt2 = apic_read(APIC_TMCCT);
+ rdtscll(t2);
+
+ /* The APIC bus clock counter is 32 bits only; it might have
+ * wrapped (underflowed, to be exact, as the timer counts down),
+ * but since we use signed longs no extra care is needed. */
+ result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
+
+ printk("..... CPU speed is %ld.%04ld MHz.\n",
+ ((long)(t2-t1)/LOOPS)/(1000000/HZ),
+ ((long)(t2-t1)/LOOPS)%(1000000/HZ));
+
+ printk("..... Bus speed is %ld.%04ld MHz.\n",
+ result/(1000000/HZ),
+ result%(1000000/HZ));
+
+ cpu_freq = (u64)(((t2-t1)/LOOPS)*HZ);
+
+ /* set up multipliers for accurate timer code */
+ bus_freq = result*HZ;
+ bus_cycle = (u32) (1000000000000LL/bus_freq); /* in pico seconds */
+ bus_scale = (1000*262144)/bus_cycle;
+
+ /* print results */
+ printk("..... bus_freq = %u Hz\n", bus_freq);
+ printk("..... bus_cycle = %u ps\n", bus_cycle);
+ printk("..... bus_scale = %u \n", bus_scale);
+ /* reset APIC to zero timeout value */
+ __setup_APIC_LVTT(0);
+ return result;
+}
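+
+/*
+ * To sanity-check the arithmetic above (a sketch, assuming HZ=100): each
+ * 8254 wraparound is one jiffy (1/HZ s), so LOOPS = HZ/10 wraparounds span
+ * exactly 0.1s. (tt1-tt2)/LOOPS is then APIC-bus ticks per jiffy and
+ * (t2-t1)/LOOPS TSC ticks per jiffy; multiplying each by HZ gives bus_freq
+ * and cpu_freq in Hz. Dividing ticks-per-jiffy by (1000000/HZ) yields the
+ * integer MHz part printed above, e.g. 660000/10000 = 66 for a 66MHz bus.
+ */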
+
+/*
+ * Initialise the APIC timers for all CPUs.
+ * We start with the first CPU and find out the processor frequency and bus speed.
+ */
+void __init setup_APIC_clocks (void)
+{
+ printk("Using local APIC timer interrupts.\n");
+ using_apic_timer = 1;
+ __cli();
+ /* calibrate CPU0 for CPU speed and BUS speed */
+ bus_freq = calibrate_APIC_clock();
+ /* Now set up the timer for real. */
+ setup_APIC_timer((void *)bus_freq);
+ __sti();
+ /* and update all other cpus */
+ smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1);
+}
+
+#undef APIC_DIVISOR
+
+/*
+ * Reprogram the APIC timer. The timeout value is in ns from start of boot.
+ * Returns 1 on success;
+ * returns 0 if the timeout value is too small or in the past.
+ */
+int reprogram_ac_timer(s_time_t timeout)
+{
+ int cpu = smp_processor_id();
+ s_time_t now;
+ s_time_t expire;
+ u64 apic_tmict;
+
+ now = NOW();
+ expire = timeout - now; /* value from now */
+
+ if (expire <= 0) {
+ printk("APICT[%02d] Timeout in the past 0x%08X%08X > 0x%08X%08X\n",
+ cpu, (u32)(now>>32), (u32)now, (u32)(timeout>>32),(u32)timeout);
+ return 0; /* timeout value in the past */
+ }
+
+ /* conversion to bus units */
+ apic_tmict = (((u64)bus_scale) * expire)>>18;
+
+ if (apic_tmict >= 0xffffffff) {
+ printk("APICT[%02d] Timeout value too large\n", cpu);
+ apic_tmict = 0xffffffff;
+ }
+ if (apic_tmict == 0) {
+ printk("APICT[%02d] timeout value too small\n", cpu);
+ return 0;
+ }
+
+ /* program the timer */
+ apic_write(APIC_TMICT, (unsigned long)apic_tmict);
+
+ TRC(printk("APICT[%02d] reprog(): expire=%lld %u\n",
+ cpu, expire, apic_tmict));
+ return 1;
+}
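+
+/*
+ * Hypothetical usage from the accurate-timer code: a call such as
+ * reprogram_ac_timer(NOW() + MILLISECS(1)) arms the local APIC timer to
+ * fire in 1ms; a return of 0 means the deadline has already passed and
+ * the caller should process its timer queue immediately instead.
+ */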
+
+/*
+ * Local timer interrupt handler.
+ * Here the programmable, accurate timers are executed.
+ * If we are on CPU0 and we should have updated jiffies, we do this
+ * as well and deal with traditional Linux timers. Note that if the
+ * timer APIC on a CPU does not go off every 10ms or so, the Linux
+ * timers lose accuracy, but that shouldn't be a problem.
+ */
+static s_time_t last_cpu0_tirq = 0;
+inline void smp_local_timer_interrupt(struct pt_regs * regs)
+{
+ int cpu = smp_processor_id();
+ s_time_t diff, now;
+
+ /* if CPU 0 do old timer stuff */
+ if (cpu == 0)
+ {
+ now = NOW();
+ diff = now - last_cpu0_tirq;
+
+ if (diff <= 0) {
+ printk ("System Time went backwards: %lld\n", diff);
+ return;
+ }
+
+ while (diff >= MILLISECS(10)) {
+ do_timer(regs);
+ diff -= MILLISECS(10);
+ last_cpu0_tirq += MILLISECS(10);
+ }
+ }
+ /* call accurate timer function */
+ do_ac_timer();
+}
+
+/*
+ * Local APIC timer interrupt. This is the most natural way for doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then we call the local
+ * interrupt as well. Thus we cannot inline the local irq ... ]
+ */
+unsigned int apic_timer_irqs [NR_CPUS];
+
+void smp_apic_timer_interrupt(struct pt_regs * regs)
+{
+ int cpu = smp_processor_id();
+
+ /*
+ * the NMI deadlock-detector uses this.
+ */
+ apic_timer_irqs[cpu]++;
+
+ /*
+ * NOTE! We'd better ACK the irq immediately, because timer handling can
+ * be slow. XXX is this safe?
+ */
+ ack_APIC_irq();
+
+ /* call the local handler */
+ irq_enter(cpu, 0);
+ smp_local_timer_interrupt(regs);
+ irq_exit(cpu, 0);
+
+ if (softirq_pending(cpu))
+ do_softirq();
+}
+
+/*
+ * This interrupt should _never_ happen with our APIC/SMP architecture
+ */
+asmlinkage void smp_spurious_interrupt(void)
+{
+ unsigned long v;
+
+ /*
+ * Check if this really is a spurious interrupt and ACK it
+ * if it is a vectored one. Just in case...
+ * Spurious interrupts should not be ACKed.
+ */
+ v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
+ if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+ ack_APIC_irq();
+
+ /* see sw-dev-man vol 3, chapter 7.4.13.5 */
+ printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
+ smp_processor_id());
+}
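+
+/*
+ * On the ISR indexing above: the 256-bit in-service register is spread
+ * across eight 32-bit APIC registers at 0x10-byte strides, so for a
+ * vector v the relevant word lives at APIC_ISR + (v/32)*0x10, which is
+ * exactly ((v & ~0x1f) >> 1), and the bit within it is (v & 0x1f).
+ */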
+
+/*
+ * This interrupt should never happen with our APIC/SMP architecture
+ */
+
+asmlinkage void smp_error_interrupt(void)
+{
+ unsigned long v, v1;
+
+ /* First tickle the hardware, only then report what went on. -- REW */
+ v = apic_read(APIC_ESR);
+ apic_write(APIC_ESR, 0);
+ v1 = apic_read(APIC_ESR);
+ ack_APIC_irq();
+ atomic_inc(&irq_err_count);
+
+ /* Here is what the APIC error bits mean:
+ 0: Send CS error
+ 1: Receive CS error
+ 2: Send accept error
+ 3: Receive accept error
+ 4: Reserved
+ 5: Send illegal vector
+ 6: Received illegal vector
+ 7: Illegal register address
+ */
+ printk ("APIC error on CPU%d: %02lx(%02lx)\n",
+ smp_processor_id(), v, v1);
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor (void)
+{
+ if (!smp_found_config && !cpu_has_apic)
+ return -1;
+
+ /*
+ * Complain if the BIOS pretends there is one.
+ */
+ if (!cpu_has_apic&&APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]))
+ {
+ printk("BIOS bug, local APIC #%d not detected!...\n",
+ boot_cpu_physical_apicid);
+ return -1;
+ }
+
+ verify_local_APIC();
+
+ connect_bsp_APIC();
+
+ phys_cpu_present_map = 1;
+ apic_write_around(APIC_ID, boot_cpu_physical_apicid);
+
+ apic_pm_init2();
+
+ setup_local_APIC();
+
+#ifdef CONFIG_X86_IO_APIC
+ if (smp_found_config && nr_ioapics)
+ setup_IO_APIC();
+#endif
+ setup_APIC_clocks();
+
+ return 0;
+}
diff --git a/xen/arch/i386/boot/boot.S b/xen/arch/i386/boot/boot.S
new file mode 100644
index 0000000000..091b760576
--- /dev/null
+++ b/xen/arch/i386/boot/boot.S
@@ -0,0 +1,239 @@
+#include <xeno/config.h>
+#include <asm/page.h>
+
+#define SECONDARY_CPU_FLAG 0xA5A5A5A5
+
+ .text
+
+ENTRY(start)
+ jmp hal_entry
+
+ .align 4
+
+/*** MULTIBOOT HEADER ***/
+ /* Magic number indicating a Multiboot header. */
+ .long 0x1BADB002
+ /* Flags to bootloader (see Multiboot spec). */
+ .long 0x00000006
+ /* Checksum: must be the negated sum of the first two fields. */
+ .long -0x1BADB008
+ /* Unused loader addresses (ELF header has all this already).*/
+ .long 0,0,0,0,0
+ /* EGA text mode. */
+ .long 1,0,0,0
+
+hal_entry:
+ /* Set up a few descriptors: on entry only CS is guaranteed good. */
+ lgdt %cs:nopaging_gdt_descr-__PAGE_OFFSET
+ mov $(__HYPERVISOR_DS),%ecx
+ mov %ecx,%ds
+ mov %ecx,%es
+ ljmp $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET
+1: lss stack_start-__PAGE_OFFSET,%esp
+
+ /* Reset EFLAGS (subsumes CLI and CLD). */
+ pushl $0
+ popf
+
+ /* CPU type checks. We need P6+. */
+ mov $0x200000,%edx
+ pushfl
+ pop %ecx
+ and %edx,%ecx
+ jne bad_cpu # ID bit should be clear
+ pushl %edx
+ popfl
+ pushfl
+ pop %ecx
+ and %edx,%ecx
+ je bad_cpu # ID bit should be set
+
+ /* Set up CR0. */
+ mov %cr0,%ecx
+ and $0x00000011,%ecx # save ET and PE
+ or $0x00050022,%ecx # set AM, WP, NE and MP
+ mov %ecx,%cr0
+
+ /* Set up FPU. */
+ fninit
+
+ /* Set up CR4, except global flag which Intel requires should be */
+ /* left until after paging is enabled (IA32 Manual Vol. 3, Sec. 2.5) */
+ mov %cr4,%ecx
+ or mmu_cr4_features-__PAGE_OFFSET,%ecx
+ mov %ecx,mmu_cr4_features-__PAGE_OFFSET
+ and $0x7f,%ecx /* disable GLOBAL bit */
+ mov %ecx,%cr4
+
+ /* Is this a non-boot processor? */
+ cmp $(SECONDARY_CPU_FLAG),%ebx
+ jne continue_boot_cpu
+
+ call start_paging
+ lidt idt_descr
+ jmp initialize_secondary
+
+continue_boot_cpu:
+ add $__PAGE_OFFSET,%ebx
+ push %ebx /* Multiboot info struct */
+ push %eax /* Multiboot magic value */
+
+ /* Initialize BSS (no nasty surprises!) */
+ mov $__bss_start-__PAGE_OFFSET,%edi
+ mov $_end-__PAGE_OFFSET,%ecx
+ sub %edi,%ecx
+ xor %eax,%eax
+ rep stosb
+
+ /* Initialize low and high mappings of all memory with 4MB pages */
+ mov $idle0_pg_table-__PAGE_OFFSET,%edi
+ mov $0x1e3,%eax /* PRESENT+RW+A+D+4MB+GLOBAL */
+1: mov %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */
+ stosl /* low mapping */
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ cmp $MAX_DIRECTMAP_ADDRESS+0x1e3,%eax
+ jne 1b
+
+ call start_paging
+ call setup_idt
+ lidt idt_descr
+
+ /* Call into main C routine. This should never return.*/
+ call cmain
+ ud2 /* Force a panic (invalid opcode). */
+
+start_paging:
+ mov $idle0_pg_table-__PAGE_OFFSET,%eax
+ mov %eax,%cr3
+ mov %cr0,%eax
+ or $0x80010000,%eax /* set PG and WP bits */
+ mov %eax,%cr0
+ jmp 1f
+1: /* Install relocated selectors (FS/GS unused). */
+ lgdt gdt_descr
+ mov $(__HYPERVISOR_DS),%ecx
+ mov %ecx,%ds
+ mov %ecx,%es
+ mov %ecx,%ss
+ ljmp $(__HYPERVISOR_CS),$1f
+1: /* Paging enabled, so we can now enable GLOBAL mappings in CR4. */
+ movl mmu_cr4_features,%ecx
+ movl %ecx,%cr4
+ /* Relocate ESP */
+ add $__PAGE_OFFSET,%esp
+ /* Relocate EIP via return jump */
+ pop %ecx
+ add $__PAGE_OFFSET,%ecx
+ jmp *%ecx
+
+
+/*** INTERRUPT INITIALISATION ***/
+
+setup_idt:
+ lea ignore_int,%edx
+ mov $(__HYPERVISOR_CS << 16),%eax
+ mov %dx,%ax /* selector = 0x0010 = cs */
+ mov $0x8E00,%dx /* interrupt gate - dpl=0, present */
+
+ lea SYMBOL_NAME(idt_table),%edi
+ mov $256,%ecx
+1: mov %eax,(%edi)
+ mov %edx,4(%edi)
+ add $8,%edi
+ loop 1b
+ ret
+
+/* This is the default interrupt handler. */
+int_msg:
+ .asciz "Unknown interrupt\n"
+ ALIGN
+ignore_int:
+ cld
+ push %eax
+ push %ecx
+ push %edx
+ pushl %es
+ pushl %ds
+ mov $(__HYPERVISOR_DS),%eax
+ mov %eax,%ds
+ mov %eax,%es
+ pushl $int_msg
+ call SYMBOL_NAME(printf)
+1: jmp 1b
+ pop %eax
+ popl %ds
+ popl %es
+ pop %edx
+ pop %ecx
+ pop %eax
+ iret
+
+
+bad_cpu_msg:
+ .asciz "Bad CPU type. Need P6+."
+ ALIGN
+bad_cpu:
+ call init_serial
+ mov $bad_cpu_msg,%esi
+1: lodsb
+ test %al,%al
+ je 1f
+ push %eax
+ call putchar_serial
+ add $4,%esp
+ jmp 1b
+1: jmp 1b
+
+
+/*** STACK LOCATION ***/
+
+ENTRY(stack_start)
+ .long SYMBOL_NAME(idle0_task_union)+8192-__PAGE_OFFSET
+ .long __HYPERVISOR_DS
+
+/*** DESCRIPTOR TABLES ***/
+
+.globl SYMBOL_NAME(idt)
+.globl SYMBOL_NAME(gdt)
+
+ ALIGN
+
+ .word 0
+idt_descr:
+ .word 256*8-1
+SYMBOL_NAME(idt):
+ .long SYMBOL_NAME(idt_table)
+
+ .word 0
+gdt_descr:
+ .word 256*8-1
+SYMBOL_NAME(gdt):
+ .long SYMBOL_NAME(gdt_table) /* gdt base */
+
+ .word 0
+nopaging_gdt_descr:
+ .word 256*8-1
+ .long SYMBOL_NAME(gdt_table)-__PAGE_OFFSET
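+
+# NB. Each pseudo-descriptor above is a 16-bit limit followed by a 32-bit
+# base; the ".word 0" padding before each keeps the base field 4-byte
+# aligned so the processor can fetch the lgdt/lidt operand efficiently.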
+
+ ALIGN
+/* NB. Rings != 0 get access up to 0xFC400000. This allows access to the */
+/* machine->physical mapping table. Ring 0 can access all memory. */
+ENTRY(gdt_table)
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x0000000000000000 /* not used */
+ .quad 0x00cfba000000c3ff /* 0x11 ring 1 3.95GB code at 0x0 */
+ .quad 0x00cfb2000000c3ff /* 0x19 ring 1 3.95GB data at 0x0 */
+ .quad 0x00cffa000000c3ff /* 0x23 ring 3 3.95GB code at 0x0 */
+ .quad 0x00cff2000000c3ff /* 0x2b ring 3 3.95GB data at 0x0 */
+ .quad 0x00cf9a000000ffff /* 0x30 ring 0 4.00GB code at 0x0 */
+ .quad 0x00cf92000000ffff /* 0x38 ring 0 4.00GB data at 0x0 */
+ .fill NR_CPUS,8,0 /* space for TSS's */
+
+# The following adds 12kB to the kernel file size.
+ .org 0x1000
+ENTRY(idle0_pg_table)
+ .org 0x2000
+ENTRY(idle0_task_union)
+ .org 0x4000
+ENTRY(stext)
+ENTRY(_stext)
diff --git a/xen/arch/i386/delay.c b/xen/arch/i386/delay.c
new file mode 100644
index 0000000000..078ff77344
--- /dev/null
+++ b/xen/arch/i386/delay.c
@@ -0,0 +1,29 @@
+/*
+ * Precise Delay Loops for i386
+ *
+ * Copyright (C) 1993 Linus Torvalds
+ * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ * The __delay function must _NOT_ be inlined as its execution time
+ * depends wildly on alignment on many x86 processors. The additional
+ * jump magic is needed to get the timing stable on all the CPU's
+ * we have to worry about.
+ */
+
+#include <xeno/config.h>
+#include <xeno/delay.h>
+#include <asm/msr.h>
+#include <asm/processor.h>
+
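+/*
+ * Example with illustrative numbers: on a 2GHz TSC, ticks_per_usec is
+ * about 2000, so __udelay(50) spins until roughly 100000 TSC ticks have
+ * elapsed. rep_nop() issues the "pause" hint so the busy-wait is polite
+ * to the other logical CPU on hyperthreaded parts.
+ */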
+void __udelay(unsigned long usecs)
+{
+ unsigned long ticks = usecs * ticks_per_usec;
+ unsigned long s, e;
+
+ rdtscl(s);
+ do
+ {
+ rep_nop();
+ rdtscl(e);
+ } while ((e-s) < ticks);
+}
diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S
new file mode 100644
index 0000000000..928a96ed4e
--- /dev/null
+++ b/xen/arch/i386/entry.S
@@ -0,0 +1,534 @@
+/*
+ * linux/arch/i386/entry.S
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ * This also contains the timer-interrupt handler, as well as all interrupts
+ * and faults that can result in a task-switch.
+ *
+ * Stack layout in 'ret_from_system_call':
+ * 0(%esp) - %ebx
+ * 4(%esp) - %ecx
+ * 8(%esp) - %edx
+ * C(%esp) - %esi
+ * 10(%esp) - %edi
+ * 14(%esp) - %ebp
+ * 18(%esp) - %eax
+ * 1C(%esp) - %ds
+ * 20(%esp) - %es
+ * 24(%esp) - orig_eax
+ * 28(%esp) - %eip
+ * 2C(%esp) - %cs
+ * 30(%esp) - %eflags
+ * 34(%esp) - %oldesp
+ * 38(%esp) - %oldss
+ *
+ * "current" is in register %ebx during any slow entries.
+ */
+/* The idea for callbacks from monitor -> guest OS.
+ *
+ * First, we require that all callbacks (either via a supplied
+ * interrupt-descriptor-table, or via the special event or failsafe callbacks
+ * in the shared-info-structure) are to ring 1. This just makes life easier,
+ * in that it means we don't have to do messy GDT/LDT lookups to find
+ * out the privilege level of the return code-selector. That code
+ * would just be a hassle to write, and would need to account for running
+ * off the end of the GDT/LDT, for example. The event callback has quite
+ * a constrained callback method: the guest OS provides a linear address
+ * which we call back to using the hard-coded __GUEST_CS descriptor (which
+ * is a ring 1 descriptor). For IDT callbacks, we check that the provided
+ * return CS is not == __HYPERVISOR_{CS,DS}. Apart from that we're safe as we
+ * don't allow a guest OS to install ring-0 privileges into the GDT/LDT.
+ * It's up to the guest OS to ensure all returns via the IDT are to ring 1.
+ * If not, we load incorrect SS/ESP values from the TSS (for ring 1 rather
+ * than the correct ring) and bad things are bound to ensue -- IRET is
+ * likely to fault, and we may end up killing the domain (no harm can
+ * come to the hypervisor itself, though).
+ *
+ * When doing a callback, we check if the return CS is in ring 0. If so,
+ * callback is delayed until next return to ring != 0.
+ * If return CS is in ring 1, then we create a callback frame
+ * starting at return SS/ESP. The base of the frame does an intra-privilege
+ * interrupt-return.
+ * If return CS is in ring > 1, we create a callback frame starting
+ * at SS/ESP taken from appropriate section of the current TSS. The base
+ * of the frame does an inter-privilege interrupt-return.
+ *
+ * Note that the "failsafe callback" uses a special stackframe:
+ * { return_DS, return_ES, return_EIP, return_CS, return_EFLAGS, ... }
+ * That is, original values for DS/ES are placed on stack rather than
+ * in DS/ES themselves. Why? It saves us loading them, only to have them
+ * saved/restored in guest OS. Furthermore, if we load them we may cause
+ * a fault if they are invalid, which is a hassle to deal with. We avoid
+ * that problem if we don't load them :-) This property allows us to use
+ * the failsafe callback as a fallback: if we ever fault on loading DS/ES
+ * on return to ring != 0, we can simply package it up as a return via
+ * the failsafe callback, and let the guest OS sort it out (perhaps by
+ * killing an application process). Note that we also do this for any
+ * faulting IRET -- just let the guest OS handle it via the event
+ * callback.
+ *
+ * We terminate a domain in the following cases:
+ * - creating a callback stack frame (due to bad ring-1 stack).
+ * - faulting IRET on entry to failsafe callback handler.
+ * So, each domain must keep its ring-1 %ss/%esp and failsafe callback
+ * handler in good order (absolutely no faults allowed!).
+ */
+
+#include <xeno/config.h>
+#include <asm/smp.h>
+
+EBX = 0x00
+ECX = 0x04
+EDX = 0x08
+ESI = 0x0C
+EDI = 0x10
+EBP = 0x14
+EAX = 0x18
+DS = 0x1C
+ES = 0x20
+ORIG_EAX = 0x24
+EIP = 0x28
+CS = 0x2C
+EFLAGS = 0x30
+OLDESP = 0x34
+OLDSS = 0x38
+
+/* Offsets in task_struct */
+PROCESSOR = 0
+STATE = 4
+HYP_EVENTS = 8
+DOMAIN = 12
+SHARED_INFO = 16
+
+/* Offsets in shared_info_t */
+EVENTS = 0
+EVENTS_ENABLE = 4
+EVENT_ADDR = 8
+FAILSAFE_ADDR = 12
+
+/* Offsets in guest_trap_bounce */
+GTB_ERROR_CODE = 0
+GTB_CR2 = 4
+GTB_FLAGS = 8
+GTB_CS = 10
+GTB_EIP = 12
+GTBF_TRAP = 1
+GTBF_TRAP_NOCODE = 2
+GTBF_TRAP_CR2 = 4
+
+CF_MASK = 0x00000001
+IF_MASK = 0x00000200
+NT_MASK = 0x00004000
+
+#define SAVE_ALL \
+ cld; \
+ pushl %es; \
+ pushl %ds; \
+ pushl %eax; \
+ pushl %ebp; \
+ pushl %edi; \
+ pushl %esi; \
+ pushl %edx; \
+ pushl %ecx; \
+ pushl %ebx; \
+ movl $(__HYPERVISOR_DS),%edx; \
+ movl %edx,%ds; \
+ movl %edx,%es;
+
+#define RESTORE_ALL \
+ popl %ebx; \
+ popl %ecx; \
+ popl %edx; \
+ popl %esi; \
+ popl %edi; \
+ popl %ebp; \
+ popl %eax; \
+1: popl %ds; \
+2: popl %es; \
+ addl $4,%esp; \
+3: iret; \
+.section .fixup,"ax"; \
+6: subl $4,%esp; \
+ pushl %es; \
+5: pushl %ds; \
+4: pushl %eax; \
+ pushl %ebp; \
+ pushl %edi; \
+ pushl %esi; \
+ pushl %edx; \
+ pushl %ecx; \
+ pushl %ebx; \
+ pushl %ss; \
+ popl %ds; \
+ pushl %ss; \
+ popl %es; \
+ jmp failsafe_callback; \
+.previous; \
+.section __ex_table,"a"; \
+ .align 4; \
+ .long 1b,4b; \
+ .long 2b,5b; \
+ .long 3b,6b; \
+.previous
+
+#define GET_CURRENT(reg) \
+ movl $-8192, reg; \
+ andl %esp, reg
+
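+/*
+ * GET_CURRENT relies on each task_struct sitting at the base of its
+ * 8KB (2^13-byte) stack: masking %esp with -8192 (0xffffe000) rounds
+ * down to that base. E.g. %esp = 0xc6809f40 gives current = 0xc6808000.
+ */
+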
+ENTRY(ret_from_newdomain)
+ GET_CURRENT(%ebx)
+ jmp test_all_events
+
+ ALIGN
+restore_all:
+ RESTORE_ALL
+
+ ALIGN
+ENTRY(hypervisor_call)
+ pushl %eax # save orig_eax
+ SAVE_ALL
+ GET_CURRENT(%ebx)
+ andl $255,%eax
+ call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4)
+ movl %eax,EAX(%esp) # save the return value
+
+test_all_events:
+ mov PROCESSOR(%ebx),%eax
+ shl $4,%eax # sizeof(irq_cpustat) == 16
+ lea guest_trap_bounce(%eax),%edx
+ cli # tests must not race interrupts
+ xorl %ecx,%ecx
+ notl %ecx
+test_softirqs:
+ mov PROCESSOR(%ebx),%eax
+ shl $4,%eax # sizeof(irq_cpustat) == 16
+ test %ecx,SYMBOL_NAME(irq_stat)(%eax,1)
+ jnz process_softirqs
+test_hyp_events:
+ test %ecx, HYP_EVENTS(%ebx)
+ jnz process_hyp_events
+test_guest_events:
+ movl SHARED_INFO(%ebx),%eax
+ test %ecx,EVENTS(%eax)
+ jz restore_all
+ test %ecx,EVENTS_ENABLE(%eax)
+ jz restore_all
+ /* Prevent unnecessary reentry of event callback (stack overflow!) */
+ xorl %ecx,%ecx
+ movl %ecx,EVENTS_ENABLE(%eax)
+/* %eax == shared_info, %ebx == task_struct, %edx == guest_trap_bounce */
+process_guest_events:
+ movl EVENT_ADDR(%eax),%eax
+ movl %eax,GTB_EIP(%edx)
+ movw $__GUEST_CS,GTB_CS(%edx)
+ call create_bounce_frame
+ jmp restore_all
+
+ ALIGN
+process_softirqs:
+ push %edx
+ call SYMBOL_NAME(do_softirq)
+ pop %edx
+ jmp test_hyp_events
+
+ ALIGN
+process_hyp_events:
+ sti
+ call SYMBOL_NAME(do_hyp_events)
+ jmp test_all_events
+
+/* No special register assumptions */
+failsafe_callback:
+ GET_CURRENT(%ebx)
+ mov PROCESSOR(%ebx),%eax
+ shl $4,%eax
+ lea guest_trap_bounce(%eax),%edx
+ movl SHARED_INFO(%ebx),%eax
+ movl FAILSAFE_ADDR(%eax),%eax
+ movl %eax,GTB_EIP(%edx)
+ movw $__GUEST_CS,GTB_CS(%edx)
+ call create_bounce_frame
+ subl $8,%esi # add DS/ES to failsafe stack frame
+ movl DS(%esp),%eax
+FAULT1: movl %eax,(%esi)
+ movl ES(%esp),%eax
+FAULT2: movl %eax,4(%esi)
+ movl %esi,OLDESP(%esp)
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+ addl $12,%esp
+FAULT3: iret
+
+
+/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK: */
+/* {EIP, CS, EFLAGS, [ESP, SS]} */
+/* %edx == guest_trap_bounce, %ebx == task_struct */
+/* %eax,%ecx are clobbered. %ds:%esi contain new OLDSS/OLDESP. */
+create_bounce_frame:
+ mov CS+4(%esp),%cl
+ test $2,%cl
+ jz 1f /* jump if returning to an existing ring-1 activation */
+ /* obtain ss/esp from TSS -- no current ring-1 activations */
+ movl PROCESSOR(%ebx),%eax
+ shll $8,%eax /* multiply by 256 */
+ addl $init_tss + 12,%eax
+ movl (%eax),%esi /* tss->esp1 */
+FAULT4: movl 4(%eax),%ds /* tss->ss1 */
+ /* base of stack frame must contain ss/esp (inter-priv iret) */
+ subl $8,%esi
+ movl OLDESP+4(%esp),%eax
+FAULT5: movl %eax,(%esi)
+ movl OLDSS+4(%esp),%eax
+FAULT6: movl %eax,4(%esi)
+ jmp 2f
+1: /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */
+ movl OLDESP+4(%esp),%esi
+FAULT7: movl OLDSS+4(%esp),%ds
+2: /* Construct a stack frame: EFLAGS, CS/EIP */
+ subl $12,%esi
+ movl EIP+4(%esp),%eax
+FAULT8: movl %eax,(%esi)
+ movl CS+4(%esp),%eax
+FAULT9: movl %eax,4(%esi)
+ movl EFLAGS+4(%esp),%eax
+FAULT10:movl %eax,8(%esi)
+ /* Rewrite our stack frame and return to ring 1. */
+ /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
+ andl $0xfffcbeff,%eax
+ movl %eax,EFLAGS+4(%esp)
+ movl %ds,OLDSS+4(%esp)
+ movl %esi,OLDESP+4(%esp)
+ movzwl %es:GTB_CS(%edx),%eax
+ movl %eax,CS+4(%esp)
+ movl %es:GTB_EIP(%edx),%eax
+ movl %eax,EIP+4(%esp)
+ ret
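+
+/*
+ * The frame built by create_bounce_frame, seen from the guest (an
+ * illustrative layout, lowest address first):
+ *
+ * new ring-1 %esp -> [ EIP, CS, EFLAGS, {ESP, SS} ]
+ *
+ * ESP/SS are only present in the inter-privilege case, i.e. when no
+ * ring-1 activation existed. The failsafe path above additionally
+ * prepends the saved DS/ES, matching the layout described at the top
+ * of this file.
+ */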
+
+
+.section __ex_table,"a"
+ .align 4
+ .long FAULT1, kill_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT2, kill_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT3, kill_domain_fixup1 # Fault executing failsafe iret
+ .long FAULT4, kill_domain_fixup2 # Fault loading ring-1 stack selector
+ .long FAULT5, kill_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT6, kill_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT7, kill_domain_fixup2 # Fault loading ring-1 stack selector
+ .long FAULT8, kill_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT9, kill_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT10,kill_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT11,kill_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT12,kill_domain_fixup3 # Fault writing to ring-1 stack
+.previous
+
+# This handler kills domains which experience unrecoverable faults.
+.section .fixup,"ax"
+kill_domain_fixup1:
+ subl $4,%esp
+ SAVE_ALL
+ jmp kill_domain
+kill_domain_fixup2:
+ addl $4,%esp
+kill_domain_fixup3:
+ pushl %ss
+ popl %ds
+ jmp kill_domain
+.previous
+
+ ALIGN
+process_guest_exception_and_events:
+ mov PROCESSOR(%ebx),%eax
+ shl $4,%eax # sizeof(irq_cpustat) == 16
+ lea guest_trap_bounce(%eax),%edx
+ testb $~0,GTB_FLAGS(%edx)
+ jz test_all_events
+ call create_bounce_frame # just the basic frame
+ mov %es:GTB_FLAGS(%edx),%cl
+ test $GTBF_TRAP_NOCODE,%cl
+ jnz 2f
+ subl $4,%esi # push error_code onto guest frame
+ movl %es:GTB_ERROR_CODE(%edx),%eax
+FAULT11:movl %eax,(%esi)
+ test $GTBF_TRAP_CR2,%cl
+ jz 1f
+ subl $4,%esi # push %cr2 onto guest frame
+ movl %es:GTB_CR2(%edx),%eax
+FAULT12:movl %eax,(%esi)
+1: movl %esi,OLDESP(%esp)
+2: push %es # unclobber %ds
+ pop %ds
+ movb $0,GTB_FLAGS(%edx)
+ jmp test_all_events
+
+ ALIGN
+ENTRY(ret_from_intr)
+ GET_CURRENT(%ebx)
+ movb CS(%esp),%al
+ testb $3,%al # return to non-supervisor?
+ jne test_all_events
+ jmp restore_all
+
+ ALIGN
+ret_from_exception:
+ movb CS(%esp),%al
+ testb $3,%al # return to non-supervisor?
+ jne process_guest_exception_and_events
+ jmp restore_all
+
+ ALIGN
+
+ENTRY(divide_error)
+ pushl $0 # no error code
+ pushl $ SYMBOL_NAME(do_divide_error)
+ ALIGN
+error_code:
+ pushl %ds
+ pushl %eax
+ xorl %eax,%eax
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %edx
+ decl %eax # eax = -1
+ pushl %ecx
+ pushl %ebx
+ cld
+ movl %es,%ecx
+ movl ORIG_EAX(%esp), %esi # get the error code
+ movl ES(%esp), %edi # get the function address
+ movl %eax, ORIG_EAX(%esp)
+ movl %ecx, ES(%esp)
+ movl %esp,%edx
+ pushl %esi # push the error code
+ pushl %edx # push the pt_regs pointer
+ movl $(__HYPERVISOR_DS),%edx
+ movl %edx,%ds
+ movl %edx,%es
+ GET_CURRENT(%ebx)
+ call *%edi
+ addl $8,%esp
+ jmp ret_from_exception
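+
+/*
+ * On the shuffle above: each exception stub pushes an error code (or 0)
+ * and its C handler's address, so on entry to error_code those two
+ * words occupy the ORIG_EAX and ES slots of the would-be pt_regs frame.
+ * They are pulled into %esi/%edi, and the slots are overwritten with -1
+ * (no syscall) and the saved %es, leaving a well-formed frame before
+ * the indirect call.
+ */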
+
+ENTRY(coprocessor_error)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_coprocessor_error)
+ jmp error_code
+
+ENTRY(simd_coprocessor_error)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_simd_coprocessor_error)
+ jmp error_code
+
+ENTRY(device_not_available)
+ pushl $0
+ pushl $SYMBOL_NAME(math_state_restore)
+ jmp error_code
+
+ENTRY(debug)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_debug)
+ jmp error_code
+
+ENTRY(nmi)
+ pushl %eax
+ SAVE_ALL
+ movl %esp,%edx
+ pushl $0
+ pushl %edx
+ call SYMBOL_NAME(do_nmi)
+ addl $8,%esp
+ RESTORE_ALL
+
+ENTRY(int3)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_int3)
+ jmp error_code
+
+ENTRY(overflow)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_overflow)
+ jmp error_code
+
+ENTRY(bounds)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_bounds)
+ jmp error_code
+
+ENTRY(invalid_op)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_invalid_op)
+ jmp error_code
+
+ENTRY(coprocessor_segment_overrun)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun)
+ jmp error_code
+
+ENTRY(double_fault)
+ pushl $ SYMBOL_NAME(do_double_fault)
+ jmp error_code
+
+ENTRY(invalid_TSS)
+ pushl $ SYMBOL_NAME(do_invalid_TSS)
+ jmp error_code
+
+ENTRY(segment_not_present)
+ pushl $ SYMBOL_NAME(do_segment_not_present)
+ jmp error_code
+
+ENTRY(stack_segment)
+ pushl $ SYMBOL_NAME(do_stack_segment)
+ jmp error_code
+
+ENTRY(general_protection)
+ pushl $ SYMBOL_NAME(do_general_protection)
+ jmp error_code
+
+ENTRY(alignment_check)
+ pushl $ SYMBOL_NAME(do_alignment_check)
+ jmp error_code
+
+ENTRY(page_fault)
+ pushl $ SYMBOL_NAME(do_page_fault)
+ jmp error_code
+
+ENTRY(machine_check)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_machine_check)
+ jmp error_code
+
+ENTRY(spurious_interrupt_bug)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_spurious_interrupt_bug)
+ jmp error_code
+
+.data
+ENTRY(hypervisor_call_table)
+ .long SYMBOL_NAME(do_set_trap_table)
+ .long SYMBOL_NAME(do_process_page_updates)
+ .long SYMBOL_NAME(do_console_write)
+ .long SYMBOL_NAME(do_set_gdt)
+ .long SYMBOL_NAME(do_stack_and_ldt_switch)
+ .long SYMBOL_NAME(do_net_update)
+ .long SYMBOL_NAME(do_fpu_taskswitch)
+ .long SYMBOL_NAME(do_sched_op)
+ .long SYMBOL_NAME(kill_domain)
+ .long SYMBOL_NAME(do_dom0_op)
+ .long SYMBOL_NAME(do_network_op)
+ .long SYMBOL_NAME(do_block_io_op)
+ .long SYMBOL_NAME(do_set_debugreg)
+ .long SYMBOL_NAME(do_get_debugreg)
+ .long SYMBOL_NAME(do_update_descriptor)
+ .long SYMBOL_NAME(do_set_fast_trap)
+ .rept NR_syscalls-(.-hypervisor_call_table)/4
+ .long SYMBOL_NAME(sys_ni_syscall)
+ .endr
diff --git a/xen/arch/i386/extable.c b/xen/arch/i386/extable.c
new file mode 100644
index 0000000000..4cd9f064c3
--- /dev/null
+++ b/xen/arch/i386/extable.c
@@ -0,0 +1,62 @@
+/*
+ * linux/arch/i386/mm/extable.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/uaccess.h>
+
+extern const struct exception_table_entry __start___ex_table[];
+extern const struct exception_table_entry __stop___ex_table[];
+
+static inline unsigned long
+search_one_table(const struct exception_table_entry *first,
+ const struct exception_table_entry *last,
+ unsigned long value)
+{
+ while (first <= last) {
+ const struct exception_table_entry *mid;
+ long diff;
+
+ mid = (last - first) / 2 + first;
+ diff = mid->insn - value;
+ if (diff == 0)
+ return mid->fixup;
+ else if (diff < 0)
+ first = mid+1;
+ else
+ last = mid-1;
+ }
+ return 0;
+}
+
+extern spinlock_t modlist_lock;
+
+unsigned long
+search_exception_table(unsigned long addr)
+{
+ unsigned long ret = 0;
+
+#ifndef CONFIG_MODULES
+ /* There is only the kernel to search. */
+ ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr);
+ return ret;
+#else
+ unsigned long flags;
+ /* The kernel is the last "module" -- no need to treat it special. */
+ struct module *mp;
+
+ spin_lock_irqsave(&modlist_lock, flags);
+ for (mp = module_list; mp != NULL; mp = mp->next) {
+ if (mp->ex_table_start == NULL || !(mp->flags&(MOD_RUNNING|MOD_INITIALIZING)))
+ continue;
+ ret = search_one_table(mp->ex_table_start,
+ mp->ex_table_end - 1, addr);
+ if (ret)
+ break;
+ }
+ spin_unlock_irqrestore(&modlist_lock, flags);
+ return ret;
+#endif
+}
diff --git a/xen/arch/i386/i387.c b/xen/arch/i386/i387.c
new file mode 100644
index 0000000000..fe34ff16f5
--- /dev/null
+++ b/xen/arch/i386/i387.c
@@ -0,0 +1,56 @@
+/*
+ * linux/arch/i386/kernel/i387.c
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <xeno/config.h>
+#include <xeno/sched.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+
+void init_fpu(void)
+{
+ __asm__("fninit");
+ if ( cpu_has_xmm ) load_mxcsr(0x1f80);
+ current->flags |= PF_DONEFPUINIT;
+}
+
+static inline void __save_init_fpu( struct task_struct *tsk )
+{
+ if ( cpu_has_fxsr ) {
+ asm volatile( "fxsave %0 ; fnclex"
+ : "=m" (tsk->thread.i387.fxsave) );
+ } else {
+ asm volatile( "fnsave %0 ; fwait"
+ : "=m" (tsk->thread.i387.fsave) );
+ }
+ tsk->flags &= ~PF_USEDFPU;
+}
+
+void save_init_fpu( struct task_struct *tsk )
+{
+ /*
+ * The guest OS may have set the 'virtual STTS' flag.
+ * This causes us to set the real flag, so we'll need
+ * to temporarily clear it while saving f-p state.
+ */
+ if ( tsk->flags & PF_GUEST_STTS ) clts();
+ __save_init_fpu(tsk);
+ stts();
+}
+
+void restore_fpu( struct task_struct *tsk )
+{
+ if ( cpu_has_fxsr ) {
+ asm volatile( "fxrstor %0"
+ : : "m" (tsk->thread.i387.fxsave) );
+ } else {
+ asm volatile( "frstor %0"
+ : : "m" (tsk->thread.i387.fsave) );
+ }
+}
diff --git a/xen/arch/i386/i8259.c b/xen/arch/i386/i8259.c
new file mode 100644
index 0000000000..645b7b0fef
--- /dev/null
+++ b/xen/arch/i386/i8259.c
@@ -0,0 +1,481 @@
+/******************************************************************************
+ * i8259.c
+ *
+ * Well, this is required for SMP systems as well, as it builds interrupt
+ * tables for IO-APICs as well as uniprocessor 8259-alikes.
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <asm/ptrace.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/interrupt.h>
+#include <xeno/irq.h>
+
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/desc.h>
+#include <asm/bitops.h>
+#include <xeno/delay.h>
+#include <asm/apic.h>
+
+
+/*
+ * Common place to define all x86 IRQ vectors
+ *
+ * This builds up the IRQ handler stubs using some ugly macros in irq.h
+ *
+ * These macros create the low-level assembly IRQ routines that save
+ * register context and call do_IRQ(). do_IRQ() then does all the
+ * operations that are needed to keep the AT (or SMP IOAPIC)
+ * interrupt-controller happy.
+ */
+
+BUILD_COMMON_IRQ()
+
+#define BI(x,y) \
+ BUILD_IRQ(x##y)
+
+#define BUILD_16_IRQS(x) \
+ BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
+ BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
+ BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
+ BI(x,c) BI(x,d) BI(x,e) BI(x,f)
+
+/*
+ * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ * (these are usually mapped to vectors 0x30-0x3f)
+ */
+ BUILD_16_IRQS(0x0)
+
+#ifdef CONFIG_X86_IO_APIC
+/*
+ * The IO-APIC gives us many more interrupt sources. Most of these
+ * are unused but an SMP system is supposed to have enough memory ...
+ * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
+ * across the spectrum, so we really want to be prepared to get all
+ * of these. Plus, more powerful systems might have more than 64
+ * IO-APIC registers.
+ *
+ * (these are usually mapped into the 0x30-0xff vector range)
+ */
+ BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
+ BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
+ BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
+ BUILD_16_IRQS(0xc)
+#endif
+
+#undef BUILD_16_IRQS
+#undef BI
+
+
+/*
+ * The following vectors are part of the Linux architecture, there
+ * is no hardware IRQ pin equivalent for them, they are triggered
+ * through the ICC by us (IPIs)
+ */
+#ifdef CONFIG_SMP
+ BUILD_SMP_INTERRUPT(event_check_interrupt,EVENT_CHECK_VECTOR)
+ BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
+ BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
+#endif
+
+/*
+ * every pentium local APIC has two 'local interrupts', with a
+ * soft-definable vector attached to both interrupts, one of
+ * which is a timer interrupt, the other one is error counter
+ * overflow. Linux uses the local APIC timer interrupt to get
+ * a much simpler SMP time architecture:
+ */
+#ifdef CONFIG_X86_LOCAL_APIC
+ BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
+ BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
+ BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+#endif
+
+#define IRQ(x,y) \
+ IRQ##x##y##_interrupt
+
+#define IRQLIST_16(x) \
+ IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
+ IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
+ IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
+ IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
+
+ void (*interrupt[NR_IRQS])(void) = {
+ IRQLIST_16(0x0),
+
+#ifdef CONFIG_X86_IO_APIC
+ IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
+ IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
+ IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
+ IRQLIST_16(0xc)
+#endif
+ };
+
+#undef IRQ
+#undef IRQLIST_16
+
+/*
+ * This is the 'legacy' 8259A Programmable Interrupt Controller,
+ * present in the majority of PC/AT boxes,
+ * plus some generic x86-specific things, if generic specifics make
+ * any sense at all.
+ * This file should become arch/i386/kernel/irq.c when the old irq.c
+ * moves to arch-independent land.
+ */
+
+spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED;
+
+static void end_8259A_irq (unsigned int irq)
+{
+ if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+ enable_8259A_irq(irq);
+}
+
+#define shutdown_8259A_irq disable_8259A_irq
+
+void mask_and_ack_8259A(unsigned int);
+
+static unsigned int startup_8259A_irq(unsigned int irq)
+{
+ enable_8259A_irq(irq);
+ return 0; /* never anything pending */
+}
+
+static struct hw_interrupt_type i8259A_irq_type = {
+ "XT-PIC",
+ startup_8259A_irq,
+ shutdown_8259A_irq,
+ enable_8259A_irq,
+ disable_8259A_irq,
+ mask_and_ack_8259A,
+ end_8259A_irq,
+ NULL
+};
+
+/*
+ * 8259A PIC functions to handle ISA devices:
+ */
+
+/*
+ * This contains the IRQ mask for both 8259A interrupt controllers.
+ */
+static unsigned int cached_irq_mask = 0xffff;
+
+#define __byte(x,y) (((unsigned char *)&(y))[x])
+#define cached_21 (__byte(0,cached_irq_mask))
+#define cached_A1 (__byte(1,cached_irq_mask))
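+/*
+ * That is, cached_21 is the low byte of the mask (master PIC, data port
+ * 0x21) and cached_A1 the high byte (slave PIC, data port 0xA1). E.g. a
+ * cached_irq_mask of 0xfffb leaves only IRQ2, the cascade, unmasked.
+ */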
+
+/*
+ * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
+ * boards the timer interrupt is not really connected to any IO-APIC pin,
+ * it's fed to the master 8259A's IR0 line only.
+ *
+ * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
+ * this 'mixed mode' IRQ handling costs nothing because it's only used
+ * at IRQ setup time.
+ */
+unsigned long io_apic_irqs;
+
+void disable_8259A_irq(unsigned int irq)
+{
+ unsigned int mask = 1 << irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ cached_irq_mask |= mask;
+ if (irq & 8)
+ outb(cached_A1,0xA1);
+ else
+ outb(cached_21,0x21);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void enable_8259A_irq(unsigned int irq)
+{
+ unsigned int mask = ~(1 << irq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ cached_irq_mask &= mask;
+ if (irq & 8)
+ outb(cached_A1,0xA1);
+ else
+ outb(cached_21,0x21);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+int i8259A_irq_pending(unsigned int irq)
+{
+ unsigned int mask = 1<<irq;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ if (irq < 8)
+ ret = inb(0x20) & mask;
+ else
+ ret = inb(0xA0) & (mask >> 8);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+
+ return ret;
+}
+
+void make_8259A_irq(unsigned int irq)
+{
+ disable_irq_nosync(irq);
+ io_apic_irqs &= ~(1<<irq);
+ irq_desc[irq].handler = &i8259A_irq_type;
+ enable_irq(irq);
+}
+
+/*
+ * This function is expected to be called rarely. Switching between
+ * 8259A registers is slow.
+ * It must be called with the irq controller spinlock held.
+ */
+static inline int i8259A_irq_real(unsigned int irq)
+{
+ int value;
+ int irqmask = 1<<irq;
+
+ if (irq < 8) {
+ outb(0x0B,0x20); /* ISR register */
+ value = inb(0x20) & irqmask;
+ outb(0x0A,0x20); /* back to the IRR register */
+ return value;
+ }
+ outb(0x0B,0xA0); /* ISR register */
+ value = inb(0xA0) & (irqmask >> 8);
+ outb(0x0A,0xA0); /* back to the IRR register */
+ return value;
+}
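+
+/*
+ * (Writing 0x0B or 0x0A to a PIC command port is an OCW3 command that
+ * selects which register, ISR or IRR, subsequent reads will return.)
+ */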
+
+/*
+ * Careful! The 8259A is a fragile beast; it pretty
+ * much _has_ to be done exactly like this (mask it
+ * first, _then_ send the EOI, and the order of EOI
+ * to the two 8259s is important!).
+ */
+void mask_and_ack_8259A(unsigned int irq)
+{
+ unsigned int irqmask = 1 << irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ /*
+ * Lightweight spurious IRQ detection. We do not want
+ * to overdo spurious IRQ handling - it's usually a sign
+ * of hardware problems, so we only do the checks we can
+ * do without slowing down good hardware unnecessarily.
+ *
+ * Note that IRQ7 and IRQ15 (the two spurious IRQs
+ * usually resulting from the 8259A-1|2 PICs) occur
+ * even if the IRQ is masked in the 8259A. Thus we
+ * can check spurious 8259A IRQs without doing the
+ * quite slow i8259A_irq_real() call for every IRQ.
+ * This does not cover 100% of spurious interrupts,
+ * but should be enough to warn the user that there
+ * is something bad going on ...
+ */
+ if (cached_irq_mask & irqmask)
+ goto spurious_8259A_irq;
+ cached_irq_mask |= irqmask;
+
+ handle_real_irq:
+ if (irq & 8) {
+ inb(0xA1); /* DUMMY - (do we need this?) */
+ outb(cached_A1,0xA1);
+ outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */
+ outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */
+ } else {
+ inb(0x21); /* DUMMY - (do we need this?) */
+ outb(cached_21,0x21);
+ outb(0x60+irq,0x20); /* 'Specific EOI' to master */
+ }
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+ return;
+
+ spurious_8259A_irq:
+ /*
+ * this is the slow path - should happen rarely.
+ */
+ if (i8259A_irq_real(irq))
+ /*
+ * oops, the IRQ _is_ in service according to the
+ * 8259A - not spurious, go handle it.
+ */
+ goto handle_real_irq;
+
+ {
+ static int spurious_irq_mask;
+ /*
+ * At this point we can be sure the IRQ is spurious,
+ * let's ACK and report it (once per IRQ).
+ */
+ if (!(spurious_irq_mask & irqmask)) {
+ printk("spurious 8259A interrupt: IRQ%d.\n", irq);
+ spurious_irq_mask |= irqmask;
+ }
+ atomic_inc(&irq_err_count);
+ /*
+ * Theoretically we do not have to handle this IRQ,
+ * but in Linux this does not cause problems and is
+ * simpler for us.
+ */
+ goto handle_real_irq;
+ }
+}
+
+void __init init_8259A(int auto_eoi)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ outb(0xff, 0x21); /* mask all of 8259A-1 */
+ outb(0xff, 0xA1); /* mask all of 8259A-2 */
+
+ /*
+ * outb_p - this has to work on a wide range of PC hardware.
+ */
+ outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */
+ outb_p(0x30 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
+ outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */
+ if (auto_eoi)
+ outb_p(0x03, 0x21); /* master does Auto EOI */
+ else
+ outb_p(0x01, 0x21); /* master expects normal EOI */
+
+ outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */
+ outb_p(0x30 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
+ outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */
+ outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode
+ is to be investigated) */
+
+ if (auto_eoi)
+ /*
+ * in AEOI mode we just have to mask the interrupt
+ * when acking.
+ */
+ i8259A_irq_type.ack = disable_8259A_irq;
+ else
+ i8259A_irq_type.ack = mask_and_ack_8259A;
+
+ udelay(100); /* wait for 8259A to initialize */
+
+ outb(cached_21, 0x21); /* restore master IRQ mask */
+ outb(cached_A1, 0xA1); /* restore slave IRQ mask */
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+
+/*
+ * IRQ2 is cascade interrupt to second interrupt controller
+ */
+
+static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL};
+
+void __init init_ISA_irqs (void)
+{
+ int i;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ init_bsp_APIC();
+#endif
+ init_8259A(0);
+
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc[i].status = IRQ_DISABLED;
+ irq_desc[i].action = 0;
+ irq_desc[i].depth = 1;
+
+ if (i < 16) {
+ /*
+ * 16 old-style INTA-cycle interrupts:
+ */
+ irq_desc[i].handler = &i8259A_irq_type;
+ } else {
+ /*
+ * 'high' PCI IRQs filled in on demand
+ */
+ irq_desc[i].handler = &no_irq_type;
+ }
+ }
+}
+
+void __init init_IRQ(void)
+{
+ int i;
+
+ init_ISA_irqs();
+
+ /*
+ * Cover the whole vector space, no vector can escape
+ * us. (some of these will be overridden and become
+ * 'special' SMP interrupts)
+ */
+ for (i = 0; i < NR_IRQS; i++) {
+ int vector = FIRST_EXTERNAL_VECTOR + i;
+ if (vector != HYPERVISOR_CALL_VECTOR)
+ set_intr_gate(vector, interrupt[i]);
+ }
+
+#ifdef CONFIG_SMP
+ /*
+ * IRQ0 must be given a fixed assignment and initialized,
+ * because it's used before the IO-APIC is set up.
+ */
+ set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
+
+ /*
+ * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+ * IPI, driven by wakeup.
+ */
+ set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt);
+
+ /* IPI for invalidation */
+ set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+
+ /* IPI for generic function call */
+ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ /* self generated IPI for local APIC timer */
+ set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+
+ /* IPI vectors for APIC spurious and error interrupts */
+ set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
+ set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+#endif
+
+ /*
+ * Set the clock to HZ Hz, we already have a valid
+ * vector now:
+ */
+#define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */
+#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
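+ /* e.g. with HZ=100: LATCH = (1193180+50)/100 = 11932, so channel 0
+ reloads from 11932 at 1.19318MHz, i.e. one interrupt every ~10ms. */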
+ outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(LATCH & 0xff , 0x40); /* LSB */
+ outb(LATCH >> 8 , 0x40); /* MSB */
+
+ setup_irq(2, &irq2);
+}
+
+/*
+ * We only need the timer interrupt for calibrating the tsc<->time<->bus-cycle
+ * mappings. After this, all timing-related functions should run off the
+ * APIC timers. This function allows us to disable the PIT once that is done.
+ */
+void __init disable_pit(void)
+{
+ printk("Disable PIT. Not needed anymore\n");
+ /* This is not the most elegant way, but hey. */
+ disable_irq(0);
+}
diff --git a/xen/arch/i386/idle0_task.c b/xen/arch/i386/idle0_task.c
new file mode 100644
index 0000000000..0d2b9e40bf
--- /dev/null
+++ b/xen/arch/i386/idle0_task.c
@@ -0,0 +1,20 @@
+#include <xeno/config.h>
+#include <xeno/sched.h>
+#include <asm/desc.h>
+
+/*
+ * Initial task structure. XXX KAF: To get this 8192-byte aligned without
+ * linker tricks I copy it into aligned BSS area at boot time.
+ * Actual name idle0_task_union now declared in boot.S.
+ */
+struct task_struct first_task_struct = IDLE0_TASK(idle0_task_union.task);
+
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data.cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
+struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS };
+
diff --git a/xen/arch/i386/io_apic.c b/xen/arch/i386/io_apic.c
new file mode 100644
index 0000000000..878c189afb
--- /dev/null
+++ b/xen/arch/i386/io_apic.c
@@ -0,0 +1,1683 @@
+/*
+ * Intel IO-APIC support for multi-Pentium hosts.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
+ *
+ * Many thanks to Stig Venaas for trying out countless experimental
+ * patches and reporting/debugging problems patiently!
+ *
+ * (c) 1999, Multiple IO-APIC support, developed by
+ * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
+ * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
+ * further tested and cleaned up by Zach Brown <zab@redhat.com>
+ * and Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/irq.h>
+#include <xeno/delay.h>
+#include <xeno/sched.h>
+#include <xeno/config.h>
+#include <asm/mc146818rtc.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/desc.h>
+#include <asm/smpboot.h>
+
+
+static unsigned int nmi_watchdog; /* XXXX XEN */
+
+#undef APIC_LOCKUP_DEBUG
+
+#define APIC_LOCKUP_DEBUG
+
+static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;
+
+unsigned int int_dest_addr_mode = APIC_DEST_LOGICAL;
+unsigned char int_delivery_mode = dest_LowestPrio;
+
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+/*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+ */
+#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+static struct irq_pin_list {
+ int apic, pin, next;
+} irq_2_pin[PIN_MAP_SIZE];
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static void __init add_pin_to_irq(unsigned int irq, int apic, int pin)
+{
+ static int first_free_entry = NR_IRQS;
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ while (entry->next)
+ entry = irq_2_pin + entry->next;
+
+ if (entry->pin != -1) {
+ entry->next = first_free_entry;
+ entry = irq_2_pin + entry->next;
+ if (++first_free_entry >= PIN_MAP_SIZE)
+ panic("io_apic.c: whoops");
+ }
+ entry->apic = apic;
+ entry->pin = pin;
+}
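+
+/*
+ * Illustration of the data structure: irq_2_pin is a set of short
+ * singly-linked lists threaded through one array. Slot irq is the list
+ * head, e.g. { apic = 0, pin = 4, next = 0 } for ISA IRQ4 on pin 4 of
+ * the first IO-APIC; a second (apic, pin) sharing the IRQ goes into a
+ * spare slot at index >= NR_IRQS, chained via the 'next' index.
+ */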
+
+/*
+ * Reroute an IRQ to a different pin.
+ */
+static void __init replace_pin_at_irq(unsigned int irq,
+ int oldapic, int oldpin,
+ int newapic, int newpin)
+{
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ while (1) {
+ if (entry->apic == oldapic && entry->pin == oldpin) {
+ entry->apic = newapic;
+ entry->pin = newpin;
+ }
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+}
+
+#define __DO_ACTION(R, ACTION, FINAL) \
+ \
+{ \
+ int pin; \
+ struct irq_pin_list *entry = irq_2_pin + irq; \
+ \
+ for (;;) { \
+ unsigned int reg; \
+ pin = entry->pin; \
+ if (pin == -1) \
+ break; \
+ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
+ reg ACTION; \
+ io_apic_modify(entry->apic, reg); \
+ if (!entry->next) \
+ break; \
+ entry = irq_2_pin + entry->next; \
+ } \
+ FINAL; \
+}
+
+#define DO_ACTION(name,R,ACTION, FINAL) \
+ \
+ static void name##_IO_APIC_irq (unsigned int irq) \
+ __DO_ACTION(R, ACTION, FINAL)
+
+DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) )
+ /* mask = 1 */
+DO_ACTION( __unmask, 0, &= 0xfffeffff, )
+ /* mask = 0 */
+DO_ACTION( __mask_and_edge, 0, = (reg & 0xffff7fff) | 0x00010000, )
+ /* mask = 1, trigger = 0 */
+DO_ACTION( __unmask_and_level, 0, = (reg & 0xfffeffff) | 0x00008000, )
+ /* mask = 0, trigger = 1 */
+
+static void mask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __mask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ /*
+ * Disable it in the IO-APIC irq-routing table:
+ */
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 1;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC (void)
+{
+ int apic, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+ clear_IO_APIC_pin(apic, pin);
+}
+
+/*
+ * Support for broken MP BIOSes: enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+int pirq_entries [MAX_PIRQS];
+int pirqs_enabled;
+
+int skip_ioapic_setup;
+#if 0
+
+static int __init noioapic_setup(char *str)
+{
+ skip_ioapic_setup = 1;
+ return 1;
+}
+
+__setup("noapic", noioapic_setup);
+
+static int __init ioapic_setup(char *str)
+{
+ skip_ioapic_setup = 0;
+ return 1;
+}
+
+__setup("apic", ioapic_setup);
+
+
+
+static int __init ioapic_pirq_setup(char *str)
+{
+ int i, max;
+ int ints[MAX_PIRQS+1];
+
+ get_options(str, ARRAY_SIZE(ints), ints);
+
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+
+ pirqs_enabled = 1;
+ printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n");
+ max = MAX_PIRQS;
+ if (ints[0] < MAX_PIRQS)
+ max = ints[0];
+
+ for (i = 0; i < max; i++) {
+ printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+ /*
+ * PIRQs are mapped upside down, usually.
+ */
+ pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+ }
+ return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+
+#endif
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+static int __init find_irq_entry(int apic, int pin, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mpc_irqtype == type &&
+ (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
+ mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
+ mp_irqs[i].mpc_dstirq == pin)
+ return i;
+
+ return -1;
+}
+
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+static int __init find_isa_irq_pin(int irq, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
+ (mp_irqs[i].mpc_irqtype == type) &&
+ (mp_irqs[i].mpc_srcbusirq == irq))
+
+ return mp_irqs[i].mpc_dstirq;
+ }
+ return -1;
+}
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+static int pin_2_irq(int idx, int apic, int pin);
+
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+{
+ int apic, i, best_guess = -1;
+
+ Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+ bus, slot, pin);
+ if ((mp_bus_id_to_pci_bus==NULL) || (mp_bus_id_to_pci_bus[bus] == -1)) {
+ printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+ return -1;
+ }
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
+ mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+ break;
+
+ if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
+ !mp_irqs[i].mpc_irqtype &&
+ (bus == lbus) &&
+ (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
+ int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+
+ if (!(apic || IO_APIC_IRQ(irq)))
+ continue;
+
+ if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+ return irq;
+ /*
+ * Use the first all-but-pin matching entry as a
+ * best-guess fuzzy result for broken mptables.
+ */
+ if (best_guess < 0)
+ best_guess = irq;
+ }
+ }
+ return best_guess;
+}
+
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int __init EISA_ELCR(unsigned int irq)
+{
+ if (irq < 16) {
+ unsigned int port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+ }
+ printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq);
+ return 0;
+}
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value. If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+#define default_EISA_polarity(idx) (0)
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx) (0)
+#define default_ISA_polarity(idx) (0)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx) (1)
+#define default_PCI_polarity(idx) (1)
+
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx) (1)
+#define default_MCA_polarity(idx) (0)
+
+static int __init MPBIOS_polarity(int idx)
+{
+ int bus = mp_irqs[idx].mpc_srcbus;
+ int polarity;
+
+ /*
+ * Determine IRQ line polarity (high active or low active):
+ */
+ switch (mp_irqs[idx].mpc_irqflag & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent polarity */
+ {
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ polarity = default_ISA_polarity(idx);
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ polarity = default_EISA_polarity(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ polarity = default_PCI_polarity(idx);
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ polarity = default_MCA_polarity(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ }
+ break;
+ }
+ case 1: /* high active */
+ {
+ polarity = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ case 3: /* low active */
+ {
+ polarity = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ }
+ return polarity;
+}
+
+static int __init MPBIOS_trigger(int idx)
+{
+ int bus = mp_irqs[idx].mpc_srcbus;
+ int trigger;
+
+ /*
+ * Determine IRQ trigger mode (edge or level sensitive):
+ */
+ switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent */
+ {
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ trigger = default_ISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ trigger = default_EISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ trigger = default_PCI_trigger(idx);
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ trigger = default_MCA_trigger(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ }
+ break;
+ }
+ case 1: /* edge */
+ {
+ trigger = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ case 3: /* level */
+ {
+ trigger = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 0;
+ break;
+ }
+ }
+ return trigger;
+}
+
+static inline int irq_polarity(int idx)
+{
+ return MPBIOS_polarity(idx);
+}
+
+static inline int irq_trigger(int idx)
+{
+ return MPBIOS_trigger(idx);
+}
+
+static int pin_2_irq(int idx, int apic, int pin)
+{
+ int irq, i;
+ int bus = mp_irqs[idx].mpc_srcbus;
+
+ /*
+ * Debugging check, we are in big trouble if this message pops up!
+ */
+ if (mp_irqs[idx].mpc_dstirq != pin)
+ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ case MP_BUS_EISA:
+ case MP_BUS_MCA:
+ {
+ irq = mp_irqs[idx].mpc_srcbusirq;
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ /*
+ * PCI IRQs are mapped in order
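+				 * across IO-APICs: pin N of IO-APIC k gets IRQ number N
+				 * plus the total pin count of IO-APICs 0 .. k-1.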
+ */
+ i = irq = 0;
+ while (i < apic)
+ irq += nr_ioapic_registers[i++];
+ irq += pin;
+ break;
+ }
+ default:
+ {
+ printk(KERN_ERR "unknown bus type %d.\n",bus);
+ irq = 0;
+ break;
+ }
+ }
+
+ /*
+ * PCI IRQ command line redirection. Yes, limits are hardcoded.
+ */
+ if ((pin >= 16) && (pin <= 23)) {
+ if (pirq_entries[pin-16] != -1) {
+ if (!pirq_entries[pin-16]) {
+ printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16);
+ } else {
+ irq = pirq_entries[pin-16];
+ printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n",
+ pin-16, irq);
+ }
+ }
+ }
+ return irq;
+}
+
+static inline int IO_APIC_irq_trigger(int irq)
+{
+ int apic, idx, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ idx = find_irq_entry(apic,pin,mp_INT);
+ if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
+ return irq_trigger(idx);
+ }
+ }
+ /*
+ * nonexistent IRQs are edge default
+ */
+ return 0;
+}
+
+int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR, 0 };
+
+static int __init assign_irq_vector(int irq)
+{
+ static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+ if (IO_APIC_VECTOR(irq) > 0)
+ return IO_APIC_VECTOR(irq);
+next:
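+	/*
+	 * Step by 8 so consecutively allocated vectors are spread across
+	 * interrupt priority classes (the class is vector >> 4); once we
+	 * run past FIRST_SYSTEM_VECTOR, wrap around with a new offset.
+	 */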
+ current_vector += 8;
+
+ /* XXX Skip the guestOS -> Xen syscall vector! XXX */
+ if (current_vector == HYPERVISOR_CALL_VECTOR) goto next;
+ /* XXX Skip the Linux/BSD fast-trap vector! XXX */
+ if (current_vector == 0x80) goto next;
+
+#if 0
+ if (current_vector == SYSCALL_VECTOR)
+ goto next;
+#endif
+
+ if (current_vector > FIRST_SYSTEM_VECTOR) {
+ offset++;
+ current_vector = FIRST_DEVICE_VECTOR + offset;
+ }
+
+ if (current_vector == FIRST_SYSTEM_VECTOR)
+ panic("ran out of interrupt sources!");
+
+ IO_APIC_VECTOR(irq) = current_vector;
+ return current_vector;
+}
+
+extern void (*interrupt[NR_IRQS])(void);
+static struct hw_interrupt_type ioapic_level_irq_type;
+static struct hw_interrupt_type ioapic_edge_irq_type;
+
+void __init setup_IO_APIC_irqs(void)
+{
+ struct IO_APIC_route_entry entry;
+ int apic, pin, idx, irq, first_notcon = 1, vector;
+ unsigned long flags;
+
+ printk(KERN_DEBUG "init IO_APIC IRQs\n");
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+ /*
+ * add it to the IO-APIC irq-routing table:
+ */
+ memset(&entry,0,sizeof(entry));
+
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
+ entry.mask = 0; /* enable IRQ */
+ entry.dest.logical.logical_dest = target_cpus();
+
+ idx = find_irq_entry(apic,pin,mp_INT);
+ if (idx == -1) {
+ if (first_notcon) {
+ printk(KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+ first_notcon = 0;
+ } else
+ printk(", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+ continue;
+ }
+
+ entry.trigger = irq_trigger(idx);
+ entry.polarity = irq_polarity(idx);
+
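+		/*
+		 * Level-triggered entries start out masked; they are unmasked
+		 * when the IRQ is started up (startup_level_ioapic_irq).
+		 */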
+ if (irq_trigger(idx)) {
+ entry.trigger = 1;
+ entry.mask = 1;
+ }
+
+ irq = pin_2_irq(idx, apic, pin);
+ /*
+ * skip adding the timer int on secondary nodes, which causes
+ * a small but painful rift in the time-space continuum
+ */
+ if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ && (apic != 0) && (irq == 0))
+ continue;
+ else
+ add_pin_to_irq(irq, apic, pin);
+
+ if (!apic && !IO_APIC_IRQ(irq))
+ continue;
+
+ if (IO_APIC_IRQ(irq)) {
+ vector = assign_irq_vector(irq);
+ entry.vector = vector;
+
+ if (IO_APIC_irq_trigger(irq))
+ irq_desc[irq].handler = &ioapic_level_irq_type;
+ else
+ irq_desc[irq].handler = &ioapic_edge_irq_type;
+
+ set_intr_gate(vector, interrupt[irq]);
+
+ if (!apic && (irq < 16))
+ disable_8259A_irq(irq);
+ }
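+		/*
+		 * Each redirection entry is 64 bits, split across two registers:
+		 * 0x10+2*pin (low dword: vector, mask, trigger, polarity) and
+		 * 0x11+2*pin (high dword: destination). The high dword is
+		 * written first, then the low dword that carries the mask bit.
+		 */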
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+ }
+
+ if (!first_notcon)
+ printk(" not connected.\n");
+}
+
+/*
+ * Set up the 8259A-master output pin as broadcast to all
+ * CPUs.
+ */
+void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ memset(&entry,0,sizeof(entry));
+
+ disable_8259A_irq(0);
+
+ /* mask LVT0 */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+
+ /*
+ * We use logical delivery to get the timer IRQ
+ * to the first CPU.
+ */
+ entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
+ entry.mask = 0; /* unmask IRQ now */
+ entry.dest.logical.logical_dest = target_cpus();
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.polarity = 0;
+ entry.trigger = 0;
+ entry.vector = vector;
+
+ /*
+ * The timer IRQ doesn't have to know that behind the
+ * scene we have a 8259A-master in AEOI mode ...
+ */
+ irq_desc[0].handler = &ioapic_edge_irq_type;
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ enable_8259A_irq(0);
+}
+
+void __init UNEXPECTED_IO_APIC(void)
+{
+ printk(KERN_WARNING
+ "An unexpected IO-APIC was found. If this kernel release is less than\n"
+ "three months old please report this to linux-smp@vger.kernel.org\n");
+}
+
+void __init print_IO_APIC(void)
+{
+ int apic, i;
+ struct IO_APIC_reg_00 reg_00;
+ struct IO_APIC_reg_01 reg_01;
+ struct IO_APIC_reg_02 reg_02;
+ unsigned long flags;
+
+ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+ for (i = 0; i < nr_ioapics; i++)
+ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+ mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+
+ /*
+ * We are a bit conservative about what we expect. We have to
+ * know about every hardware change ASAP.
+ */
+ printk(KERN_INFO "testing the IO APIC.......................\n");
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_00 = io_apic_read(apic, 0);
+ *(int *)&reg_01 = io_apic_read(apic, 1);
+ if (reg_01.version >= 0x10)
+ *(int *)&reg_02 = io_apic_read(apic, 2);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk("\n");
+ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+ printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)&reg_00);
+ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.ID);
+ if (reg_00.__reserved_1 || reg_00.__reserved_2)
+ UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
+ printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.entries);
+ if ( (reg_01.entries != 0x0f) && /* older (Neptune) boards */
+ (reg_01.entries != 0x17) && /* typical ISA+PCI boards */
+ (reg_01.entries != 0x1b) && /* Compaq Proliant boards */
+ (reg_01.entries != 0x1f) && /* dual Xeon boards */
+ (reg_01.entries != 0x22) && /* bigger Xeon boards */
+ (reg_01.entries != 0x2E) &&
+ (reg_01.entries != 0x3F)
+ )
+ UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.PRQ);
+ printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.version);
+ if ( (reg_01.version != 0x01) && /* 82489DX IO-APICs */
+ (reg_01.version != 0x02) && /* VIA */
+ (reg_01.version != 0x10) && /* oldest IO-APICs */
+ (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */
+ (reg_01.version != 0x13) && /* Xeon IO-APICs */
+ (reg_01.version != 0x20) /* Intel P64H (82806 AA) */
+ )
+ UNEXPECTED_IO_APIC();
+ if (reg_01.__reserved_1 || reg_01.__reserved_2)
+ UNEXPECTED_IO_APIC();
+
+ if (reg_01.version >= 0x10) {
+ printk(KERN_DEBUG ".... register #02: %08X\n", *(int *)&reg_02);
+ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.arbitration);
+ if (reg_02.__reserved_1 || reg_02.__reserved_2)
+ UNEXPECTED_IO_APIC();
+ }
+
+#if 0
+ printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+ printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
+ " Stat Dest Deli Vect: \n");
+
+ for (i = 0; i <= reg_01.entries; i++) {
+ struct IO_APIC_route_entry entry;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+ *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk(KERN_DEBUG " %02x %03X %02X ",
+ i,
+ entry.dest.logical.logical_dest,
+ entry.dest.physical.physical_dest
+ );
+
+ printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
+ entry.mask,
+ entry.trigger,
+ entry.irr,
+ entry.polarity,
+ entry.delivery_status,
+ entry.dest_mode,
+ entry.delivery_mode,
+ entry.vector
+ );
+ }
+ }
+ printk(KERN_DEBUG "IRQ to pin mappings:\n");
+ for (i = 0; i < NR_IRQS; i++) {
+ struct irq_pin_list *entry = irq_2_pin + i;
+ if (entry->pin < 0)
+ continue;
+ printk(KERN_DEBUG "IRQ%d ", i);
+ for (;;) {
+ printk("-> %d:%d", entry->apic, entry->pin);
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+ printk("\n");
+#endif
+ }
+
+ printk(KERN_INFO ".................................... done.\n");
+
+ return;
+}
+
+static void print_APIC_bitfield (int base)
+{
+ unsigned int v;
+ int i, j;
+
+ printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+ for (i = 0; i < 8; i++) {
+ v = apic_read(base + i*0x10);
+ for (j = 0; j < 32; j++) {
+ if (v & (1<<j))
+ printk("1");
+ else
+ printk("0");
+ }
+ printk("\n");
+ }
+}
+
+void /*__init*/ print_local_APIC(void * dummy)
+{
+ unsigned int v, ver, maxlvt;
+
+ printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+ smp_processor_id(), hard_smp_processor_id());
+ v = apic_read(APIC_ID);
+ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
+ v = apic_read(APIC_LVR);
+ printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+ ver = GET_APIC_VERSION(v);
+ maxlvt = get_maxlvt();
+
+ v = apic_read(APIC_TASKPRI);
+ printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ v = apic_read(APIC_ARBPRI);
+ printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+ v & APIC_ARBPRI_MASK);
+ v = apic_read(APIC_PROCPRI);
+ printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+ }
+
+ v = apic_read(APIC_EOI);
+ printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
+ v = apic_read(APIC_RRR);
+ printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+ v = apic_read(APIC_LDR);
+ printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+ v = apic_read(APIC_DFR);
+ printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+ v = apic_read(APIC_SPIV);
+ printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+ printk(KERN_DEBUG "... APIC ISR field:\n");
+ print_APIC_bitfield(APIC_ISR);
+ printk(KERN_DEBUG "... APIC TMR field:\n");
+ print_APIC_bitfield(APIC_TMR);
+ printk(KERN_DEBUG "... APIC IRR field:\n");
+ print_APIC_bitfield(APIC_IRR);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ v = apic_read(APIC_ESR);
+ printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_ICR);
+ printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
+ v = apic_read(APIC_ICR2);
+ printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+
+ v = apic_read(APIC_LVTT);
+ printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+ if (maxlvt > 3) { /* PC is LVT#4. */
+ v = apic_read(APIC_LVTPC);
+ printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+ }
+ v = apic_read(APIC_LVT0);
+ printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+ v = apic_read(APIC_LVT1);
+ printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+ if (maxlvt > 2) { /* ERR is LVT#3. */
+ v = apic_read(APIC_LVTERR);
+ printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_TMICT);
+ printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+ v = apic_read(APIC_TMCCT);
+ printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+ v = apic_read(APIC_TDCR);
+ printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+ printk("\n");
+}
+
+void print_all_local_APICs (void)
+{
+ smp_call_function(print_local_APIC, NULL, 1, 1);
+ print_local_APIC(NULL);
+}
+
+void /*__init*/ print_PIC(void)
+{
+ extern spinlock_t i8259A_lock;
+ unsigned int v, flags;
+
+ printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ v = inb(0xa1) << 8 | inb(0x21);
+ printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
+
+ v = inb(0xa0) << 8 | inb(0x20);
+ printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
+
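+	/*
+	 * OCW3: 0x0b selects the in-service register for the next read,
+	 * 0x0a switches the read-back register back to the IRR.
+	 */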
+ outb(0x0b,0xa0);
+ outb(0x0b,0x20);
+ v = inb(0xa0) << 8 | inb(0x20);
+ outb(0x0a,0xa0);
+ outb(0x0a,0x20);
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+
+ printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
+
+ v = inb(0x4d1) << 8 | inb(0x4d0);
+ printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+}
+
+static void __init enable_IO_APIC(void)
+{
+ struct IO_APIC_reg_01 reg_01;
+ int i;
+ unsigned long flags;
+
+ for (i = 0; i < PIN_MAP_SIZE; i++) {
+ irq_2_pin[i].pin = -1;
+ irq_2_pin[i].next = 0;
+ }
+ if (!pirqs_enabled)
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+
+ /*
+ * The number of IO-APIC IRQ registers (== #pins):
+ */
+ for (i = 0; i < nr_ioapics; i++) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_01 = io_apic_read(i, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ nr_ioapic_registers[i] = reg_01.entries+1;
+ }
+
+ /*
+ * Do not trust the IO-APIC being empty at bootup
+ */
+ clear_IO_APIC();
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
+ /*
+ * Clear the IO-APIC before rebooting:
+ */
+ clear_IO_APIC();
+
+ disconnect_bsp_APIC();
+}
+
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
+ */
+
+static void __init setup_ioapic_ids_from_mpc (void)
+{
+ struct IO_APIC_reg_00 reg_00;
+ unsigned long phys_id_present_map = phys_cpu_present_map;
+ int apic;
+ int i;
+ unsigned char old_id;
+ unsigned long flags;
+
+ if (clustered_apic_mode)
+ /* We don't have a good way to do this yet - hack */
+ phys_id_present_map = (u_long) 0xf;
+ /*
+ * Set the IOAPIC ID to the value stored in the MPC table.
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ /* Read the register 0 value */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_00 = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ old_id = mp_ioapics[apic].mpc_apicid;
+
+ if (mp_ioapics[apic].mpc_apicid >= apic_broadcast_id) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+ apic, mp_ioapics[apic].mpc_apicid);
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ reg_00.ID);
+ mp_ioapics[apic].mpc_apicid = reg_00.ID;
+ }
+
+ /*
+ * Sanity check, is the ID really free? Every APIC in a
+ * system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ * I/O APIC IDs no longer have any meaning for xAPICs and SAPICs.
+ */
+ if ((clustered_apic_mode != CLUSTERED_APIC_XAPIC) &&
+ (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid))) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+ apic, mp_ioapics[apic].mpc_apicid);
+ for (i = 0; i < 0xf; i++)
+ if (!(phys_id_present_map & (1 << i)))
+ break;
+ if (i >= apic_broadcast_id)
+ panic("Max APIC ID exceeded!\n");
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ i);
+ phys_id_present_map |= 1 << i;
+ mp_ioapics[apic].mpc_apicid = i;
+ } else {
+ printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid);
+ phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid;
+ }
+
+
+ /*
+ * We need to adjust the IRQ routing table
+ * if the ID changed.
+ */
+ if (old_id != mp_ioapics[apic].mpc_apicid)
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mpc_dstapic == old_id)
+ mp_irqs[i].mpc_dstapic
+ = mp_ioapics[apic].mpc_apicid;
+
+ /*
+ * Read the right value from the MPC table and
+ * write it into the ID register.
+ */
+ printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
+ mp_ioapics[apic].mpc_apicid);
+
+ reg_00.ID = mp_ioapics[apic].mpc_apicid;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0, *(int *)&reg_00);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /*
+ * Sanity check
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_00 = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (reg_00.ID != mp_ioapics[apic].mpc_apicid)
+ panic("could not set ID!\n");
+ else
+ printk(" ok.\n");
+ }
+}
+
+/*
+ * There is a nasty bug in some older SMP boards: their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ * - timer IRQ defaults to IO-APIC IRQ
+ * - if this function detects that timer IRQs are defunct, then we fall
+ * back to ISA timer IRQs
+ */
+static int __init timer_irq_works(void)
+{
+ unsigned int t1 = jiffies;
+
+ sti();
+ /* Let ten ticks pass... */
+ mdelay((10 * 1000) / HZ);
+
+ /*
+ * Expect a few ticks at least, to be sure some possible
+	 * glue logic does not lock up after the first one or
+	 * two ticks in a non-ExtINT mode. Also the local APIC
+ * might have cached one ExtINT interrupt. Finally, at
+ * least one tick may be lost due to delays.
+ */
+ if (jiffies - t1 > 4)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way, since then we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge triggered needs to resend any interrupt
+ * that was delayed but this is now handled in the device
+ * independent code.
+ */
+#define enable_edge_ioapic_irq unmask_IO_APIC_irq
+
+static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ }
+
+/*
+ * Starting up an edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need to
+ * return 1 to indicate that it was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+
+static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+{
+ int was_pending = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ if (irq < 16) {
+ disable_8259A_irq(irq);
+ if (i8259A_irq_pending(irq))
+ was_pending = 1;
+ }
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return was_pending;
+}
+
+#define shutdown_edge_ioapic_irq disable_edge_ioapic_irq
+
+/*
+ * Once we have recorded IRQ_PENDING already, we can mask the
+ * interrupt for real. This prevents IRQ storms from unhandled
+ * devices.
+ */
+static void ack_edge_ioapic_irq(unsigned int irq)
+{
+ if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+ == (IRQ_PENDING | IRQ_DISABLED))
+ mask_IO_APIC_irq(irq);
+ ack_APIC_irq();
+}
+
+static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ }
+
+
+/*
+ * Level triggered interrupts can just be masked,
+ * and shutting down and starting up the interrupt
+ * is the same as enabling and disabling them -- except
+ * that startup needs to return a "was pending" value.
+ *
+ * Level triggered interrupts are special because we
+ * do not touch any IO-APIC register while handling
+ * them. We ack the APIC in the end-IRQ handler, not
+ * in the start-IRQ-handler. Protection against reentrance
+ * from the same interrupt is still provided, both by the
+ * generic IRQ layer and by the fact that an unacked local
+ * APIC does not accept IRQs.
+ */
+static unsigned int startup_level_ioapic_irq (unsigned int irq)
+{
+ unmask_IO_APIC_irq(irq);
+
+ return 0; /* don't check for pending */
+}
+
+#define shutdown_level_ioapic_irq mask_IO_APIC_irq
+#define enable_level_ioapic_irq unmask_IO_APIC_irq
+#define disable_level_ioapic_irq mask_IO_APIC_irq
+
+static void end_level_ioapic_irq (unsigned int irq)
+{
+ unsigned long v;
+ int i;
+
+/*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets). Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as an edge-triggered one but the respective IRR
+ * bit gets set nevertheless. As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source. The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually. We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt. We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul. --macro
+ */
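+	/*
+	 * The TMR is banked as eight 32-bit registers spaced 0x10 apart;
+	 * vector i lives at byte offset (i & ~0x1f) >> 1, bit (i & 0x1f).
+	 */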
+ i = IO_APIC_VECTOR(irq);
+ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+ ack_APIC_irq();
+
+ if (!(v & (1 << (i & 0x1f)))) {
+#ifdef APIC_LOCKUP_DEBUG
+ struct irq_pin_list *entry;
+#endif
+
+#ifdef APIC_MISMATCH_DEBUG
+ atomic_inc(&irq_mis_count);
+#endif
+ spin_lock(&ioapic_lock);
+ __mask_and_edge_IO_APIC_irq(irq);
+#ifdef APIC_LOCKUP_DEBUG
+ for (entry = irq_2_pin + irq;;) {
+ unsigned int reg;
+
+ if (entry->pin == -1)
+ break;
+ reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2);
+ if (reg & 0x00004000)
+ printk(KERN_CRIT "Aieee!!! Remote IRR"
+ " still set after unlock!\n");
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+#endif
+ __unmask_and_level_IO_APIC_irq(irq);
+ spin_unlock(&ioapic_lock);
+ }
+}
+
+static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ }
+
+static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
+{
+ unsigned long flags;
+ /*
+	 * Only the low 8 bits of the mask are valid; they form the
+	 * logical destination field (bits 24-31 of the high dword).
+ */
+ mask = mask << 24;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __DO_ACTION(1, = mask, )
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+
+static struct hw_interrupt_type ioapic_edge_irq_type = {
+ "IO-APIC-edge",
+ startup_edge_ioapic_irq,
+ shutdown_edge_ioapic_irq,
+ enable_edge_ioapic_irq,
+ disable_edge_ioapic_irq,
+ ack_edge_ioapic_irq,
+ end_edge_ioapic_irq,
+ set_ioapic_affinity,
+};
+
+static struct hw_interrupt_type ioapic_level_irq_type = {
+ "IO-APIC-level",
+ startup_level_ioapic_irq,
+ shutdown_level_ioapic_irq,
+ enable_level_ioapic_irq,
+ disable_level_ioapic_irq,
+ mask_and_ack_level_ioapic_irq,
+ end_level_ioapic_irq,
+ set_ioapic_affinity,
+};
+
+static inline void init_IO_APIC_traps(void)
+{
+ int irq;
+
+ /*
+ * NOTE! The local APIC isn't very good at handling
+ * multiple interrupts at the same interrupt level.
+ * As the interrupt level is determined by taking the
+ * vector number and shifting that right by 4, we
+ * want to spread these out a bit so that they don't
+ * all fall in the same interrupt level.
+ *
+ * Also, we've got to be careful not to trash gate
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
+ */
+ for (irq = 0; irq < NR_IRQS ; irq++) {
+ if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) {
+ /*
+ * Hmm.. We don't have an entry for this,
+ * so default to an old-fashioned 8259
+ * interrupt if we can..
+ */
+ if (irq < 16)
+ make_8259A_irq(irq);
+ else
+ /* Strange. Oh, well.. */
+ irq_desc[irq].handler = &no_irq_type;
+ }
+ }
+}
+
+static void enable_lapic_irq (unsigned int irq)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+static void disable_lapic_irq (unsigned int irq)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+static void ack_lapic_irq (unsigned int irq)
+{
+ ack_APIC_irq();
+}
+
+static void end_lapic_irq (unsigned int i) { /* nothing */ }
+
+static struct hw_interrupt_type lapic_irq_type = {
+ "local-APIC-edge",
+ NULL, /* startup_irq() not used for IRQ0 */
+ NULL, /* shutdown_irq() not used for IRQ0 */
+ enable_lapic_irq,
+ disable_lapic_irq,
+ ack_lapic_irq,
+ end_lapic_irq
+};
+
+static void enable_NMI_through_LVT0 (void * dummy)
+{
+ unsigned int v, ver;
+
+ ver = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(ver);
+ v = APIC_DM_NMI; /* unmask and set to NMI */
+ if (!APIC_INTEGRATED(ver)) /* 82489DX */
+ v |= APIC_LVT_LEVEL_TRIGGER;
+ apic_write_around(APIC_LVT0, v);
+}
+
+static void setup_nmi (void)
+{
+ /*
+ * Dirty trick to enable the NMI watchdog ...
+ * We put the 8259A master into AEOI mode and
+ * unmask on all local APICs LVT0 as NMI.
+ *
+ * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
+ * is from Maciej W. Rozycki - so we do not have to EOI from
+ * the NMI handler or the timer interrupt.
+ */
+ printk(KERN_INFO "activating NMI Watchdog ...");
+
+ smp_call_function(enable_NMI_through_LVT0, NULL, 1, 1);
+ enable_NMI_through_LVT0(NULL);
+
+ printk(" done.\n");
+}
+
+/*
+ * This looks a bit hackish, but it's about the only way of sending
+ * a few INTA cycles to 8259As and any associated glue logic. ICR does
+ * not support the ExtINT mode, unfortunately. We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA. --macro
+ */
+static inline void unlock_ExtINT_logic(void)
+{
+ int pin, i;
+ struct IO_APIC_route_entry entry0, entry1;
+ unsigned char save_control, save_freq_select;
+ unsigned long flags;
+
+ pin = find_isa_irq_pin(8, mp_INT);
+ if (pin == -1)
+ return;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
+ *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ clear_IO_APIC_pin(0, pin);
+
+ memset(&entry1, 0, sizeof(entry1));
+
+ entry1.dest_mode = 0; /* physical delivery */
+ entry1.mask = 0; /* unmask IRQ now */
+ entry1.dest.physical.physical_dest = hard_smp_processor_id();
+ entry1.delivery_mode = dest_ExtINT;
+ entry1.polarity = entry0.polarity;
+ entry1.trigger = 0;
+ entry1.vector = 0;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+ io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
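+	/*
+	 * Program the RTC for a 1024Hz periodic interrupt (rate select 6)
+	 * and enable it (RTC_PIE), so that IRQ8 fires and generates the
+	 * INTA cycles we are after.
+	 */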
+ save_control = CMOS_READ(RTC_CONTROL);
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+ RTC_FREQ_SELECT);
+ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+ i = 100;
+ while (i-- > 0) {
+ mdelay(10);
+ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+ i -= 10;
+ }
+
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ clear_IO_APIC_pin(0, pin);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+ io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
+ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ */
+static inline void check_timer(void)
+{
+ extern int timer_ack;
+ int pin1, pin2;
+ int vector;
+
+ /*
+ * get/set the timer IRQ vector:
+ */
+ disable_8259A_irq(0);
+ vector = assign_irq_vector(0);
+ set_intr_gate(vector, interrupt[0]);
+
+ /*
+ * Subtle, code in do_timer_interrupt() expects an AEOI
+ * mode for the 8259A whenever interrupts are routed
+ * through I/O APICs. Also IRQ0 has to be enabled in
+ * the 8259A which implies the virtual wire has to be
+ * disabled in the local APIC.
+ */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+ init_8259A(1);
+ timer_ack = 1;
+ enable_8259A_irq(0);
+
+ pin1 = find_isa_irq_pin(0, mp_INT);
+ pin2 = find_isa_irq_pin(0, mp_ExtINT);
+
+ printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
+
+ if (pin1 != -1) {
+ /*
+ * Ok, does IRQ0 through the IOAPIC work?
+ */
+ unmask_IO_APIC_irq(0);
+ if (timer_irq_works()) {
+ if (nmi_watchdog == NMI_IO_APIC) {
+ disable_8259A_irq(0);
+ setup_nmi();
+ enable_8259A_irq(0);
+ // XXX Xen check_nmi_watchdog();
+ }
+ return;
+ }
+ clear_IO_APIC_pin(0, pin1);
+ printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+ }
+
+ printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
+ if (pin2 != -1) {
+ printk("\n..... (found pin %d) ...", pin2);
+ /*
+ * legacy devices should be connected to IO APIC #0
+ */
+ setup_ExtINT_IRQ0_pin(pin2, vector);
+ if (timer_irq_works()) {
+ printk("works.\n");
+ if (pin1 != -1)
+ replace_pin_at_irq(0, 0, pin1, 0, pin2);
+ else
+ add_pin_to_irq(0, 0, pin2);
+ if (nmi_watchdog == NMI_IO_APIC) {
+ setup_nmi();
+ // XXX Xen check_nmi_watchdog();
+ }
+ return;
+ }
+ /*
+ * Cleanup, just in case ...
+ */
+ clear_IO_APIC_pin(0, pin2);
+ }
+ printk(" failed.\n");
+
+ if (nmi_watchdog) {
+ printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
+ nmi_watchdog = 0;
+ }
+
+ printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+
+ disable_8259A_irq(0);
+ irq_desc[0].handler = &lapic_irq_type;
+ apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
+ enable_8259A_irq(0);
+
+ if (timer_irq_works()) {
+ printk(" works.\n");
+ return;
+ }
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+ printk(" failed.\n");
+
+ printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+
+ init_8259A(0);
+ make_8259A_irq(0);
+ apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+
+ unlock_ExtINT_logic();
+
+ if (timer_irq_works()) {
+ printk(" works.\n");
+ return;
+ }
+ printk(" failed :(.\n");
+ panic("IO-APIC + timer doesn't work! pester mingo@redhat.com");
+}
+
+/*
+ * IRQs that are handled by the old PIC in all cases:
+ * - IRQ2 is the cascade IRQ, and cannot be an IO-APIC IRQ.
+ *   Linux doesn't really care, as it's not actually used
+ *   for any interrupt handling anyway.
+ * - There used to be IRQ13 here as well, but all
+ *   MPS-compliant systems must not use it for FPU coupling,
+ *   and we want to use exception 16 anyway. And there are
+ *   systems that connect it to an I/O APIC for other uses.
+ *   Thus we don't mark it special any longer.
+ *
+ * Additionally, something is definitely wrong with irq9
+ * on PIIX4 boards.
+ */
+#define PIC_IRQS (1<<2)
+
+void __init setup_IO_APIC(void)
+{
+ enable_IO_APIC();
+
+ io_apic_irqs = ~PIC_IRQS;
+ printk("ENABLING IO-APIC IRQs\n");
+
+ /*
+ * Set up the IO-APIC IRQ routing table by parsing the MP-BIOS
+ * mptable:
+ */
+ setup_ioapic_ids_from_mpc();
+ sync_Arb_IDs();
+ setup_IO_APIC_irqs();
+ init_IO_APIC_traps();
+ check_timer();
+ print_IO_APIC();
+}
diff --git a/xen/arch/i386/ioremap.c b/xen/arch/i386/ioremap.c
new file mode 100644
index 0000000000..4ed7ba438d
--- /dev/null
+++ b/xen/arch/i386/ioremap.c
@@ -0,0 +1,107 @@
+/*
+ * arch/i386/mm/ioremap.c
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ * This is needed for high PCI addresses that aren't mapped in the
+ * 640k-1MB IO memory area on PC's
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ */
+
+//#include <linux/vmalloc.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+
+static unsigned long remap_base = 0;
+
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY)
+
+#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+static void new_l2e(l2_pgentry_t *pl2e)
+{
+ l1_pgentry_t *pl1e = (l1_pgentry_t *)get_free_page(GFP_KERNEL);
+ if ( !pl1e ) BUG();
+ clear_page(pl1e);
+ *pl2e = mk_l2_pgentry(__pa(pl1e)|L2_PROT);
+}
+
+
+void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+{
+ unsigned long vaddr;
+ unsigned long offset, cur=0, last_addr;
+ l2_pgentry_t *pl2e;
+ l1_pgentry_t *pl1e;
+
+ /* First time through, start allocating from far end of virtual memory. */
+ if ( !remap_base ) remap_base = IOREMAP_VIRT_START;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = phys_addr + size - 1;
+ if (!size || last_addr < phys_addr)
+ return NULL;
+
+ /*
+ * Don't remap the low PCI/ISA area, it's always mapped..
+ */
+ if (phys_addr >= 0xA0000 && last_addr < 0x100000)
+ return phys_to_virt(phys_addr);
+
+#if 0
+ /*
+ * Don't allow anybody to remap normal RAM that we're using..
+ */
+ if (phys_addr < virt_to_phys(high_memory)) {
+ char *t_addr, *t_end;
+ struct pfn_info *page;
+
+ t_addr = __va(phys_addr);
+ t_end = t_addr + (size - 1);
+
+ for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
+ if(!PageReserved(page))
+ return NULL;
+ }
+#endif
+
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr) - phys_addr;
+
+ /*
+ * Ok, go for it..
+ */
+ vaddr = remap_base;
+ remap_base += size;
+ pl2e = idle0_pg_table + l2_table_offset(vaddr);
+ if ( l2_pgentry_empty(*pl2e) ) new_l2e(pl2e);
+ pl1e = l2_pgentry_to_l1(*pl2e++) + l1_table_offset(vaddr);
+ for ( ; ; )
+ {
+ if ( !l1_pgentry_empty(*pl1e) ) BUG();
+ *pl1e++ = mk_l1_pgentry((phys_addr+cur)|L1_PROT|flags);
+ cur += PAGE_SIZE;
+ if ( cur == size ) break;
+ if ( !((unsigned long)pl1e & (PAGE_SIZE-1)) )
+ {
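+        /* pl1e has crossed a page boundary: one L1 table is full, so move on to the next L2 entry's L1 table. */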
+ if ( l2_pgentry_empty(*pl2e) ) new_l2e(pl2e);
+ pl1e = l2_pgentry_to_l1(*pl2e++);
+ }
+ }
+
+ flush_tlb_all();
+
+ return (void *) (offset + (char *)vaddr);
+}
+
+void iounmap(void *addr)
+{
+ /* NOP for now. */
+}
diff --git a/xen/arch/i386/irq.c b/xen/arch/i386/irq.c
new file mode 100644
index 0000000000..e58fb8f2ad
--- /dev/null
+++ b/xen/arch/i386/irq.c
@@ -0,0 +1,904 @@
+/*
+ * linux/arch/i386/kernel/irq.c
+ *
+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setup_irqs with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+/*
+ * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
+ *
+ * IRQs are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/interrupt.h>
+#include <xeno/irq.h>
+#include <xeno/slab.h>
+
+#include <asm/msr.h>
+#include <asm/hardirq.h>
+#include <asm/ptrace.h>
+#include <asm/atomic.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <asm/pgalloc.h>
+#include <xeno/delay.h>
+
+
+/*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * Every controller has a 'controller-template' that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * The code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+/*
+ * Controller mappings for all interrupt sources:
+ */
+irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
+{ [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}};
+
+/*
+ * Special irq handlers.
+ */
+
+void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
+
+/*
+ * Generic no controller code
+ */
+
+static void enable_none(unsigned int irq) { }
+static unsigned int startup_none(unsigned int irq) { return 0; }
+static void disable_none(unsigned int irq) { }
+static void ack_none(unsigned int irq)
+{
+/*
+ * What should we do if we get a hw irq event on an illegal vector?
+ * Each architecture has to answer this itself; it doesn't deserve
+ * a generic callback, I think.
+ */
+#if CONFIG_X86
+ printk("unexpected IRQ trap at vector %02x\n", irq);
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * Currently unexpected vectors happen only on SMP and APIC.
+ * We _must_ ack these because every local APIC has only N
+ * irq slots per priority level, and a 'hanging, unacked' IRQ
+ * holds up an irq slot - in excessive cases (when multiple
+ * unexpected vectors occur) that might lock up the APIC
+ * completely.
+ */
+ ack_APIC_irq();
+#endif
+#endif
+}
+
+/* startup is the same as "enable", shutdown is same as "disable" */
+#define shutdown_none disable_none
+#define end_none enable_none
+
+struct hw_interrupt_type no_irq_type = {
+ "none",
+ startup_none,
+ shutdown_none,
+ enable_none,
+ disable_none,
+ ack_none,
+ end_none
+};
+
+atomic_t irq_err_count;
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
+
+/*
+ * Generic, controller-independent functions:
+ */
+
+/*
+ * Global interrupt locks for SMP. Allow interrupts to come in on any
+ * CPU, yet make cli/sti act globally to protect critical regions..
+ */
+
+#ifdef CONFIG_SMP
+unsigned char global_irq_holder = 0xff;
+unsigned volatile long global_irq_lock; /* pedantic: long for set_bit --RR */
+
+#define MAXCOUNT 100000000
+
+/*
+ * I had a lockup scenario where a tight loop doing
+ * spin_unlock()/spin_lock() on CPU#1 was racing with
+ * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but
+ * apparently the spin_unlock() information did not make it
+ * through to CPU#0 ... nasty, is this by design, do we have to limit
+ * 'memory update oscillation frequency' artificially like here?
+ *
+ * Such 'high frequency update' races can be avoided by careful design, but
+ * some of our major constructs like spinlocks use similar techniques,
+ * it would be nice to clarify this issue. Set this define to 0 if you
+ * want to check whether your system freezes. I suspect the delay done
+ * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but
+ * I thought that such things are guaranteed by design, since we use
+ * the 'LOCK' prefix.
+ */
+#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0
+
+#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND
+# define SYNC_OTHER_CORES(x) udelay(x+1)
+#else
+/*
+ * We have to allow irqs to arrive between __sti and __cli
+ */
+# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop")
+#endif
+
+static inline void wait_on_irq(int cpu)
+{
+ for (;;) {
+
+ /*
+ * Wait until all interrupts are gone. Wait
+ * for bottom half handlers unless we're
+ * already executing in one..
+ */
+ if (!irqs_running())
+ if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock))
+ break;
+
+ /* Duh, we have to loop. Release the lock to avoid deadlocks */
+ clear_bit(0,&global_irq_lock);
+
+ for (;;) {
+ __sti();
+ SYNC_OTHER_CORES(cpu);
+ __cli();
+ if (irqs_running())
+ continue;
+ if (global_irq_lock)
+ continue;
+ if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock))
+ continue;
+ if (!test_and_set_bit(0,&global_irq_lock))
+ break;
+ }
+ }
+}
+
+/*
+ * This is called when we want to synchronize with
+ * interrupts. We may for example tell a device to
+ * stop sending interrupts: but to make sure there
+ * are no interrupts that are executing on another
+ * CPU we need to call this function.
+ */
+void synchronize_irq(void)
+{
+ if (irqs_running()) {
+ /* Stupid approach */
+ cli();
+ sti();
+ }
+}
+
+static inline void get_irqlock(int cpu)
+{
+ if (test_and_set_bit(0,&global_irq_lock)) {
+ /* do we already hold the lock? */
+ if ((unsigned char) cpu == global_irq_holder)
+ return;
+ /* Uhhuh.. Somebody else got it. Wait.. */
+ do {
+ do {
+ rep_nop();
+ } while (test_bit(0,&global_irq_lock));
+ } while (test_and_set_bit(0,&global_irq_lock));
+ }
+ /*
+	 * We also have to make sure that nobody else is running
+ * in an interrupt context.
+ */
+ wait_on_irq(cpu);
+
+ /*
+ * Ok, finally..
+ */
+ global_irq_holder = cpu;
+}
+
+#define EFLAGS_IF_SHIFT 9
+
+/*
+ * A global "cli()" while in an interrupt context
+ * turns into just a local cli(). Interrupts
+ * should use spinlocks for the (very unlikely)
+ * case that they ever want to protect against
+ * each other.
+ *
+ * If we already have local interrupts disabled,
+ * this will not turn a local disable into a
+ * global one (problems with spinlocks: this makes
+ * save_flags+cli+sti usable inside a spinlock).
+ */
+void __global_cli(void)
+{
+ unsigned int flags;
+
+ __save_flags(flags);
+ if (flags & (1 << EFLAGS_IF_SHIFT)) {
+ int cpu = smp_processor_id();
+ __cli();
+ if (!local_irq_count(cpu))
+ get_irqlock(cpu);
+ }
+}
+
+void __global_sti(void)
+{
+ int cpu = smp_processor_id();
+
+ if (!local_irq_count(cpu))
+ release_irqlock(cpu);
+ __sti();
+}
+
+/*
+ * SMP flags value to restore to:
+ * 0 - global cli
+ * 1 - global sti
+ * 2 - local cli
+ * 3 - local sti
+ */
+unsigned long __global_save_flags(void)
+{
+ int retval;
+ int local_enabled;
+ unsigned long flags;
+ int cpu = smp_processor_id();
+
+ __save_flags(flags);
+ local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1;
+ /* default to local */
+ retval = 2 + local_enabled;
+
+ /* check for global flags if we're not in an interrupt */
+ if (!local_irq_count(cpu)) {
+ if (local_enabled)
+ retval = 1;
+ if (global_irq_holder == cpu)
+ retval = 0;
+ }
+ return retval;
+}
+
+void __global_restore_flags(unsigned long flags)
+{
+ switch (flags) {
+ case 0:
+ __global_cli();
+ break;
+ case 1:
+ __global_sti();
+ break;
+ case 2:
+ __cli();
+ break;
+ case 3:
+ __sti();
+ break;
+ default:
+ printk("global_restore_flags: %08lx (%08lx)\n",
+ flags, (&flags)[-1]);
+ }
+}
+
+#endif
+
+/*
+ * This should really return information about whether
+ * we should do bottom half handling etc. Right now we
+ * end up _always_ checking the bottom half, which is a
+ * waste of time and is not what some drivers would
+ * prefer.
+ */
+int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action)
+{
+ int status;
+ int cpu = smp_processor_id();
+
+ irq_enter(cpu, irq);
+
+ status = 1; /* Force the "do bottom halves" bit */
+
+ if (!(action->flags & SA_INTERRUPT))
+ __sti();
+
+ do {
+ status |= action->flags;
+ action->handler(irq, action->dev_id, regs);
+ action = action->next;
+ } while (action);
+
+ __cli();
+
+ irq_exit(cpu, irq);
+
+ return status;
+}
+
+/*
+ * Generic enable/disable code: this just calls
+ * down into the PIC-specific version for the actual
+ * hardware disable after having gotten the irq
+ * controller lock.
+ */
+
+/**
+ * disable_irq_nosync - disable an irq without waiting
+ * @irq: Interrupt to disable
+ *
+ * Disable the selected interrupt line. Disables and Enables are
+ * nested.
+ * Unlike disable_irq(), this function does not ensure existing
+ * instances of the IRQ handler have completed before returning.
+ *
+ * This function may be called from IRQ context.
+ */
+
+inline void disable_irq_nosync(unsigned int irq)
+{
+ irq_desc_t *desc = irq_desc + irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ if (!desc->depth++) {
+ desc->status |= IRQ_DISABLED;
+ desc->handler->disable(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/**
+ * disable_irq - disable an irq and wait for completion
+ * @irq: Interrupt to disable
+ *
+ * Disable the selected interrupt line. Enables and Disables are
+ * nested.
+ * This function waits for any pending IRQ handlers for this interrupt
+ * to complete before returning. If you use this function while
+ *	holding a resource the IRQ handler may need, you will deadlock.
+ *
+ * This function may be called - with care - from IRQ context.
+ */
+
+void disable_irq(unsigned int irq)
+{
+ disable_irq_nosync(irq);
+
+ if (!local_irq_count(smp_processor_id())) {
+ do {
+ barrier();
+ cpu_relax();
+ } while (irq_desc[irq].status & IRQ_INPROGRESS);
+ }
+}
+
+/**
+ * enable_irq - enable handling of an irq
+ * @irq: Interrupt to enable
+ *
+ * Undoes the effect of one call to disable_irq(). If this
+ * matches the last disable, processing of interrupts on this
+ * IRQ line is re-enabled.
+ *
+ * This function may be called from IRQ context.
+ */
+
+void enable_irq(unsigned int irq)
+{
+ irq_desc_t *desc = irq_desc + irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ switch (desc->depth) {
+ case 1: {
+ unsigned int status = desc->status & ~IRQ_DISABLED;
+ desc->status = status;
+ if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+ desc->status = status | IRQ_REPLAY;
+ hw_resend_irq(desc->handler,irq);
+ }
+ desc->handler->enable(irq);
+ /* fall-through */
+ }
+ default:
+ desc->depth--;
+ break;
+ case 0:
+ printk("enable_irq(%u) unbalanced from %p\n", irq,
+ __builtin_return_address(0));
+ }
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+asmlinkage unsigned int do_IRQ(struct pt_regs regs)
+{
+ /*
+ * We ack quickly, we don't want the irq controller
+ * thinking we're snobs just because some other CPU has
+ * disabled global interrupts (we have already done the
+ * INT_ACK cycles, it's too late to try to pretend to the
+ * controller that we aren't taking the interrupt).
+ *
+ * 0 return value means that this irq is already being
+ * handled by some other CPU. (or is disabled)
+ */
+ int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code */
+ int cpu = smp_processor_id();
+ irq_desc_t *desc = irq_desc + irq;
+ struct irqaction * action;
+ unsigned int status;
+
+ spin_lock(&desc->lock);
+ desc->handler->ack(irq);
+ /*
+	 * REPLAY is when Linux resends an IRQ that was dropped earlier.
+	 * WAITING is used by probe to mark irqs that are being tested.
+	 */
+ status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+ status |= IRQ_PENDING; /* we _want_ to handle it */
+
+ /*
+ * If the IRQ is disabled for whatever reason, we cannot
+ * use the action we have.
+ */
+ action = NULL;
+ if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
+ action = desc->action;
+ status &= ~IRQ_PENDING; /* we commit to handling */
+ status |= IRQ_INPROGRESS; /* we are handling it */
+ }
+ desc->status = status;
+
+ /*
+ * If there is no IRQ handler or it was disabled, exit early.
+	 * Since we set PENDING, if another processor is handling
+	 * a different instance of this same irq, the other processor
+	 * will take care of it.
+ */
+ if (!action)
+ goto out;
+
+ /*
+ * Edge triggered interrupts need to remember
+ * pending events.
+ * This applies to any hw interrupts that allow a second
+ * instance of the same irq to arrive while we are in do_IRQ
+ * or in the handler. But the code here only handles the _second_
+ * instance of the irq, not the third or fourth. So it is mostly
+ * useful for irq hardware that does not mask cleanly in an
+ * SMP environment.
+ */
+ for (;;) {
+ spin_unlock(&desc->lock);
+ handle_IRQ_event(irq, &regs, action);
+ spin_lock(&desc->lock);
+
+ if (!(desc->status & IRQ_PENDING))
+ break;
+ desc->status &= ~IRQ_PENDING;
+ }
+ desc->status &= ~IRQ_INPROGRESS;
+ out:
+ /*
+ * The ->end() handler has to deal with interrupts which got
+ * disabled while the handler was running.
+ */
+ desc->handler->end(irq);
+ spin_unlock(&desc->lock);
+
+ if (softirq_pending(cpu))
+ do_softirq();
+
+ return 1;
+}
+
+/**
+ * request_irq - allocate an interrupt line
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs
+ * @irqflags: Interrupt type flags
+ * @devname: An ascii name for the claiming device
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources and enables the
+ * interrupt line and IRQ handling. From the point this
+ * call is made your handler function may be invoked. Since
+ * your handler function must clear any interrupt the board
+ * raises, you must take care both to initialise your hardware
+ * and to set up the interrupt handler in the right order.
+ *
+ * Dev_id must be globally unique. Normally the address of the
+ * device data structure is used as the cookie. Since the handler
+ * receives this value it makes sense to use it.
+ *
+ * If your interrupt is shared you must pass a non NULL dev_id
+ * as this is required when freeing the interrupt.
+ *
+ * Flags:
+ *
+ * SA_SHIRQ Interrupt is shared
+ *
+ * SA_INTERRUPT Disable local interrupts while processing
+ */
+
+int request_irq(unsigned int irq,
+ void (*handler)(int, void *, struct pt_regs *),
+ unsigned long irqflags,
+ const char * devname,
+ void *dev_id)
+{
+ int retval;
+ struct irqaction * action;
+
+ if (irq >= NR_IRQS)
+ return -EINVAL;
+ if (!handler)
+ return -EINVAL;
+
+ action = (struct irqaction *)
+ kmalloc(sizeof(struct irqaction), GFP_KERNEL);
+ if (!action)
+ return -ENOMEM;
+
+ action->handler = handler;
+ action->flags = irqflags;
+ action->mask = 0;
+ action->name = devname;
+ action->next = NULL;
+ action->dev_id = dev_id;
+
+ retval = setup_irq(irq, action);
+ if (retval)
+ kfree(action);
+
+ return retval;
+}
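+
+/*
+ * A typical call, as a sketch (the handler and cookie names here are
+ * illustrative, not defined anywhere in this file):
+ *
+ *	if (request_irq(dev->irq, mydev_interrupt, SA_SHIRQ, "mydev", dev))
+ *		return -EBUSY;
+ *	...
+ *	free_irq(dev->irq, dev);
+ */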
+
+/**
+ * free_irq - free an interrupt
+ * @irq: Interrupt line to free
+ * @dev_id: Device identity to free
+ *
+ * Remove an interrupt handler. The handler is removed and if the
+ * interrupt line is no longer in use by any driver it is disabled.
+ * On a shared IRQ the caller must ensure the interrupt is disabled
+ * on the card it drives before calling this function. The function
+ * does not return until any executing interrupts for this IRQ
+ * have completed.
+ *
+ * This function may be called from interrupt context.
+ *
+ * Bugs: Attempting to free an irq in a handler for the same irq hangs
+ * the machine.
+ */
+
+void free_irq(unsigned int irq, void *dev_id)
+{
+ irq_desc_t *desc;
+ struct irqaction **p;
+ unsigned long flags;
+
+ if (irq >= NR_IRQS)
+ return;
+
+ desc = irq_desc + irq;
+ spin_lock_irqsave(&desc->lock,flags);
+ p = &desc->action;
+ for (;;) {
+ struct irqaction * action = *p;
+ if (action) {
+ struct irqaction **pp = p;
+ p = &action->next;
+ if (action->dev_id != dev_id)
+ continue;
+
+ /* Found it - now remove it from the list of entries */
+ *pp = action->next;
+ if (!desc->action) {
+ desc->status |= IRQ_DISABLED;
+ desc->handler->shutdown(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock,flags);
+
+#ifdef CONFIG_SMP
+ /* Wait to make sure it's not being used on another CPU */
+ while (desc->status & IRQ_INPROGRESS) {
+ barrier();
+ cpu_relax();
+ }
+#endif
+ kfree(action);
+ return;
+ }
+ printk("Trying to free free IRQ%d\n",irq);
+ spin_unlock_irqrestore(&desc->lock,flags);
+ return;
+ }
+}
+
+/*
+ * IRQ autodetection code..
+ *
+ * This depends on the fact that any interrupt that
+ * comes in on to an unassigned handler will get stuck
+ * with "IRQ_WAITING" cleared and the interrupt
+ * disabled.
+ */
+
+static spinlock_t probe_sem = SPIN_LOCK_UNLOCKED;
+
+/**
+ * probe_irq_on - begin an interrupt autodetect
+ *
+ * Commence probing for an interrupt. The interrupts are scanned
+ * and a mask of potential interrupt lines is returned.
+ *
+ */
+
+unsigned long probe_irq_on(void)
+{
+ unsigned int i;
+ irq_desc_t *desc;
+ unsigned long val;
+ unsigned long s=0, e=0;
+
+ spin_lock(&probe_sem);
+ /*
+ * something may have generated an irq long ago and we want to
+ * flush such a longstanding irq before considering it as spurious.
+ */
+ for (i = NR_IRQS-1; i > 0; i--) {
+ desc = irq_desc + i;
+
+ spin_lock_irq(&desc->lock);
+ if (!irq_desc[i].action)
+ irq_desc[i].handler->startup(i);
+ spin_unlock_irq(&desc->lock);
+ }
+
+ /* Wait for longstanding interrupts to trigger (20ms delay). */
+ rdtscl(s);
+ do {
+ synchronize_irq();
+ rdtscl(e);
+ } while ( ((e-s)/ticks_per_usec) < 20000 );
+
+ /*
+ * enable any unassigned irqs
+ * (we must startup again here because if a longstanding irq
+ * happened in the previous stage, it may have masked itself)
+ */
+ for (i = NR_IRQS-1; i > 0; i--) {
+ desc = irq_desc + i;
+
+ spin_lock_irq(&desc->lock);
+ if (!desc->action) {
+ desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
+ if (desc->handler->startup(i))
+ desc->status |= IRQ_PENDING;
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+
+ /*
+ * Wait for spurious interrupts to trigger (100ms delay).
+ */
+ rdtscl(s);
+ do {
+ synchronize_irq();
+ rdtscl(e);
+ } while ( ((e-s)/ticks_per_usec) < 100000 );
+
+ /*
+ * Now filter out any obviously spurious interrupts
+ */
+ val = 0;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ /* It triggered already - consider it spurious. */
+ if (!(status & IRQ_WAITING)) {
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ } else
+ if (i < 32)
+ val |= 1 << i;
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+
+ return val;
+}
+
+/*
+ * Return a mask of triggered interrupts (this
+ * can handle only legacy ISA interrupts).
+ */
+
+/**
+ * probe_irq_mask - scan a bitmap of interrupt lines
+ * @val: mask of interrupts to consider
+ *
+ * Scan the ISA bus interrupt lines and return a bitmap of
+ * active interrupts. The interrupt probe logic state is then
+ * returned to its previous value.
+ *
+ * Note: we need to scan all the irq's even though we will
+ * only return ISA irq numbers - just so that we reset them
+ * all to a known state.
+ */
+unsigned int probe_irq_mask(unsigned long val)
+{
+ int i;
+ unsigned int mask;
+
+ mask = 0;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ if (i < 16 && !(status & IRQ_WAITING))
+ mask |= 1 << i;
+
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+ spin_unlock(&probe_sem);
+
+ return mask & val;
+}
+
+/*
+ * Return the one interrupt that triggered (this can
+ * handle any interrupt source).
+ */
+
+/**
+ * probe_irq_off - end an interrupt autodetect
+ * @val: mask of potential interrupts (unused)
+ *
+ * Scans the unused interrupt lines and returns the line which
+ * appears to have triggered the interrupt. If no interrupt was
+ * found then zero is returned. If more than one interrupt is
+ * found then minus the first candidate is returned to indicate
+ *	there is doubt.
+ *
+ * The interrupt probe logic state is returned to its previous
+ * value.
+ *
+ *	BUGS: When used in a module (which arguably shouldn't happen)
+ * nothing prevents two IRQ probe callers from overlapping. The
+ * results of this are non-optimal.
+ */
+
+int probe_irq_off(unsigned long val)
+{
+ int i, irq_found, nr_irqs;
+
+ nr_irqs = 0;
+ irq_found = 0;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ if (!(status & IRQ_WAITING)) {
+ if (!nr_irqs)
+ irq_found = i;
+ nr_irqs++;
+ }
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+ spin_unlock(&probe_sem);
+
+ if (nr_irqs > 1)
+ irq_found = -irq_found;
+ return irq_found;
+}
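+
+/*
+ * Illustrative usage sketch (not part of this file): a driver autoprobe
+ * typically brackets a device-triggered interrupt between probe_irq_on()
+ * and probe_irq_off(); the device-poke helper named below is hypothetical.
+ *
+ *   int irq;
+ *   unsigned long mask = probe_irq_on();
+ *
+ *   mydev_trigger_interrupt();           (hypothetical device poke)
+ *   irq = probe_irq_off(mask);
+ *   if (irq > 0)
+ *           printk("device found on IRQ %d\n", irq);
+ */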
+
+/* this was setup_x86_irq but it seems pretty generic */
+int setup_irq(unsigned int irq, struct irqaction * new)
+{
+ int shared = 0;
+ unsigned long flags;
+ struct irqaction *old, **p;
+ irq_desc_t *desc = irq_desc + irq;
+
+ /*
+ * The following block of code has to be executed atomically
+ */
+ spin_lock_irqsave(&desc->lock,flags);
+ p = &desc->action;
+ if ((old = *p) != NULL) {
+ /* Can't share interrupts unless both agree to */
+ if (!(old->flags & new->flags & SA_SHIRQ)) {
+ spin_unlock_irqrestore(&desc->lock,flags);
+ return -EBUSY;
+ }
+
+ /* add new interrupt at end of irq queue */
+ do {
+ p = &old->next;
+ old = *p;
+ } while (old);
+ shared = 1;
+ }
+
+ *p = new;
+
+ if (!shared) {
+ desc->depth = 0;
+ desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
+ desc->handler->startup(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock,flags);
+
+ return 0;
+}
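+
+/*
+ * Illustrative sketch (not part of this file): low-level system interrupts
+ * register a handler via setup_irq() with a statically allocated irqaction;
+ * the handler name and MY_IRQ below are hypothetical.
+ *
+ *   static struct irqaction my_action =
+ *       { my_handler, SA_INTERRUPT, 0, "mydev", NULL, NULL };
+ *
+ *   setup_irq(MY_IRQ, &my_action);
+ */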
diff --git a/xen/arch/i386/mm.c b/xen/arch/i386/mm.c
new file mode 100644
index 0000000000..2d4d8ddf52
--- /dev/null
+++ b/xen/arch/i386/mm.c
@@ -0,0 +1,141 @@
+#include <xeno/config.h>
+#include <xeno/lib.h>
+#include <xeno/init.h>
+#include <xeno/mm.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/fixmap.h>
+
+static inline void set_pte_phys (unsigned long vaddr,
+ l1_pgentry_t entry)
+{
+ l2_pgentry_t *l2ent;
+ l1_pgentry_t *l1ent;
+
+ l2ent = idle0_pg_table + l2_table_offset(vaddr);
+ l1ent = l2_pgentry_to_l1(*l2ent) + l1_table_offset(vaddr);
+ *l1ent = entry;
+
+ /* It's enough to flush this one mapping. */
+ __flush_tlb_one(vaddr);
+}
+
+void __set_fixmap (enum fixed_addresses idx,
+ l1_pgentry_t entry)
+{
+ unsigned long address = __fix_to_virt(idx);
+
+ if (idx >= __end_of_fixed_addresses) {
+ printk("Invalid __set_fixmap\n");
+ return;
+ }
+ set_pte_phys(address, entry);
+}
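+
+/*
+ * Illustrative sketch (not part of this file): a caller would install a
+ * fixed-address mapping like this, assuming an mk_l1_pgentry() constructor
+ * analogous to the mk_l2_pgentry() used elsewhere in this tree; FIX_EXAMPLE
+ * stands in for a real enum fixed_addresses value.
+ *
+ *   __set_fixmap(FIX_EXAMPLE, mk_l1_pgentry(phys_addr | PAGE_HYPERVISOR));
+ */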
+
+static void __init fixrange_init (unsigned long start,
+ unsigned long end, l2_pgentry_t *pg_base)
+{
+ l2_pgentry_t *l2e;
+ int i;
+ unsigned long vaddr, page;
+
+ vaddr = start;
+ i = l2_table_offset(vaddr);
+ l2e = pg_base + i;
+
+ for ( ; (i < ENTRIES_PER_L2_PAGETABLE) && (vaddr != end); l2e++, i++ )
+ {
+ if ( !l2_pgentry_empty(*l2e) ) continue;
+ page = (unsigned long)get_free_page(GFP_KERNEL);
+ clear_page(page);
+ *l2e = mk_l2_pgentry(__pa(page) | PAGE_HYPERVISOR);
+ vaddr += 1 << L2_PAGETABLE_SHIFT;
+ }
+}
+
+void __init paging_init(void)
+{
+ unsigned long addr;
+ void *ioremap_pt;
+
+ /* XXX initialised in boot.S */
+ /*if ( cpu_has_pge ) set_in_cr4(X86_CR4_PGE);*/
+ /*if ( cpu_has_pse ) set_in_cr4(X86_CR4_PSE);*/
+ /*if ( cpu_has_pae ) set_in_cr4(X86_CR4_PAE);*/
+
+ /*
+ * Fixed mappings, only the page table structure has to be
+ * created - mappings will be set by set_fixmap():
+ */
+ addr = FIXADDR_START & ~((1<<L2_PAGETABLE_SHIFT)-1);
+ fixrange_init(addr, 0, idle0_pg_table);
+
+ /* Create page table for ioremap(). */
+ ioremap_pt = (void *)get_free_page(GFP_KERNEL);
+ clear_page(ioremap_pt);
+ idle0_pg_table[IOREMAP_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ mk_l2_pgentry(__pa(ioremap_pt) | PAGE_HYPERVISOR);
+
+ /* Create read-only mapping of MPT for guest-OS use. */
+ idle0_pg_table[READONLY_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ idle0_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT];
+ mk_l2_readonly(idle0_pg_table +
+ (READONLY_MPT_VIRT_START >> L2_PAGETABLE_SHIFT));
+}
+
+void __init zap_low_mappings (void)
+{
+ int i, j;
+ for ( i = 0; i < smp_num_cpus; i++ )
+ {
+ for ( j = 0; j < DOMAIN_ENTRIES_PER_L2_PAGETABLE; j++ )
+ {
+ idle_pg_table[i][j] = mk_l2_pgentry(0);
+ }
+ }
+ flush_tlb_all();
+}
+
+
+long do_stack_and_ldt_switch(
+ unsigned long ss, unsigned long esp, unsigned long ldts)
+{
+ int nr = smp_processor_id();
+ struct tss_struct *t = &init_tss[nr];
+
+ if ( (ss == __HYPERVISOR_CS) || (ss == __HYPERVISOR_DS) )
+ return -1;
+
+ if ( ldts != current->mm.ldt_sel )
+ {
+ unsigned long *ptabent;
+ ptabent = (unsigned long *)GET_GDT_ADDRESS(current);
+ /* Out of range for GDT table? */
+ if ( (ldts * 8) > GET_GDT_ENTRIES(current) ) return -1;
+ ptabent += ldts * 2; /* 8 bytes per desc == 2 * unsigned long */
+ /*
+ * Not an LDT entry? Bits 8-11 of the descriptor's high word hold
+ * the type (0010b for an LDT) and bit 12 is the S flag (0 for a
+ * system descriptor), so masking with 0x00001f00 must yield
+ * exactly 0x00000200.
+ */
+ if ( (*ptabent & 0x00001f00) != 0x00000200 ) return -1;
+ current->mm.ldt_sel = ldts;
+ __load_LDT(ldts);
+ }
+
+ current->thread.ss1 = ss;
+ current->thread.esp1 = esp;
+ t->ss1 = ss;
+ t->esp1 = esp;
+
+ return 0;
+}
+
+
+long do_set_gdt(unsigned long *frame_list, int entries)
+{
+ return -ENOSYS;
+}
+
+
+long do_update_descriptor(
+ unsigned long pa, unsigned long word1, unsigned long word2)
+{
+ return -ENOSYS;
+}
diff --git a/xen/arch/i386/mpparse.c b/xen/arch/i386/mpparse.c
new file mode 100644
index 0000000000..0e2ca870a9
--- /dev/null
+++ b/xen/arch/i386/mpparse.c
@@ -0,0 +1,944 @@
+/*
+ * Intel Multiprocessor Specification 1.1 and 1.4
+ * compliant MP-table parsing routines.
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Erich Boleyn : MP v1.4 and additional changes.
+ * Alan Cox : Added EBDA scanning
+ * Ingo Molnar : various cleanups and rewrites
+ * Maciej W. Rozycki : Bits for default MP configurations
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/lib.h>
+#include <asm/io.h>
+#include <xeno/irq.h>
+#include <xeno/smp.h>
+#include <asm/mpspec.h>
+#include <asm/pgalloc.h>
+#include <asm/smpboot.h>
+#include <xeno/kernel.h>
+
+int numnodes = 1; /* XXX Xen */
+
+/* Have we found an MP table */
+int smp_found_config;
+
+/*
+ * Various Linux-internal data structures created from the
+ * MP-table.
+ */
+int apic_version [MAX_APICS];
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+int mp_current_pci_id;
+int *mp_bus_id_to_type;
+int *mp_bus_id_to_node;
+int *mp_bus_id_to_local;
+int *mp_bus_id_to_pci_bus;
+int max_mp_busses;
+int max_irq_sources;
+
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+struct mpc_config_intsrc *mp_irqs;
+
+/* MP IRQ source entries */
+int mp_irq_entries;
+
+int nr_ioapics;
+
+int pic_mode;
+unsigned long mp_lapic_addr;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+unsigned int boot_cpu_logical_apicid = -1U;
+/* Internal processor count */
+static unsigned int num_processors;
+
+/* Bitmask of physically existing CPUs */
+unsigned long phys_cpu_present_map;
+unsigned long logical_cpu_present_map;
+
+#ifdef CONFIG_X86_CLUSTERED_APIC
+unsigned char esr_disable = 0;
+unsigned char clustered_apic_mode = CLUSTERED_APIC_NONE;
+unsigned int apic_broadcast_id = APIC_BROADCAST_ID_APIC;
+#endif
+unsigned char raw_phys_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+/*
+ * Intel MP BIOS table parsing routines:
+ */
+
+#ifndef CONFIG_X86_VISWS_APIC
+/*
+ * Checksum an MP configuration block.
+ */
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+ int sum = 0;
+
+ while (len--)
+ sum += *mp++;
+
+ return sum & 0xFF;
+}
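+
+/*
+ * Note: MP structures carry a checksum byte chosen so that all bytes of
+ * the structure sum to zero modulo 256, so a non-zero return here means
+ * a corrupt table. E.g. validating a 16-byte floating pointer structure:
+ *
+ *   if (mpf_checksum((unsigned char *)mpf, 16))
+ *           printk("bad MP floating pointer checksum\n");
+ */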
+
+/*
+ * Processor encoding in an MP configuration block
+ */
+
+static char __init *mpc_family(int family,int model)
+{
+ static char n[32];
+ static char *model_defs[]=
+ {
+ "80486DX","80486DX",
+ "80486SX","80486DX/2 or 80487",
+ "80486SL","80486SX/2",
+ "Unknown","80486DX/2-WB",
+ "80486DX/4","80486DX/4-WB"
+ };
+
+ switch (family) {
+ case 0x04:
+ if (model < 10)
+ return model_defs[model];
+ break;
+
+ case 0x05:
+ return("Pentium(tm)");
+
+ case 0x06:
+ return("Pentium(tm) Pro");
+
+ case 0x0F:
+ if (model == 0x00)
+ return("Pentium 4(tm)");
+ if (model == 0x02)
+ return("Pentium 4(tm) XEON(tm)");
+ if (model == 0x0F)
+ return("Special controller");
+ }
+ sprintf(n,"Unknown CPU [%d:%d]",family, model);
+ return n;
+}
+
+#ifdef CONFIG_X86_IO_APIC
+// XXX Xen extern int have_acpi_tables; /* set by acpitable.c */
+#define have_acpi_tables (0)
+#else
+#define have_acpi_tables (0)
+#endif
+
+/*
+ * Translation table entries have to be matched to main table entries
+ * by counter, hence the mpc_record variable - there doesn't seem to be
+ * a less disgusting way of doing this.
+ */
+
+static int mpc_record;
+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
+
+void __init MP_processor_info (struct mpc_config_processor *m)
+{
+ int ver, quad, logical_apicid;
+
+ if (!(m->mpc_cpuflag & CPU_ENABLED))
+ return;
+
+ logical_apicid = m->mpc_apicid;
+ if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+ quad = translation_table[mpc_record]->trans_quad;
+ logical_apicid = (quad << 4) +
+ (m->mpc_apicid ? m->mpc_apicid << 1 : 1);
+ printk("Processor #%d %s APIC version %d (quad %d, apic %d)\n",
+ m->mpc_apicid,
+ mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
+ (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
+ m->mpc_apicver, quad, logical_apicid);
+ } else {
+ printk("Processor #%d %s APIC version %d\n",
+ m->mpc_apicid,
+ mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
+ (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
+ m->mpc_apicver);
+ }
+
+ if (m->mpc_featureflag&(1<<0))
+ Dprintk(" Floating point unit present.\n");
+ if (m->mpc_featureflag&(1<<7))
+ Dprintk(" Machine Exception supported.\n");
+ if (m->mpc_featureflag&(1<<8))
+ Dprintk(" 64 bit compare & exchange supported.\n");
+ if (m->mpc_featureflag&(1<<9))
+ Dprintk(" Internal APIC present.\n");
+ if (m->mpc_featureflag&(1<<11))
+ Dprintk(" SEP present.\n");
+ if (m->mpc_featureflag&(1<<12))
+ Dprintk(" MTRR present.\n");
+ if (m->mpc_featureflag&(1<<13))
+ Dprintk(" PGE present.\n");
+ if (m->mpc_featureflag&(1<<14))
+ Dprintk(" MCA present.\n");
+ if (m->mpc_featureflag&(1<<15))
+ Dprintk(" CMOV present.\n");
+ if (m->mpc_featureflag&(1<<16))
+ Dprintk(" PAT present.\n");
+ if (m->mpc_featureflag&(1<<17))
+ Dprintk(" PSE present.\n");
+ if (m->mpc_featureflag&(1<<18))
+ Dprintk(" PSN present.\n");
+ if (m->mpc_featureflag&(1<<19))
+ Dprintk(" Cache Line Flush Instruction present.\n");
+ /* 20 Reserved */
+ if (m->mpc_featureflag&(1<<21))
+ Dprintk(" Debug Trace and EMON Store present.\n");
+ if (m->mpc_featureflag&(1<<22))
+ Dprintk(" ACPI Thermal Throttle Registers present.\n");
+ if (m->mpc_featureflag&(1<<23))
+ Dprintk(" MMX present.\n");
+ if (m->mpc_featureflag&(1<<24))
+ Dprintk(" FXSR present.\n");
+ if (m->mpc_featureflag&(1<<25))
+ Dprintk(" XMM present.\n");
+ if (m->mpc_featureflag&(1<<26))
+ Dprintk(" Willamette New Instructions present.\n");
+ if (m->mpc_featureflag&(1<<27))
+ Dprintk(" Self Snoop present.\n");
+ if (m->mpc_featureflag&(1<<28))
+ Dprintk(" HT present.\n");
+ if (m->mpc_featureflag&(1<<29))
+ Dprintk(" Thermal Monitor present.\n");
+ /* 30, 31 Reserved */
+
+
+ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+ Dprintk(" Bootup CPU\n");
+ boot_cpu_physical_apicid = m->mpc_apicid;
+ boot_cpu_logical_apicid = logical_apicid;
+ }
+
+ num_processors++;
+
+ if (m->mpc_apicid > MAX_APICS) {
+ printk("Processor #%d INVALID. (Max ID: %d).\n",
+ m->mpc_apicid, MAX_APICS);
+ --num_processors;
+ return;
+ }
+ ver = m->mpc_apicver;
+
+ logical_cpu_present_map |= 1 << (num_processors-1);
+ phys_cpu_present_map |= apicid_to_phys_cpu_present(m->mpc_apicid);
+
+ /*
+ * Validate version
+ */
+ if (ver == 0x0) {
+ printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
+ ver = 0x10;
+ }
+ apic_version[m->mpc_apicid] = ver;
+ raw_phys_apicid[num_processors - 1] = m->mpc_apicid;
+}
+
+static void __init MP_bus_info (struct mpc_config_bus *m)
+{
+ char str[7];
+ int quad;
+
+ memcpy(str, m->mpc_bustype, 6);
+ str[6] = 0;
+
+ if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+ quad = translation_table[mpc_record]->trans_quad;
+ mp_bus_id_to_node[m->mpc_busid] = quad;
+ mp_bus_id_to_local[m->mpc_busid] = translation_table[mpc_record]->trans_local;
+ quad_local_to_mp_bus_id[quad][translation_table[mpc_record]->trans_local] = m->mpc_busid;
+ printk("Bus #%d is %s (node %d)\n", m->mpc_busid, str, quad);
+ } else {
+ Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
+ }
+
+ if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+ } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+ } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+ mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+ mp_current_pci_id++;
+ } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+ } else {
+ printk("Unknown bustype %s - ignoring\n", str);
+ }
+}
+
+static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+{
+ if (!(m->mpc_flags & MPC_APIC_USABLE))
+ return;
+
+ printk("I/O APIC #%d Version %d at 0x%lX.\n",
+ m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk("Max # of I/O APICs (%d) exceeded (found %d).\n",
+ MAX_IO_APICS, nr_ioapics);
+ panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+ }
+ if (!m->mpc_apicaddr) {
+ printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+ " found in MP table, skipping!\n");
+ return;
+ }
+ mp_ioapics[nr_ioapics] = *m;
+ nr_ioapics++;
+}
+
+static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+{
+ mp_irqs [mp_irq_entries] = *m;
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+ " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+ m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+ if (++mp_irq_entries == max_irq_sources)
+ panic("Max # of irq sources exceeded!!\n");
+}
+
+static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+{
+ Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+ " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+ /*
+ * Well it seems all SMP boards in existence
+ * use ExtINT/LVT1 == LINT0 and
+ * NMI/LVT2 == LINT1 - the following check
+ * will show us if this assumption is false.
+ * Until then we do not have to add baggage.
+ */
+ if ((m->mpc_irqtype == mp_ExtINT) &&
+ (m->mpc_destapiclint != 0))
+ BUG();
+ if ((m->mpc_irqtype == mp_NMI) &&
+ (m->mpc_destapiclint != 1))
+ BUG();
+}
+
+static void __init MP_translation_info (struct mpc_config_translation *m)
+{
+ printk("Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
+
+ if (mpc_record >= MAX_MPC_ENTRY)
+ printk("MAX_MPC_ENTRY exceeded!\n");
+ else
+ translation_table[mpc_record] = m; /* stash this for later */
+ if (m->trans_quad+1 > numnodes)
+ numnodes = m->trans_quad+1;
+}
+
+/*
+ * Read/parse the MPC oem tables
+ */
+
+static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
+ unsigned short oemsize)
+{
+ int count = sizeof (*oemtable); /* the header size */
+ unsigned char *oemptr = ((unsigned char *)oemtable)+count;
+
+ printk("Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
+ if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
+ {
+ printk("SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+ oemtable->oem_signature[0],
+ oemtable->oem_signature[1],
+ oemtable->oem_signature[2],
+ oemtable->oem_signature[3]);
+ return;
+ }
+ if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
+ {
+ printk("SMP oem mptable: checksum error!\n");
+ return;
+ }
+ while (count < oemtable->oem_length) {
+ switch (*oemptr) {
+ case MP_TRANSLATION:
+ {
+ struct mpc_config_translation *m=
+ (struct mpc_config_translation *)oemptr;
+ MP_translation_info(m);
+ oemptr += sizeof(*m);
+ count += sizeof(*m);
+ ++mpc_record;
+ break;
+ }
+ default:
+ {
+ printk("Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
+ return;
+ }
+ }
+ }
+}
+
+/*
+ * Read/parse the MPC
+ */
+
+static int __init smp_read_mpc(struct mp_config_table *mpc)
+{
+ char oem[16], prod[14];
+ int count=sizeof(*mpc);
+ unsigned char *mpt=((unsigned char *)mpc)+count;
+ int num_bus = 0;
+ int num_irq = 0;
+ unsigned char *bus_data;
+
+ if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+ panic("SMP mptable: bad signature [%c%c%c%c]!\n",
+ mpc->mpc_signature[0],
+ mpc->mpc_signature[1],
+ mpc->mpc_signature[2],
+ mpc->mpc_signature[3]);
+ return 0;
+ }
+ if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+ panic("SMP mptable: checksum error!\n");
+ return 0;
+ }
+ if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+ printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
+ mpc->mpc_spec);
+ return 0;
+ }
+ if (!mpc->mpc_lapic) {
+ printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+ return 0;
+ }
+ memcpy(oem,mpc->mpc_oem,8);
+ oem[8]=0;
+ printk("OEM ID: %s ",oem);
+
+ memcpy(prod,mpc->mpc_productid,12);
+ prod[12]=0;
+ printk("Product ID: %s ",prod);
+
+ detect_clustered_apic(oem, prod);
+
+ printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
+
+ /* save the local APIC address, it might be non-default,
+ * but only if we're not using the ACPI tables
+ */
+ if (!have_acpi_tables)
+ mp_lapic_addr = mpc->mpc_lapic;
+
+ if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) && mpc->mpc_oemptr) {
+ /* We need to process the oem mpc tables to tell us which quad things are in ... */
+ mpc_record = 0;
+ smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, mpc->mpc_oemsize);
+ mpc_record = 0;
+ }
+
+ /* Pre-scan to determine the number of bus and
+ * interrupt source records we have
+ */
+ while (count < mpc->mpc_length) {
+ switch (*mpt) {
+ case MP_PROCESSOR:
+ mpt += sizeof(struct mpc_config_processor);
+ count += sizeof(struct mpc_config_processor);
+ break;
+ case MP_BUS:
+ ++num_bus;
+ mpt += sizeof(struct mpc_config_bus);
+ count += sizeof(struct mpc_config_bus);
+ break;
+ case MP_INTSRC:
+ ++num_irq;
+ mpt += sizeof(struct mpc_config_intsrc);
+ count += sizeof(struct mpc_config_intsrc);
+ break;
+ case MP_IOAPIC:
+ mpt += sizeof(struct mpc_config_ioapic);
+ count += sizeof(struct mpc_config_ioapic);
+ break;
+ case MP_LINTSRC:
+ mpt += sizeof(struct mpc_config_lintsrc);
+ count += sizeof(struct mpc_config_lintsrc);
+ break;
+ default:
+ count = mpc->mpc_length;
+ break;
+ }
+ }
+ /*
+ * Paranoia: Allocate one extra of both the number of busses and number
+ * of irqs, and make sure that we have at least 4 interrupts per PCI
+ * slot. But some machines do not report very many busses, so we need
+ * to fall back on the older defaults.
+ */
+ ++num_bus;
+ max_mp_busses = max(num_bus, MAX_MP_BUSSES);
+ if (num_irq < (4 * max_mp_busses))
+ num_irq = 4 * num_bus; /* 4 intr/PCI slot */
+ ++num_irq;
+ max_irq_sources = max(num_irq, MAX_IRQ_SOURCES);
+
+ count = (max_mp_busses * sizeof(int)) * 4;
+ count += (max_irq_sources * sizeof(struct mpc_config_intsrc));
+
+ {
+ /* bus_data = alloc_bootmem(count); XXX Xen */
+ static char arr[4096];
+ if (count > 4096) BUG();
+ bus_data = (void *)arr;
+ }
+ if (!bus_data) {
+ printk(KERN_ERR "SMP mptable: out of memory!\n");
+ return 0;
+ }
+ mp_bus_id_to_type = (int *)&bus_data[0];
+ mp_bus_id_to_node = (int *)&bus_data[(max_mp_busses * sizeof(int))];
+ mp_bus_id_to_local = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 2];
+ mp_bus_id_to_pci_bus = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 3];
+ mp_irqs = (struct mpc_config_intsrc *)&bus_data[(max_mp_busses * sizeof(int)) * 4];
+ memset(mp_bus_id_to_pci_bus, -1, max_mp_busses * sizeof(int)); /* fill all int entries with -1 */
+
+ /*
+ * Now process the configuration blocks.
+ */
+ count = sizeof(*mpc);
+ mpt = ((unsigned char *)mpc)+count;
+ while (count < mpc->mpc_length) {
+ switch(*mpt) {
+ case MP_PROCESSOR:
+ {
+ struct mpc_config_processor *m=
+ (struct mpc_config_processor *)mpt;
+
+ /* ACPI may already have provided this one for us */
+ if (!have_acpi_tables)
+ MP_processor_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+ }
+ case MP_BUS:
+ {
+ struct mpc_config_bus *m=
+ (struct mpc_config_bus *)mpt;
+ MP_bus_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+ }
+ case MP_IOAPIC:
+ {
+ struct mpc_config_ioapic *m=
+ (struct mpc_config_ioapic *)mpt;
+ MP_ioapic_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ case MP_INTSRC:
+ {
+ struct mpc_config_intsrc *m=
+ (struct mpc_config_intsrc *)mpt;
+
+ MP_intsrc_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ case MP_LINTSRC:
+ {
+ struct mpc_config_lintsrc *m=
+ (struct mpc_config_lintsrc *)mpt;
+ MP_lintsrc_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ default:
+ {
+ count = mpc->mpc_length;
+ break;
+ }
+ }
+ ++mpc_record;
+ }
+
+ if (clustered_apic_mode) {
+ phys_cpu_present_map = logical_cpu_present_map;
+ }
+
+ printk("Enabling APIC mode: ");
+ if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ printk("Clustered Logical. ");
+ else if (clustered_apic_mode == CLUSTERED_APIC_XAPIC)
+ printk("Physical. ");
+ else
+ printk("Flat. ");
+ printk("Using %d I/O APICs\n", nr_ioapics);
+
+ if (!num_processors)
+ printk(KERN_ERR "SMP mptable: no processors registered!\n");
+ return num_processors;
+}
+
+static int __init ELCR_trigger(unsigned int irq)
+{
+ unsigned int port;
+
+ port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+}
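+
+/*
+ * Worked example: the ELCR is two I/O ports, 0x4d0 (IRQs 0-7) and 0x4d1
+ * (IRQs 8-15), with one bit per IRQ (1 = level triggered). For IRQ 10:
+ * port = 0x4d0 + (10 >> 3) = 0x4d1, bit = 10 & 7 = 2.
+ */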
+
+static void __init construct_default_ioirq_mptable(int mpc_default_type)
+{
+ struct mpc_config_intsrc intsrc;
+ int i;
+ int ELCR_fallback = 0;
+
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqflag = 0; /* conforming */
+ intsrc.mpc_srcbus = 0;
+ intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+
+ intsrc.mpc_irqtype = mp_INT;
+
+ /*
+ * If true, we have an ISA/PCI system with no IRQ entries
+ * in the MP table. To prevent the PCI interrupts from being set up
+ * incorrectly, we try to use the ELCR. The sanity check to see if
+ * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+ * never be level sensitive, so we simply see if the ELCR agrees.
+ * If it does, we assume it's valid.
+ */
+ if (mpc_default_type == 5) {
+ printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+
+ if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+ printk("ELCR contains invalid data... not using ELCR\n");
+ else {
+ printk("Using ELCR to identify PCI interrupts\n");
+ ELCR_fallback = 1;
+ }
+ }
+
+ for (i = 0; i < 16; i++) {
+ switch (mpc_default_type) {
+ case 2:
+ if (i == 0 || i == 13)
+ continue; /* IRQ0 & IRQ13 not connected */
+ /* fall through */
+ default:
+ if (i == 2)
+ continue; /* IRQ2 is never connected */
+ }
+
+ if (ELCR_fallback) {
+ /*
+ * If the ELCR indicates a level-sensitive interrupt, we
+ * copy that information over to the MP table in the
+ * irqflag field: 13 (binary 1101) encodes trigger mode
+ * 11 (level sensitive) in bits 3:2 and polarity 01
+ * (active high) in bits 1:0.
+ */
+ if (ELCR_trigger(i))
+ intsrc.mpc_irqflag = 13;
+ else
+ intsrc.mpc_irqflag = 0;
+ }
+
+ intsrc.mpc_srcbusirq = i;
+ intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
+ MP_intsrc_info(&intsrc);
+ }
+
+ intsrc.mpc_irqtype = mp_ExtINT;
+ intsrc.mpc_srcbusirq = 0;
+ intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
+ MP_intsrc_info(&intsrc);
+}
+
+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+{
+ struct mpc_config_processor processor;
+ struct mpc_config_bus bus;
+ struct mpc_config_ioapic ioapic;
+ struct mpc_config_lintsrc lintsrc;
+ int linttypes[2] = { mp_ExtINT, mp_NMI };
+ int i;
+
+ /*
+ * local APIC has default address
+ */
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+ /*
+ * 2 CPUs, numbered 0 & 1.
+ */
+ processor.mpc_type = MP_PROCESSOR;
+ /* Either an integrated APIC or a discrete 82489DX. */
+ processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ processor.mpc_cpuflag = CPU_ENABLED;
+ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+ (boot_cpu_data.x86_model << 4) |
+ boot_cpu_data.x86_mask;
+ processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+ processor.mpc_reserved[0] = 0;
+ processor.mpc_reserved[1] = 0;
+ for (i = 0; i < 2; i++) {
+ processor.mpc_apicid = i;
+ MP_processor_info(&processor);
+ }
+
+ bus.mpc_type = MP_BUS;
+ bus.mpc_busid = 0;
+ switch (mpc_default_type) {
+ default:
+ printk("???\nUnknown standard configuration %d\n",
+ mpc_default_type);
+ /* fall through */
+ case 1:
+ case 5:
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ break;
+ case 2:
+ case 6:
+ case 3:
+ memcpy(bus.mpc_bustype, "EISA ", 6);
+ break;
+ case 4:
+ case 7:
+ memcpy(bus.mpc_bustype, "MCA ", 6);
+ }
+ MP_bus_info(&bus);
+ if (mpc_default_type > 4) {
+ bus.mpc_busid = 1;
+ memcpy(bus.mpc_bustype, "PCI ", 6);
+ MP_bus_info(&bus);
+ }
+
+ ioapic.mpc_type = MP_IOAPIC;
+ ioapic.mpc_apicid = 2;
+ ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ ioapic.mpc_flags = MPC_APIC_USABLE;
+ ioapic.mpc_apicaddr = 0xFEC00000;
+ MP_ioapic_info(&ioapic);
+
+ /*
+ * We set up most of the low 16 IO-APIC pins according to MPS rules.
+ */
+ construct_default_ioirq_mptable(mpc_default_type);
+
+ lintsrc.mpc_type = MP_LINTSRC;
+ lintsrc.mpc_irqflag = 0; /* conforming */
+ lintsrc.mpc_srcbusid = 0;
+ lintsrc.mpc_srcbusirq = 0;
+ lintsrc.mpc_destapic = MP_APIC_ALL;
+ for (i = 0; i < 2; i++) {
+ lintsrc.mpc_irqtype = linttypes[i];
+ lintsrc.mpc_destapiclint = i;
+ MP_lintsrc_info(&lintsrc);
+ }
+}
+
+static struct intel_mp_floating *mpf_found;
+extern void config_acpi_tables(void);
+
+/*
+ * Scan the memory blocks for an SMP configuration block.
+ */
+void __init get_smp_config (void)
+{
+ struct intel_mp_floating *mpf = mpf_found;
+
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * Check if the ACPI tables are provided. Use them only to get
+ * the processor information, mainly because it provides
+ * the info on the logical processor(s), rather than the physical
+ * processor(s) that are provided by the MPS. We attempt to
+ * check only if the user provided a commandline override
+ */
+ config_acpi_tables();
+#endif
+
+ printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+ if (mpf->mpf_feature2 & (1<<7)) {
+ printk(" IMCR and PIC compatibility mode.\n");
+ pic_mode = 1;
+ } else {
+ printk(" Virtual Wire compatibility mode.\n");
+ pic_mode = 0;
+ }
+
+ /*
+ * Now see if we need to read further.
+ */
+ if (mpf->mpf_feature1 != 0) {
+
+ printk("Default MP configuration #%d\n", mpf->mpf_feature1);
+ construct_default_ISA_mptable(mpf->mpf_feature1);
+
+ } else if (mpf->mpf_physptr) {
+
+ /*
+ * Read the physical hardware table. Anything here will
+ * override the defaults.
+ */
+ if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
+ smp_found_config = 0;
+ printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+ printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+ return;
+ }
+ /*
+ * If there are no explicit MP IRQ entries, then we are
+ * broken. We set up most of the low 16 IO-APIC pins to
+ * ISA defaults and hope it will work.
+ */
+ if (!mp_irq_entries) {
+ struct mpc_config_bus bus;
+
+ printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+
+ bus.mpc_type = MP_BUS;
+ bus.mpc_busid = 0;
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ MP_bus_info(&bus);
+
+ construct_default_ioirq_mptable(0);
+ }
+
+ } else
+ BUG();
+
+ printk("Processors: %d\n", num_processors);
+ /*
+ * Only use the first configuration found.
+ */
+}
+
+static int __init smp_scan_config (unsigned long base, unsigned long length)
+{
+ unsigned long *bp = phys_to_virt(base);
+ struct intel_mp_floating *mpf;
+
+ Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+ if (sizeof(*mpf) != 16)
+ printk("Error: MPF size\n");
+
+ while (length > 0) {
+ mpf = (struct intel_mp_floating *)bp;
+ if ((*bp == SMP_MAGIC_IDENT) &&
+ (mpf->mpf_length == 1) &&
+ !mpf_checksum((unsigned char *)bp, 16) &&
+ ((mpf->mpf_specification == 1)
+ || (mpf->mpf_specification == 4)) ) {
+
+ smp_found_config = 1;
+ printk("found SMP MP-table at %08lx\n",
+ virt_to_phys(mpf));
+ reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+ if (mpf->mpf_physptr)
+ reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE);
+ mpf_found = mpf;
+ return 1;
+ }
+ bp += 4;
+ length -= 16;
+ }
+ return 0;
+}
+
+void __init find_intel_smp (void)
+{
+ unsigned int address;
+
+ /*
+ * FIXME: Linux assumes you have 640K of base ram..
+ * this continues the error...
+ *
+ * 1) Scan the bottom 1K for a signature
+ * 2) Scan the top 1K of base RAM
+ * 3) Scan the 64K of bios
+ */
+ if (smp_scan_config(0x0,0x400) ||
+ smp_scan_config(639*0x400,0x400) ||
+ smp_scan_config(0xF0000,0x10000))
+ return;
+ /*
+ * If it is an SMP machine we should know now, unless the
+ * configuration is in an EISA/MCA bus machine with an
+ * extended BIOS data area.
+ *
+ * There is a real-mode segmented pointer to the 4K EBDA area
+ * at 0x40E; calculate its address and scan the area here.
+ *
+ * NOTE! There were Linux loaders that would corrupt the EBDA
+ * area, and as such this kind of SMP config may be less
+ * trustworthy, simply because the SMP table may have been
+ * stomped on during early boot. Thankfully the bootloaders
+ * now honour the EBDA.
+ */
+
+ address = *(unsigned short *)phys_to_virt(0x40E);
+ address <<= 4;
+ smp_scan_config(address, 0x1000);
+}
+
+#else
+
+/*
+ * The Visual Workstation is Intel MP compliant in the hardware
+ * sense, but it doesn't have a BIOS(-configuration table).
+ * No problem for Linux.
+ */
+void __init find_visws_smp(void)
+{
+ smp_found_config = 1;
+
+ phys_cpu_present_map |= 2; /* or in id 1 */
+ apic_version[1] |= 0x10; /* integrated APIC */
+ apic_version[0] |= 0x10;
+
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+}
+
+#endif
+
+/*
+ * - Intel MP Configuration Table
+ * - or SGI Visual Workstation configuration
+ */
+void __init find_smp_config (void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ find_intel_smp();
+#endif
+#ifdef CONFIG_VISWS
+ find_visws_smp();
+#endif
+}
+
diff --git a/xen/arch/i386/pci-dma.c b/xen/arch/i386/pci-dma.c
new file mode 100644
index 0000000000..9d19cea867
--- /dev/null
+++ b/xen/arch/i386/pci-dma.c
@@ -0,0 +1,37 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * On i386 there is no hardware dynamic DMA address translation,
+ * so consistent alloc/free are merely page allocation/freeing.
+ * The rest of the dynamic DMA mapping interface is implemented
+ * in asm/pci.h.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/lib.h>
+#include <linux/pci.h>
+#include <asm/io.h>
+
+void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
+ dma_addr_t *dma_handle)
+{
+ void *ret;
+ int gfp = GFP_ATOMIC;
+
+ if (hwdev == NULL || ((u32)hwdev->dma_mask < 0xffffffff))
+ gfp |= GFP_DMA;
+ ret = (void *)__get_free_pages(gfp, get_order(size));
+
+ if (ret != NULL) {
+ memset(ret, 0, size);
+ *dma_handle = virt_to_bus(ret);
+ }
+ return ret;
+}
+
+void pci_free_consistent(struct pci_dev *hwdev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ free_pages((unsigned long)vaddr, get_order(size));
+}
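+
+/*
+ * Illustrative usage sketch (not part of this file): a driver allocates
+ * and frees a buffer shared with its device like this; "mydev" and the
+ * size are hypothetical.
+ *
+ *   dma_addr_t bus_addr;
+ *   void *cpu_addr = pci_alloc_consistent(mydev, 4096, &bus_addr);
+ *   ... program bus_addr into the device, access cpu_addr from the CPU ...
+ *   pci_free_consistent(mydev, 4096, cpu_addr, bus_addr);
+ */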
diff --git a/xen/arch/i386/pci-i386.c b/xen/arch/i386/pci-i386.c
new file mode 100644
index 0000000000..7a213d824c
--- /dev/null
+++ b/xen/arch/i386/pci-i386.c
@@ -0,0 +1,391 @@
+/*
+ * Low-Level PCI Access for i386 machines
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * Drew@Colorado.EDU
+ * +1 (303) 786-7975
+ *
+ * Drew's work was sponsored by:
+ * iX Multiuser Multitasking Magazine
+ * Hannover, Germany
+ * hm@ix.de
+ *
+ * Copyright 1997--2000 Martin Mares <mj@ucw.cz>
+ *
+ * For more information, please consult the following manuals (look at
+ * http://www.pcisig.com/ for how to get them):
+ *
+ * PCI BIOS Specification
+ * PCI Local Bus Specification
+ * PCI to PCI Bridge Specification
+ * PCI System Design Guide
+ *
+ *
+ * CHANGELOG :
+ * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION
+ * Revision 2.0 present on <thys@dennis.ee.up.ac.za>'s ASUS mainboard.
+ *
+ * Jan 5, 1995 : Modified to probe PCI hardware at boot time by Frederic
+ * Potter, potter@cao-vlsi.ibp.fr
+ *
+ * Jan 10, 1995 : Modified to store the information about configured pci
+ * devices into a list, which can be accessed via /proc/pci by
+ * Curtis Varner, cvarner@cs.ucr.edu
+ *
+ * Jan 12, 1995 : CPU-PCI bridge optimization support by Frederic Potter.
+ * Alpha version. Intel & UMC chipset support only.
+ *
+ * Apr 16, 1995 : Source merge with the DEC Alpha PCI support. Most of the code
+ * moved to drivers/pci/pci.c.
+ *
+ * Dec 7, 1996 : Added support for direct configuration access of boards
+ * with Intel compatible access schemes (tsbogend@alpha.franken.de)
+ *
+ * Feb 3, 1997 : Set internal functions to static, save/restore flags
+ * avoid dead locks reading broken PCI BIOS, werner@suse.de
+ *
+ * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS
+ * (mj@atrey.karlin.mff.cuni.cz)
+ *
+ * May 7, 1997 : Added some missing cli()'s. [mj]
+ *
+ * Jun 20, 1997 : Corrected problems in "conf1" type accesses.
+ * (paubert@iram.es)
+ *
+ * Aug 2, 1997 : Split to PCI BIOS handling and direct PCI access parts
+ * and cleaned it up... Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ * Feb 6, 1998 : No longer using BIOS to find devices and device classes. [mj]
+ *
+ * May 1, 1998 : Support for peer host bridges. [mj]
+ *
+ * Jun 19, 1998 : Changed to use spinlocks, so that PCI configuration space
+ * can be accessed from interrupts even on SMP systems. [mj]
+ *
+ * August 1998 : Better support for peer host bridges and more paranoid
+ * checks for direct hardware access. Ugh, this file starts to look as
+ * a large gallery of common hardware bug workarounds (watch the comments)
+ * -- the PCI specs themselves are sane, but most implementors should be
+ * hit hard with \hammer scaled \magstep5. [mj]
+ *
+ * Jan 23, 1999 : More improvements to peer host bridge logic. i450NX fixup. [mj]
+ *
+ * Feb 8, 1999 : Added UM8886BF I/O address fixup. [mj]
+ *
+ * August 1999 : New resource management and configuration access stuff. [mj]
+ *
+ * Sep 19, 1999 : Use PCI IRQ routing tables for detection of peer host bridges.
+ * Based on ideas by Chris Frantz and David Hinds. [mj]
+ *
+ * Sep 28, 1999 : Handle unreported/unassigned IRQs. Thanks to Shuu Yamaguchi
+ * for a lot of patience during testing. [mj]
+ *
+ * Oct 8, 1999 : Split to pci-i386.c, pci-pc.c and pci-visws.c. [mj]
+ */
+
+#include <linux/types.h>
+/*#include <linux/kernel.h>*/
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/errno.h>
+
+#include "pci-i386.h"
+
+void
+pcibios_update_resource(struct pci_dev *dev, struct resource *root,
+ struct resource *res, int resource)
+{
+ u32 new, check;
+ int reg;
+
+ new = res->start | (res->flags & PCI_REGION_FLAG_MASK);
+ if (resource < 6) {
+ reg = PCI_BASE_ADDRESS_0 + 4*resource;
+ } else if (resource == PCI_ROM_RESOURCE) {
+ res->flags |= PCI_ROM_ADDRESS_ENABLE;
+ new |= PCI_ROM_ADDRESS_ENABLE;
+ reg = dev->rom_base_reg;
+ } else {
+ /* Somebody might have asked for allocation of a non-standard resource */
+ return;
+ }
+
+ pci_write_config_dword(dev, reg, new);
+ pci_read_config_dword(dev, reg, &check);
+ if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) {
+ printk(KERN_ERR "PCI: Error while updating region "
+ "%s/%d (%08x != %08x)\n", dev->slot_name, resource,
+ new, check);
+ }
+}
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might be mirrored at 0x0100-0x03ff.
+ */
+void
+pcibios_align_resource(void *data, struct resource *res,
+ unsigned long size, unsigned long align)
+{
+ if (res->flags & IORESOURCE_IO) {
+ unsigned long start = res->start;
+
+ if (start & 0x300) {
+ start = (start + 0x3ff) & ~0x3ff;
+ res->start = start;
+ }
+ }
+}
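+
+/*
+ * Worked example: a request starting at 0x2900 has bits in 0x300 set, so
+ * it is rounded up to the next 0x400 boundary, 0x2c00 - back in the
+ * 0x000-0x0ff region modulo 0x400, where 10-bit decoders cannot alias it
+ * onto motherboard/ISA ports.
+ */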
+
+
+/*
+ * Handle resources of PCI devices. If the world were perfect, we could
+ * just allocate all the resource regions and do nothing more. It isn't.
+ * On the other hand, we cannot just re-allocate all devices, as it would
+ * require us to know lots of host bridge internals. So we attempt to
+ * keep as much of the original configuration as possible, but tweak it
+ * when it's found to be wrong.
+ *
+ * Known BIOS problems we have to work around:
+ * - I/O or memory regions not configured
+ * - regions configured, but not enabled in the command register
+ * - bogus I/O addresses above 64K used
+ * - expansion ROMs left enabled (this may sound harmless, but given
+ * the fact the PCI specs explicitly allow address decoders to be
+ * shared between expansion ROMs and other resource regions, it's
+ * at least dangerous)
+ *
+ * Our solution:
+ * (1) Allocate resources for all buses behind PCI-to-PCI bridges.
+ * This gives us fixed barriers on where we can allocate.
+ * (2) Allocate resources for all enabled devices. If there is
+ * a collision, just mark the resource as unallocated. Also
+ * disable expansion ROMs during this step.
+ * (3) Try to allocate resources for disabled devices. If the
+ * resources were assigned correctly, everything goes well,
+ * if they weren't, they won't disturb allocation of other
+ * resources.
+ * (4) Assign new addresses to resources which were either
+ * not configured at all or misconfigured. If explicitly
+ * requested by the user, configure expansion ROM address
+ * as well.
+ */
+
+static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
+{
+ struct list_head *ln;
+ struct pci_bus *bus;
+ struct pci_dev *dev;
+ int idx;
+ struct resource *r, *pr;
+
+ /* Depth-First Search on bus tree */
+ for (ln=bus_list->next; ln != bus_list; ln=ln->next) {
+ bus = pci_bus_b(ln);
+ if ((dev = bus->self)) {
+ for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
+ r = &dev->resource[idx];
+ if (!r->start)
+ continue;
+ pr = pci_find_parent_resource(dev, r);
+ if (!pr || request_resource(pr, r) < 0)
+ printk(KERN_ERR "PCI: Cannot allocate resource region %d of bridge %s\n", idx, dev->slot_name);
+ }
+ }
+ pcibios_allocate_bus_resources(&bus->children);
+ }
+}
+
+static void __init pcibios_allocate_resources(int pass)
+{
+ struct pci_dev *dev;
+ int idx, disabled;
+ u16 command;
+ struct resource *r, *pr;
+
+ pci_for_each_dev(dev) {
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ for(idx = 0; idx < 6; idx++) {
+ r = &dev->resource[idx];
+ if (r->parent) /* Already allocated */
+ continue;
+ if (!r->start) /* Address not assigned at all */
+ continue;
+ if (r->flags & IORESOURCE_IO)
+ disabled = !(command & PCI_COMMAND_IO);
+ else
+ disabled = !(command & PCI_COMMAND_MEMORY);
+ if (pass == disabled) {
+ DBG("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n",
+ r->start, r->end, r->flags, disabled, pass);
+ pr = pci_find_parent_resource(dev, r);
+ if (!pr || request_resource(pr, r) < 0) {
+ printk(KERN_ERR "PCI: Cannot allocate resource region %d of device %s\n", idx, dev->slot_name);
+ /* We'll assign a new address later */
+ r->end -= r->start;
+ r->start = 0;
+ }
+ }
+ }
+ if (!pass) {
+ r = &dev->resource[PCI_ROM_RESOURCE];
+ if (r->flags & PCI_ROM_ADDRESS_ENABLE) {
+ /* Turn the ROM off, leave the resource region, but keep it unregistered. */
+ u32 reg;
+ DBG("PCI: Switching off ROM of %s\n", dev->slot_name);
+ r->flags &= ~PCI_ROM_ADDRESS_ENABLE;
+ pci_read_config_dword(dev, dev->rom_base_reg, &reg);
+ pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE);
+ }
+ }
+ }
+}
+
+static void __init pcibios_assign_resources(void)
+{
+ struct pci_dev *dev;
+ int idx;
+ struct resource *r;
+
+ pci_for_each_dev(dev) {
+ int class = dev->class >> 8;
+
+ /* Don't touch classless devices and host bridges */
+ if (!class || class == PCI_CLASS_BRIDGE_HOST)
+ continue;
+
+ for(idx=0; idx<6; idx++) {
+ r = &dev->resource[idx];
+
+ /*
+ * Don't touch IDE controllers and I/O ports of video cards!
+ */
+ if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) ||
+ (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO)))
+ continue;
+
+ /*
+ * We shall assign a new address to this resource, either because
+ * the BIOS forgot to do so or because we have decided the old
+ * address was unusable for some reason.
+ */
+ if (!r->start && r->end)
+ pci_assign_resource(dev, idx);
+ }
+
+ if (pci_probe & PCI_ASSIGN_ROMS) {
+ r = &dev->resource[PCI_ROM_RESOURCE];
+ r->end -= r->start;
+ r->start = 0;
+ if (r->end)
+ pci_assign_resource(dev, PCI_ROM_RESOURCE);
+ }
+ }
+}
+
+void __init pcibios_resource_survey(void)
+{
+ DBG("PCI: Allocating resources\n");
+ pcibios_allocate_bus_resources(&pci_root_buses);
+ pcibios_allocate_resources(0);
+ pcibios_allocate_resources(1);
+ pcibios_assign_resources();
+}
+
+int pcibios_enable_resources(struct pci_dev *dev, int mask)
+{
+ u16 cmd, old_cmd;
+ int idx;
+ struct resource *r;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ old_cmd = cmd;
+ for(idx=0; idx<6; idx++) {
+ /* Only set up the requested stuff */
+ if (!(mask & (1<<idx)))
+ continue;
+
+ r = &dev->resource[idx];
+ if (!r->start && r->end) {
+ printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name);
+ return -EINVAL;
+ }
+ if (r->flags & IORESOURCE_IO)
+ cmd |= PCI_COMMAND_IO;
+ if (r->flags & IORESOURCE_MEM)
+ cmd |= PCI_COMMAND_MEMORY;
+ }
+ if (dev->resource[PCI_ROM_RESOURCE].start)
+ cmd |= PCI_COMMAND_MEMORY;
+ if (cmd != old_cmd) {
+ printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd);
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+ return 0;
+}
+
+/*
+ * If we set up a device for bus mastering, we need to check the latency
+ * timer as certain crappy BIOSes forget to set it properly.
+ */
+unsigned int pcibios_max_latency = 255;
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+ u8 lat;
+ pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+ if (lat < 16)
+ lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+ else if (lat > pcibios_max_latency)
+ lat = pcibios_max_latency;
+ else
+ return;
+ printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", dev->slot_name, lat);
+ pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
+#if 0
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state, int write_combine)
+{
+ unsigned long prot;
+
+ /* I/O space cannot be accessed via normal processor loads and
+ * stores on this platform.
+ */
+ if (mmap_state == pci_mmap_io)
+ return -EINVAL;
+
+ /* Leave vm_pgoff as-is, the PCI space address is the physical
+ * address on this platform.
+ */
+ vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO);
+
+ prot = pgprot_val(vma->vm_page_prot);
+ if (boot_cpu_data.x86 > 3)
+ prot |= _PAGE_PCD | _PAGE_PWT;
+ vma->vm_page_prot = __pgprot(prot);
+
+ /* Write-combine setting is ignored, it is changed via the mtrr
+ * interfaces on this platform.
+ */
+ if (remap_page_range(vma->vm_start, vma->vm_pgoff << PAGE_SHIFT,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+#endif
diff --git a/xen/arch/i386/pci-i386.h b/xen/arch/i386/pci-i386.h
new file mode 100644
index 0000000000..2d051c51b2
--- /dev/null
+++ b/xen/arch/i386/pci-i386.h
@@ -0,0 +1,69 @@
+/*
+ * Low-Level PCI Access for i386 machines.
+ *
+ * (c) 1999 Martin Mares <mj@ucw.cz>
+ */
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+#define PCI_PROBE_BIOS 0x0001
+#define PCI_PROBE_CONF1 0x0002
+#define PCI_PROBE_CONF2 0x0004
+#define PCI_NO_SORT 0x0100
+#define PCI_BIOS_SORT 0x0200
+#define PCI_NO_CHECKS 0x0400
+#define PCI_ASSIGN_ROMS 0x1000
+#define PCI_BIOS_IRQ_SCAN 0x2000
+#define PCI_ASSIGN_ALL_BUSSES 0x4000
+
+extern unsigned int pci_probe;
+
+/* pci-i386.c */
+
+extern unsigned int pcibios_max_latency;
+
+void pcibios_resource_survey(void);
+int pcibios_enable_resources(struct pci_dev *, int);
+
+/* pci-pc.c */
+
+extern int pcibios_last_bus;
+extern struct pci_bus *pci_root_bus;
+extern struct pci_ops *pci_root_ops;
+
+/* pci-irq.c */
+
+struct irq_info {
+ u8 bus, devfn; /* Bus, device and function */
+ struct {
+ u8 link; /* IRQ line ID, chipset dependent, 0=not routed */
+ u16 bitmap; /* Available IRQs */
+ } __attribute__((packed)) irq[4];
+ u8 slot; /* Slot number, 0=onboard */
+ u8 rfu;
+} __attribute__((packed));
+
+struct irq_routing_table {
+ u32 signature; /* PIRQ_SIGNATURE should be here */
+ u16 version; /* PIRQ_VERSION */
+ u16 size; /* Table size in bytes */
+ u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */
+ u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */
+ u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */
+ u32 miniport_data; /* Crap */
+ u8 rfu[11];
+ u8 checksum; /* Modulo 256 checksum must give zero */
+ struct irq_info slots[0];
+} __attribute__((packed));
+
+extern unsigned int pcibios_irq_mask;
+
+void pcibios_irq_init(void);
+void pcibios_fixup_irqs(void);
+void pcibios_enable_irq(struct pci_dev *dev);
diff --git a/xen/arch/i386/pci-irq.c b/xen/arch/i386/pci-irq.c
new file mode 100644
index 0000000000..b7a212b014
--- /dev/null
+++ b/xen/arch/i386/pci-irq.c
@@ -0,0 +1,795 @@
+/*
+ * Low-Level PCI Support for PC -- Routing of Interrupts
+ *
+ * (c) 1999--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+/*#include <linux/kernel.h>*/
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/sched.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/io_apic.h>
+
+#include "pci-i386.h"
+
+#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
+#define PIRQ_VERSION 0x0100
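+/* PIRQ_SIGNATURE is the dword "$PIR" in little-endian byte order, i.e. 0x52495024. */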
+
+int broken_hp_bios_irq9;
+
+static struct irq_routing_table *pirq_table;
+
+/*
+ * Never use: 0, 1, 2 (timer, keyboard, and cascade)
+ * Avoid using: 13, 14 and 15 (FP error and IDE).
+ * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse)
+ */
+unsigned int pcibios_irq_mask = 0xfff8;
+
+static int pirq_penalty[16] = {
+ 1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000,
+ 0, 0, 0, 0, 1000, 100000, 100000, 100000
+};
+
+struct irq_router {
+ char *name;
+ u16 vendor, device;
+ int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
+ int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
+};
+
+/*
+ * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
+ */
+
+static struct irq_routing_table * __init pirq_find_routing_table(void)
+{
+ u8 *addr;
+ struct irq_routing_table *rt;
+ int i;
+ u8 sum;
+
+ for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
+ rt = (struct irq_routing_table *) addr;
+ if (rt->signature != PIRQ_SIGNATURE ||
+ rt->version != PIRQ_VERSION ||
+ rt->size % 16 ||
+ rt->size < sizeof(struct irq_routing_table))
+ continue;
+ sum = 0;
+ for(i=0; i<rt->size; i++)
+ sum += addr[i];
+ if (!sum) {
+ DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt);
+ return rt;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * If we have a IRQ routing table, use it to search for peer host
+ * bridges. It's a gross hack, but since there are no other known
+ * ways how to get a list of buses, we have to go this way.
+ */
+
+static void __init pirq_peer_trick(void)
+{
+ struct irq_routing_table *rt = pirq_table;
+ u8 busmap[256];
+ int i;
+ struct irq_info *e;
+
+ memset(busmap, 0, sizeof(busmap));
+ for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
+ e = &rt->slots[i];
+#ifdef DEBUG
+ {
+ int j;
+ DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
+ for(j=0; j<4; j++)
+ DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
+ DBG("\n");
+ }
+#endif
+ busmap[e->bus] = 1;
+ }
+ for(i=1; i<256; i++)
+ /*
+ * It might be a secondary bus, but in this case its parent is already
+ * known (ascending bus order) and therefore pci_scan_bus returns immediately.
+ */
+ if (busmap[i] && pci_scan_bus(i, pci_root_bus->ops, NULL))
+ printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i);
+ pcibios_last_bus = -1;
+}
+
+/*
+ * Code for querying and setting of IRQ routes on various interrupt routers.
+ */
+
+static void eisa_set_level_irq(unsigned int irq)
+{
+ unsigned char mask = 1 << (irq & 7);
+ unsigned int port = 0x4d0 + (irq >> 3);
+ unsigned char val = inb(port);
+
+ if (!(val & mask)) {
+ DBG(" -> edge");
+ outb(val | mask, port);
+ }
+}
+
+/*
+ * Common IRQ routing practice: nybbles in config space,
+ * offset by some magic constant.
+ */
+static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr)
+{
+ u8 x;
+ unsigned reg = offset + (nr >> 1);
+
+ pci_read_config_byte(router, reg, &x);
+ return (nr & 1) ? (x >> 4) : (x & 0xf);
+}
+
+static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
+{
+ u8 x;
+ unsigned reg = offset + (nr >> 1);
+
+ pci_read_config_byte(router, reg, &x);
+ x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val);
+ pci_write_config_byte(router, reg, x);
+}
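+
+/*
+ * Worked example: read_config_nybble(router, 0x55, 3) reads config byte
+ * 0x55 + (3 >> 1) = 0x56 and, 3 being odd, returns its high nibble;
+ * even-numbered entries live in the low nibble of their byte.
+ */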
+
+/*
+ * ALI pirq entries are damn ugly, and completely undocumented.
+ * This has been figured out from pirq tables, and it's not a pretty
+ * picture.
+ */
+static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
+
+ return irqmap[read_config_nybble(router, 0x48, pirq-1)];
+}
+
+static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
+ unsigned int val = irqmap[irq];
+
+ if (val) {
+ write_config_nybble(router, 0x48, pirq-1, val);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * The Intel PIIX4 pirq rules are fairly simple: "pirq" is
+ * just a pointer to the config space.
+ */
+static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ u8 x;
+
+ pci_read_config_byte(router, pirq, &x);
+ return (x < 16) ? x : 0;
+}
+
+static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ pci_write_config_byte(router, pirq, irq);
+ return 1;
+}
+
+/*
+ * The VIA pirq rules are nibble-based, like ALI,
+ * but without the ugly irq number munging.
+ */
+static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ return read_config_nybble(router, 0x55, pirq);
+}
+
+static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ write_config_nybble(router, 0x55, pirq, irq);
+ return 1;
+}
+
+/*
+ * ITE 8330G pirq rules are nibble-based
+ * FIXME: pirqmap may be { 1, 0, 3, 2 },
+ * 2+3 are both mapped to irq 9 on my system
+ */
+static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+ return read_config_nybble(router,0x43, pirqmap[pirq-1]);
+}
+
+static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+ write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
+ return 1;
+}
+
+/*
+ * OPTI: high four bits are nibble pointer..
+ * I wonder what the low bits do?
+ */
+static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ return read_config_nybble(router, 0xb8, pirq >> 4);
+}
+
+static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ write_config_nybble(router, 0xb8, pirq >> 4, irq);
+ return 1;
+}
+
+/*
+ * Cyrix: nibble offset 0x5C
+ */
+static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ return read_config_nybble(router, 0x5C, (pirq-1)^1);
+}
+
+static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ write_config_nybble(router, 0x5C, (pirq-1)^1, irq);
+ return 1;
+}
+
+/*
+ * PIRQ routing for SiS 85C503 router used in several SiS chipsets
+ * According to the SiS 5595 datasheet (preliminary V1.0, 12/24/1997)
+ * the related registers work as follows:
+ *
+ * general: one byte per re-routable IRQ,
+ * bit 7 IRQ mapping enabled (0) or disabled (1)
+ * bits [6:4] reserved
+ * bits [3:0] IRQ to map to
+ * allowed: 3-7, 9-12, 14-15
+ * reserved: 0, 1, 2, 8, 13
+ *
+ * individual registers in device config space:
+ *
+ * 0x41/0x42/0x43/0x44: PCI INT A/B/C/D - bits as in general case
+ *
+ * 0x61: IDEIRQ: bits as in general case - but:
+ * bits [6:5] must be written 01
+ * bit 4 channel-select primary (0), secondary (1)
+ *
+ * 0x62: USBIRQ: bits as in general case - but:
+ * bit 4 OHCI function disabled (0), enabled (1)
+ *
+ * 0x6a: ACPI/SCI IRQ - bits as in general case
+ *
+ * 0x7e: Data Acq. Module IRQ - bits as in general case
+ *
+ * Apparently there are systems implementing the PCI routing table using
+ * both link values 0x01-0x04 and 0x41-0x44 for PCI INTA..D, yet using
+ * register offsets like 0x62 as link values for e.g. USBIRQ, so there
+ * is no simple "register = offset + pirq" relation.
+ * Currently we support PCI INTA..D and USBIRQ and try our best to handle
+ * both link mappings.
+ * IDE/ACPI/DAQ mapping is currently unsupported (left untouched as set by BIOS).
+ */
+
+static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ u8 x;
+ int reg = pirq;
+
+ switch(pirq) {
+ case 0x01:
+ case 0x02:
+ case 0x03:
+ case 0x04:
+ reg += 0x40;
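+ /* fall through */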
+ case 0x41:
+ case 0x42:
+ case 0x43:
+ case 0x44:
+ case 0x62:
+ pci_read_config_byte(router, reg, &x);
+ if (reg != 0x62)
+ break;
+ if (!(x & 0x40))
+ return 0;
+ break;
+ case 0x61:
+ case 0x6a:
+ case 0x7e:
+ printk(KERN_INFO "SiS pirq: advanced IDE/ACPI/DAQ mapping not yet implemented\n");
+ return 0;
+ default:
+ printk(KERN_INFO "SiS router pirq escape (%d)\n", pirq);
+ return 0;
+ }
+ return (x & 0x80) ? 0 : (x & 0x0f);
+}
+
+static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ u8 x;
+ int reg = pirq;
+
+ switch(pirq) {
+ case 0x01:
+ case 0x02:
+ case 0x03:
+ case 0x04:
+ reg += 0x40;
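+ /* fall through */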
+ case 0x41:
+ case 0x42:
+ case 0x43:
+ case 0x44:
+ case 0x62:
+ x = (irq&0x0f) ? (irq&0x0f) : 0x80;
+ if (reg != 0x62)
+ break;
+ /* always mark OHCI enabled, as nothing else knows about this */
+ x |= 0x40;
+ break;
+ case 0x61:
+ case 0x6a:
+ case 0x7e:
+ printk(KERN_INFO "advanced SiS pirq mapping not yet implemented\n");
+ return 0;
+ default:
+ printk(KERN_INFO "SiS router pirq escape (%d)\n", pirq);
+ return 0;
+ }
+ pci_write_config_byte(router, reg, x);
+
+ return 1;
+}
+
+/*
+ * VLSI: nibble offset 0x74 - educated guess due to routing table and
+ * config space of VLSI 82C534 PCI-bridge/router (1004:0102)
+ * Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard
+ * devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6
+ * for the busbridge to the docking station.
+ */
+
+static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ if (pirq > 8) {
+ printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+ return 0;
+ }
+ return read_config_nybble(router, 0x74, pirq-1);
+}
+
+static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ if (pirq > 8) {
+ printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+ return 0;
+ }
+ write_config_nybble(router, 0x74, pirq-1, irq);
+ return 1;
+}
+
+/*
+ * ServerWorks: PCI interrupts mapped to system IRQ lines through Index
+ * and Redirect I/O registers (0x0c00 and 0x0c01). The Index register
+ * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a. The Redirect
+ * register is a straight binary coding of desired PIC IRQ (low nibble).
+ *
+ * The 'link' value in the PIRQ table is already in the correct format
+ * for the Index register. There are some special index values:
+ * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1,
+ * and 0x03 for SMBus.
+ */
+static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ outb_p(pirq, 0xc00);
+ return inb(0xc01) & 0xf;
+}
+
+static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ outb_p(pirq, 0xc00);
+ outb_p(irq, 0xc01);
+ return 1;
+}
+
+/* Support for AMD756 PCI IRQ Routing
+ * Jhon H. Caicedo <jhcaiced@osso.org.co>
+ * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... (jhcaiced)
+ * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced)
+ * The AMD756 pirq rules are nibble-based:
+ *	offset 0x56: bits 0-3 PIRQA, bits 4-7 PIRQB
+ *	offset 0x57: bits 0-3 PIRQC, bits 4-7 PIRQD
+ */
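+/*
+ * Example of the nibble layout above: PIRQA is the low nibble of 0x56
+ * and PIRQD the high nibble of 0x57, so read_config_nybble(router,
+ * 0x56, pirq - 1) covers all four link values 1..4.
+ */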
+static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ u8 irq;
+ irq = 0;
+ if (pirq <= 4)
+ {
+ irq = read_config_nybble(router, 0x56, pirq - 1);
+ }
+ printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n",
+ dev->vendor, dev->device, pirq, irq);
+ return irq;
+}
+
+static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n",
+ dev->vendor, dev->device, pirq, irq);
+ if (pirq <= 4)
+ {
+ write_config_nybble(router, 0x56, pirq - 1, irq);
+ }
+ return 1;
+}
+
+#ifdef CONFIG_PCI_BIOS
+
+static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ struct pci_dev *bridge;
+ int pin = pci_get_interrupt_pin(dev, &bridge);
+ return pcibios_set_irq_routing(bridge, pin, irq);
+}
+
+static struct irq_router pirq_bios_router =
+ { "BIOS", 0, 0, NULL, pirq_bios_set };
+
+#endif
+
+static struct irq_router pirq_routers[] = {
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371FB_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371MX, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443MX_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AB_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_10, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, pirq_piix_get, pirq_piix_set },
+ { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801E_0, pirq_piix_get, pirq_piix_set },
+
+ { "ALI", PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, pirq_ali_get, pirq_ali_set },
+
+ { "ITE", PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_IT8330G_0, pirq_ite_get, pirq_ite_set },
+
+ { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, pirq_via_get, pirq_via_set },
+ { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C596, pirq_via_get, pirq_via_set },
+ { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, pirq_via_get, pirq_via_set },
+
+ { "OPTI", PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C700, pirq_opti_get, pirq_opti_set },
+
+ { "NatSemi", PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, pirq_cyrix_get, pirq_cyrix_set },
+ { "SIS", PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503, pirq_sis_get, pirq_sis_set },
+ { "VLSI 82C534", PCI_VENDOR_ID_VLSI, PCI_DEVICE_ID_VLSI_82C534, pirq_vlsi_get, pirq_vlsi_set },
+ { "ServerWorks", PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4,
+ pirq_serverworks_get, pirq_serverworks_set },
+ { "ServerWorks", PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB5,
+ pirq_serverworks_get, pirq_serverworks_set },
+ { "AMD756 VIPER", PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_740B,
+ pirq_amd756_get, pirq_amd756_set },
+ { "AMD766", PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7413,
+ pirq_amd756_get, pirq_amd756_set },
+ { "AMD768", PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7443,
+ pirq_amd756_get, pirq_amd756_set },
+
+ { "default", 0, 0, NULL, NULL }
+};
+
+static struct irq_router *pirq_router;
+static struct pci_dev *pirq_router_dev;
+
+static void __init pirq_find_router(void)
+{
+ struct irq_routing_table *rt = pirq_table;
+ struct irq_router *r;
+
+#ifdef CONFIG_PCI_BIOS
+ if (!rt->signature) {
+ printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n");
+ pirq_router = &pirq_bios_router;
+ return;
+ }
+#endif
+
+ DBG("PCI: Attempting to find IRQ router for %04x:%04x\n",
+ rt->rtr_vendor, rt->rtr_device);
+
+ /* fall back to default router if nothing else found */
+ pirq_router = &pirq_routers[ARRAY_SIZE(pirq_routers) - 1];
+
+ pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
+ if (!pirq_router_dev) {
+ DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
+ return;
+ }
+
+ for(r=pirq_routers; r->vendor; r++) {
+ /* Exact match against router table entry? Use it! */
+ if (r->vendor == rt->rtr_vendor && r->device == rt->rtr_device) {
+ pirq_router = r;
+ break;
+ }
+ /* Match against router device entry? Use it as a fallback */
+ if (r->vendor == pirq_router_dev->vendor && r->device == pirq_router_dev->device) {
+ pirq_router = r;
+ }
+ }
+ printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
+ pirq_router->name,
+ pirq_router_dev->vendor,
+ pirq_router_dev->device,
+ pirq_router_dev->slot_name);
+}
+
+static struct irq_info *pirq_get_info(struct pci_dev *dev)
+{
+ struct irq_routing_table *rt = pirq_table;
+ int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
+ struct irq_info *info;
+
+ for (info = rt->slots; entries--; info++)
+ if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
+ return info;
+ return NULL;
+}
+
+static void pcibios_test_irq_handler(int irq, void *dev_id, struct pt_regs *regs)
+{
+}
+
+static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
+{
+ u8 pin;
+ struct irq_info *info;
+ int i, pirq, newirq;
+ int irq = 0;
+ u32 mask;
+ struct irq_router *r = pirq_router;
+ struct pci_dev *dev2;
+ char *msg = NULL;
+
+ if (!pirq_table)
+ return 0;
+
+ /* Find IRQ routing entry */
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (!pin) {
+ DBG(" -> no interrupt pin\n");
+ return 0;
+ }
+ pin = pin - 1;
+
+ DBG("IRQ for %s:%d", dev->slot_name, pin);
+ info = pirq_get_info(dev);
+ if (!info) {
+ DBG(" -> not found in routing table\n");
+ return 0;
+ }
+ pirq = info->irq[pin].link;
+ mask = info->irq[pin].bitmap;
+ if (!pirq) {
+ DBG(" -> not routed\n");
+ return 0;
+ }
+ DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
+ mask &= pcibios_irq_mask;
+
+ /* Work around broken HP Pavilion Notebooks which assign USB to
+ IRQ 9 even though it is actually wired to IRQ 11 */
+
+ if (broken_hp_bios_irq9 && pirq == 0x59 && dev->irq == 9) {
+ dev->irq = 11;
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11);
+ r->set(pirq_router_dev, dev, pirq, 11);
+ }
+
+ /*
+ * Find the best IRQ to assign: use the one
+ * reported by the device if possible.
+ */
+ newirq = dev->irq;
+ if (!newirq && assign) {
+ for (i = 0; i < 16; i++) {
+ if (!(mask & (1 << i)))
+ continue;
+ if (pirq_penalty[i] < pirq_penalty[newirq] &&
+ !request_irq(i, pcibios_test_irq_handler, SA_SHIRQ, "pci-test", dev)) {
+ free_irq(i, dev);
+ newirq = i;
+ }
+ }
+ }
+ DBG(" -> newirq=%d", newirq);
+
+ /* Check if it is hardcoded */
+ if ((pirq & 0xf0) == 0xf0) {
+ irq = pirq & 0xf;
+ DBG(" -> hardcoded IRQ %d\n", irq);
+ msg = "Hardcoded";
+ } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq))) {
+ DBG(" -> got IRQ %d\n", irq);
+ msg = "Found";
+ } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
+ DBG(" -> assigning IRQ %d", newirq);
+ if (r->set(pirq_router_dev, dev, pirq, newirq)) {
+ eisa_set_level_irq(newirq);
+ DBG(" ... OK\n");
+ msg = "Assigned";
+ irq = newirq;
+ }
+ }
+
+ if (!irq) {
+ DBG(" ... failed\n");
+ if (newirq && mask == (1 << newirq)) {
+ msg = "Guessed";
+ irq = newirq;
+ } else
+ return 0;
+ }
+ printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, dev->slot_name);
+
+ /* Update IRQ for all devices with the same pirq value */
+ pci_for_each_dev(dev2) {
+ pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
+ if (!pin)
+ continue;
+ pin--;
+ info = pirq_get_info(dev2);
+ if (!info)
+ continue;
+ if (info->irq[pin].link == pirq) {
+ /* We refuse to override the dev->irq information. Give a warning! */
+ if (dev2->irq && dev2->irq != irq) {
+ printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n",
+ dev2->slot_name, dev2->irq, irq);
+ continue;
+ }
+ dev2->irq = irq;
+ pirq_penalty[irq]++;
+ if (dev != dev2)
+ printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, dev2->slot_name);
+ }
+ }
+ return 1;
+}
+
+void __init pcibios_irq_init(void)
+{
+ DBG("PCI: IRQ init\n");
+ pirq_table = pirq_find_routing_table();
+#ifdef CONFIG_PCI_BIOS
+ if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN))
+ pirq_table = pcibios_get_irq_routing_table();
+#endif
+ if (pirq_table) {
+ pirq_peer_trick();
+ pirq_find_router();
+ if (pirq_table->exclusive_irqs) {
+ int i;
+ for (i=0; i<16; i++)
+ if (!(pirq_table->exclusive_irqs & (1 << i)))
+ pirq_penalty[i] += 100;
+ }
+ /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
+ if (io_apic_assign_pci_irqs)
+ pirq_table = NULL;
+ }
+}
+
+void __init pcibios_fixup_irqs(void)
+{
+ struct pci_dev *dev;
+ u8 pin;
+
+ DBG("PCI: IRQ fixup\n");
+ pci_for_each_dev(dev) {
+ /*
+ * If the BIOS has set an out of range IRQ number, just ignore it.
+ * Also keep track of which IRQ's are already in use.
+ */
+ if (dev->irq >= 16) {
+ DBG("%s: ignoring bogus IRQ %d\n", dev->slot_name, dev->irq);
+ dev->irq = 0;
+ }
+ /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
+ if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
+ pirq_penalty[dev->irq] = 0;
+ pirq_penalty[dev->irq]++;
+ }
+
+ pci_for_each_dev(dev) {
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * Recalculate IRQ numbers if we use the I/O APIC.
+ */
+ if (io_apic_assign_pci_irqs)
+ {
+ int irq;
+
+ if (pin) {
+ pin--; /* interrupt pins are numbered starting from 1 */
+ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+ /*
+ * Busses behind bridges are typically not listed in the MP-table.
+ * In this case we have to look up the IRQ based on the parent bus,
+ * parent slot, and pin number. The SMP code detects such bridged
+ * busses itself so we should get into this branch reliably.
+ */
+ if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
+ struct pci_dev * bridge = dev->bus->self;
+
+ pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+ irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
+ PCI_SLOT(bridge->devfn), pin);
+ if (irq >= 0)
+ printk(KERN_WARNING "PCI: using PPB(B%d,I%d,P%d) to get irq %d\n",
+ bridge->bus->number, PCI_SLOT(bridge->devfn), pin, irq);
+ }
+ if (irq >= 0) {
+ printk(KERN_INFO "PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n",
+ dev->bus->number, PCI_SLOT(dev->devfn), pin, irq);
+ dev->irq = irq;
+ }
+ }
+ }
+#endif
+ /*
+ * Still no IRQ? Try to lookup one...
+ */
+ if (pin && !dev->irq)
+ pcibios_lookup_irq(dev, 0);
+ }
+}
+
+void pcibios_penalize_isa_irq(int irq)
+{
+ /*
+ * If any ISAPnP device reports an IRQ in its list of possible
+ * IRQ's, we try to avoid assigning it to PCI devices.
+ */
+ pirq_penalty[irq] += 100;
+}
+
+void pcibios_enable_irq(struct pci_dev *dev)
+{
+ u8 pin;
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+ char *msg;
+ if (io_apic_assign_pci_irqs)
+ msg = " Probably buggy MP table.";
+ else if (pci_probe & PCI_BIOS_IRQ_SCAN)
+ msg = "";
+ else
+ msg = " Please try using pci=biosirq.";
+ printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
+ 'A' + pin - 1, dev->slot_name, msg);
+ }
+}
diff --git a/xen/arch/i386/pci-pc.c b/xen/arch/i386/pci-pc.c
new file mode 100644
index 0000000000..d63a54a79c
--- /dev/null
+++ b/xen/arch/i386/pci-pc.c
@@ -0,0 +1,1494 @@
+/*
+ * Low-Level PCI Support for PC
+ *
+ * (c) 1999--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+/*#include <linux/kernel.h>*/
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+
+/*#include <asm/segment.h>*/
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/smpboot.h>
+
+#include "pci-i386.h"
+
+extern int numnodes;
+#define __KERNEL_CS __HYPERVISOR_CS
+#define __KERNEL_DS __HYPERVISOR_DS
+
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
+
+int pcibios_last_bus = -1;
+struct pci_bus *pci_root_bus = NULL;
+struct pci_ops *pci_root_ops = NULL;
+
+int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL;
+int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL;
+
+#ifdef CONFIG_MULTIQUAD
+#define BUS2QUAD(global) (mp_bus_id_to_node[global])
+#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
+#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
+#else
+#define BUS2QUAD(global) (0)
+#define BUS2LOCAL(global) (global)
+#define QUADLOCAL2BUS(quad,local) (local)
+#endif
+
+/*
+ * This interrupt-safe spinlock protects all accesses to PCI
+ * configuration space.
+ */
+static spinlock_t pci_config_lock = SPIN_LOCK_UNLOCKED;
+
+
+/*
+ * Functions for accessing PCI configuration space with type 1 accesses
+ */
+
+#ifdef CONFIG_PCI_DIRECT
+
+#ifdef CONFIG_MULTIQUAD
+#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \
+ (0x80000000 | (BUS2LOCAL(bus) << 16) | (dev << 11) | (fn << 8) | (reg & ~3))
+
+static int pci_conf1_mq_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) /* CONFIG_MULTIQUAD */
+{
+ unsigned long flags;
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl_quad(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8, BUS2QUAD(bus));
+
+ switch (len) {
+ case 1:
+ *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus));
+ break;
+ case 2:
+ *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus));
+ break;
+ case 4:
+ *value = inl_quad(0xCFC, BUS2QUAD(bus));
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_conf1_mq_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) /* CONFIG_MULTIQUAD */
+{
+ unsigned long flags;
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl_quad(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8, BUS2QUAD(bus));
+
+ switch (len) {
+ case 1:
+ outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus));
+ break;
+ case 2:
+ outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus));
+ break;
+ case 4:
+ outl_quad((u32)value, 0xCFC, BUS2QUAD(bus));
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_conf1_read_mq_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+ int result;
+ u32 data;
+
+ result = pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, &data);
+
+ *value = (u8)data;
+
+ return result;
+}
+
+static int pci_conf1_read_mq_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+ int result;
+ u32 data;
+
+ result = pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, &data);
+
+ *value = (u16)data;
+
+ return result;
+}
+
+static int pci_conf1_read_mq_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+ if (!value)
+ return -EINVAL;
+
+ return pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_conf1_write_mq_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+ return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_conf1_write_mq_config_word(struct pci_dev *dev, int where, u16 value)
+{
+ return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_conf1_write_mq_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+ return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static struct pci_ops pci_direct_mq_conf1 = {
+ pci_conf1_read_mq_config_byte,
+ pci_conf1_read_mq_config_word,
+ pci_conf1_read_mq_config_dword,
+ pci_conf1_write_mq_config_byte,
+ pci_conf1_write_mq_config_word,
+ pci_conf1_write_mq_config_dword
+};
+
+#endif /* CONFIG_MULTIQUAD */
+#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \
+ (0x80000000 | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3))
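+/*
+ * Worked example: bus 0, dev 3, fn 1, reg 0x3c encodes as
+ * 0x80000000 | (0 << 16) | (3 << 11) | (1 << 8) | 0x3c = 0x8000193c;
+ * this is written to the address port 0xCF8 and the data is then
+ * transferred through 0xCFC (offset by reg & 3 for sub-dword access).
+ */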
+
+static int pci_conf1_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) /* !CONFIG_MULTIQUAD */
+{
+ unsigned long flags;
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8);
+
+ switch (len) {
+ case 1:
+ *value = inb(0xCFC + (reg & 3));
+ break;
+ case 2:
+ *value = inw(0xCFC + (reg & 2));
+ break;
+ case 4:
+ *value = inl(0xCFC);
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_conf1_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) /* !CONFIG_MULTIQUAD */
+{
+ unsigned long flags;
+
+	if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8);
+
+ switch (len) {
+ case 1:
+ outb((u8)value, 0xCFC + (reg & 3));
+ break;
+ case 2:
+ outw((u16)value, 0xCFC + (reg & 2));
+ break;
+ case 4:
+ outl((u32)value, 0xCFC);
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+#undef PCI_CONF1_ADDRESS
+
+static int pci_conf1_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+ int result;
+ u32 data;
+
+ result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, &data);
+
+ *value = (u8)data;
+
+ return result;
+}
+
+static int pci_conf1_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+ int result;
+ u32 data;
+
+ result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, &data);
+
+ *value = (u16)data;
+
+ return result;
+}
+
+static int pci_conf1_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+ return pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_conf1_write_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+ return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_conf1_write_config_word(struct pci_dev *dev, int where, u16 value)
+{
+ return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_conf1_write_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+ return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static struct pci_ops pci_direct_conf1 = {
+ pci_conf1_read_config_byte,
+ pci_conf1_read_config_word,
+ pci_conf1_read_config_dword,
+ pci_conf1_write_config_byte,
+ pci_conf1_write_config_word,
+ pci_conf1_write_config_dword
+};
+
+
+/*
+ * Functions for accessing PCI configuration space with type 2 accesses
+ */
+
+#define PCI_CONF2_ADDRESS(dev, reg) (u16)(0xC000 | (dev << 8) | reg)
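+/*
+ * Worked example: dev 5, reg 0x10 maps to I/O port 0xC000 | (5 << 8) |
+ * 0x10 = 0xC510, valid only after the function number and bus have been
+ * latched through ports 0xCF8 and 0xCFA below.
+ */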
+
+static int pci_conf2_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value)
+{
+ unsigned long flags;
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ if (dev & 0x10)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outb((u8)(0xF0 | (fn << 1)), 0xCF8);
+ outb((u8)bus, 0xCFA);
+
+ switch (len) {
+ case 1:
+ *value = inb(PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ case 2:
+ *value = inw(PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ case 4:
+ *value = inl(PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ }
+
+ outb (0, 0xCF8);
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_conf2_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value)
+{
+ unsigned long flags;
+
+	if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ if (dev & 0x10)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outb((u8)(0xF0 | (fn << 1)), 0xCF8);
+ outb((u8)bus, 0xCFA);
+
+ switch (len) {
+ case 1:
+ outb ((u8)value, PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ case 2:
+ outw ((u16)value, PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ case 4:
+ outl ((u32)value, PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ }
+
+ outb (0, 0xCF8);
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+#undef PCI_CONF2_ADDRESS
+
+static int pci_conf2_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+ int result;
+ u32 data;
+ result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, &data);
+ *value = (u8)data;
+ return result;
+}
+
+static int pci_conf2_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+ int result;
+ u32 data;
+ result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, &data);
+ *value = (u16)data;
+ return result;
+}
+
+static int pci_conf2_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+ return pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_conf2_write_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+ return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_conf2_write_config_word(struct pci_dev *dev, int where, u16 value)
+{
+ return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_conf2_write_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+ return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static struct pci_ops pci_direct_conf2 = {
+ pci_conf2_read_config_byte,
+ pci_conf2_read_config_word,
+ pci_conf2_read_config_dword,
+ pci_conf2_write_config_byte,
+ pci_conf2_write_config_word,
+ pci_conf2_write_config_dword
+};
+
+
+/*
+ * Before we decide to use direct hardware access mechanisms, we try to do some
+ * trivial checks to ensure it at least _seems_ to be working -- we just test
+ * whether bus 00 contains a host bridge (this is similar to checking
+ * techniques used in XFree86, but ours should be more reliable since we
+ * attempt to make use of direct access hints provided by the PCI BIOS).
+ *
+ * This should be close to trivial, but it isn't, because there are buggy
+ * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID.
+ */
+static int __devinit pci_sanity_check(struct pci_ops *o)
+{
+ u16 x;
+ struct pci_bus bus; /* Fake bus and device */
+ struct pci_dev dev;
+
+ if (pci_probe & PCI_NO_CHECKS)
+ return 1;
+ bus.number = 0;
+ dev.bus = &bus;
+ for(dev.devfn=0; dev.devfn < 0x100; dev.devfn++)
+ if ((!o->read_word(&dev, PCI_CLASS_DEVICE, &x) &&
+ (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)) ||
+ (!o->read_word(&dev, PCI_VENDOR_ID, &x) &&
+ (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ)))
+ return 1;
+ DBG("PCI: Sanity check failed\n");
+ return 0;
+}
+
+static struct pci_ops * __devinit pci_check_direct(void)
+{
+ unsigned int tmp;
+ unsigned long flags;
+
+ __save_flags(flags); __cli();
+
+ /*
+ * Check if configuration type 1 works.
+ */
+ if (pci_probe & PCI_PROBE_CONF1) {
+ outb (0x01, 0xCFB);
+ tmp = inl (0xCF8);
+ outl (0x80000000, 0xCF8);
+ if (inl (0xCF8) == 0x80000000 &&
+ pci_sanity_check(&pci_direct_conf1)) {
+ outl (tmp, 0xCF8);
+ __restore_flags(flags);
+ printk(KERN_INFO "PCI: Using configuration type 1\n");
+ request_region(0xCF8, 8, "PCI conf1");
+
+#ifdef CONFIG_MULTIQUAD
+ /* Multi-Quad has an extended PCI Conf1 */
+ if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ return &pci_direct_mq_conf1;
+#endif
+ return &pci_direct_conf1;
+ }
+ outl (tmp, 0xCF8);
+ }
+
+ /*
+ * Check if configuration type 2 works.
+ */
+ if (pci_probe & PCI_PROBE_CONF2) {
+ outb (0x00, 0xCFB);
+ outb (0x00, 0xCF8);
+ outb (0x00, 0xCFA);
+ if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00 &&
+ pci_sanity_check(&pci_direct_conf2)) {
+ __restore_flags(flags);
+ printk(KERN_INFO "PCI: Using configuration type 2\n");
+ request_region(0xCF8, 4, "PCI conf2");
+ return &pci_direct_conf2;
+ }
+ }
+
+ __restore_flags(flags);
+ return NULL;
+}
+
+#endif
+
+/*
+ * BIOS32 and PCI BIOS handling.
+ */
+
+#ifdef CONFIG_PCI_BIOS
+
+#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX
+#define PCIBIOS_PCI_BIOS_PRESENT 0xb101
+#define PCIBIOS_FIND_PCI_DEVICE 0xb102
+#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103
+#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106
+#define PCIBIOS_READ_CONFIG_BYTE 0xb108
+#define PCIBIOS_READ_CONFIG_WORD 0xb109
+#define PCIBIOS_READ_CONFIG_DWORD 0xb10a
+#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b
+#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c
+#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d
+#define PCIBIOS_GET_ROUTING_OPTIONS 0xb10e
+#define PCIBIOS_SET_PCI_HW_INT 0xb10f
+
+/* BIOS32 signature: "_32_" */
+#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
+
+/* PCI signature: "PCI " */
+#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24))
+
+/* PCI service signature: "$PCI" */
+#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24))
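+/* These packings follow in-memory (little-endian) byte order; e.g.
+   BIOS32_SIGNATURE evaluates to 0x5f32335f, "_32_" read as a dword. */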
+
+/* PCI BIOS hardware mechanism flags */
+#define PCIBIOS_HW_TYPE1 0x01
+#define PCIBIOS_HW_TYPE2 0x02
+#define PCIBIOS_HW_TYPE1_SPEC 0x10
+#define PCIBIOS_HW_TYPE2_SPEC 0x20
+
+/*
+ * This is the standard structure used to identify the entry point
+ * to the BIOS32 Service Directory, as documented in
+ * Standard BIOS 32-bit Service Directory Proposal
+ * Revision 0.4 May 24, 1993
+ * Phoenix Technologies Ltd.
+ * Norwood, MA
+ * and the PCI BIOS specification.
+ */
+
+union bios32 {
+ struct {
+ unsigned long signature; /* _32_ */
+ unsigned long entry; /* 32 bit physical address */
+ unsigned char revision; /* Revision level, 0 */
+ unsigned char length; /* Length in paragraphs should be 01 */
+ unsigned char checksum; /* All bytes must add up to zero */
+ unsigned char reserved[5]; /* Must be zero */
+ } fields;
+ char chars[16];
+};
+
+/*
+ * Physical address of the service directory. I don't know if we're
+ * allowed to have more than one of these or not, so just in case
+ * we'll make pcibios_present() take a memory start parameter and store
+ * the array there.
+ */
+
+static struct {
+ unsigned long address;
+ unsigned short segment;
+} bios32_indirect = { 0, __KERNEL_CS };
+
+/*
+ * Returns the entry point for the given service, NULL on error
+ */
+
+static unsigned long bios32_service(unsigned long service)
+{
+ unsigned char return_code; /* %al */
+ unsigned long address; /* %ebx */
+ unsigned long length; /* %ecx */
+ unsigned long entry; /* %edx */
+ unsigned long flags;
+
+ __save_flags(flags); __cli();
+ __asm__("lcall *(%%edi); cld"
+ : "=a" (return_code),
+ "=b" (address),
+ "=c" (length),
+ "=d" (entry)
+ : "0" (service),
+ "1" (0),
+ "D" (&bios32_indirect));
+ __restore_flags(flags);
+
+ switch (return_code) {
+ case 0:
+ return address + entry;
+ case 0x80: /* Not present */
+ printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service);
+ return 0;
+ default: /* Shouldn't happen */
+ printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n",
+ service, return_code);
+ return 0;
+ }
+}
+
+static struct {
+ unsigned long address;
+ unsigned short segment;
+} pci_indirect = { 0, __KERNEL_CS };
+
+static int pci_bios_present;
+
+static int __devinit check_pcibios(void)
+{
+ u32 signature, eax, ebx, ecx;
+ u8 status, major_ver, minor_ver, hw_mech;
+ unsigned long flags, pcibios_entry;
+
+ if ((pcibios_entry = bios32_service(PCI_SERVICE))) {
+ pci_indirect.address = pcibios_entry + PAGE_OFFSET;
+
+ __save_flags(flags); __cli();
+ __asm__(
+ "lcall *(%%edi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=d" (signature),
+ "=a" (eax),
+ "=b" (ebx),
+ "=c" (ecx)
+ : "1" (PCIBIOS_PCI_BIOS_PRESENT),
+ "D" (&pci_indirect)
+ : "memory");
+ __restore_flags(flags);
+
+ status = (eax >> 8) & 0xff;
+ hw_mech = eax & 0xff;
+ major_ver = (ebx >> 8) & 0xff;
+ minor_ver = ebx & 0xff;
+ if (pcibios_last_bus < 0)
+ pcibios_last_bus = ecx & 0xff;
+ DBG("PCI: BIOS probe returned s=%02x hw=%02x ver=%02x.%02x l=%02x\n",
+ status, hw_mech, major_ver, minor_ver, pcibios_last_bus);
+ if (status || signature != PCI_SIGNATURE) {
+ printk (KERN_ERR "PCI: BIOS BUG #%x[%08x] found\n",
+ status, signature);
+ return 0;
+ }
+ printk(KERN_INFO "PCI: PCI BIOS revision %x.%02x entry at 0x%lx, last bus=%d\n",
+ major_ver, minor_ver, pcibios_entry, pcibios_last_bus);
+#ifdef CONFIG_PCI_DIRECT
+ if (!(hw_mech & PCIBIOS_HW_TYPE1))
+ pci_probe &= ~PCI_PROBE_CONF1;
+ if (!(hw_mech & PCIBIOS_HW_TYPE2))
+ pci_probe &= ~PCI_PROBE_CONF2;
+#endif
+ return 1;
+ }
+ return 0;
+}
+
+static int __devinit pci_bios_find_device (unsigned short vendor, unsigned short device_id,
+ unsigned short index, unsigned char *bus, unsigned char *device_fn)
+{
+ unsigned short bx;
+ unsigned short ret;
+
+ __asm__("lcall *(%%edi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=b" (bx),
+ "=a" (ret)
+ : "1" (PCIBIOS_FIND_PCI_DEVICE),
+ "c" (device_id),
+ "d" (vendor),
+ "S" ((int) index),
+ "D" (&pci_indirect));
+ *bus = (bx >> 8) & 0xff;
+ *device_fn = bx & 0xff;
+ return (int) (ret & 0xff00) >> 8;
+}
+
+static int pci_bios_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value)
+{
+ unsigned long result = 0;
+ unsigned long flags;
+ unsigned long bx = ((bus << 8) | (dev << 3) | fn);
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ switch (len) {
+ case 1:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (result)
+ : "1" (PCIBIOS_READ_CONFIG_BYTE),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ case 2:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (result)
+ : "1" (PCIBIOS_READ_CONFIG_WORD),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ case 4:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (result)
+ : "1" (PCIBIOS_READ_CONFIG_DWORD),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return (int)((result & 0xff00) >> 8);
+}
+
+static int pci_bios_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value)
+{
+ unsigned long result = 0;
+ unsigned long flags;
+ unsigned long bx = ((bus << 8) | (dev << 3) | fn);
+
+	if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ switch (len) {
+ case 1:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (result)
+ : "0" (PCIBIOS_WRITE_CONFIG_BYTE),
+ "c" (value),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ case 2:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (result)
+ : "0" (PCIBIOS_WRITE_CONFIG_WORD),
+ "c" (value),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ case 4:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (result)
+ : "0" (PCIBIOS_WRITE_CONFIG_DWORD),
+ "c" (value),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return (int)((result & 0xff00) >> 8);
+}
+
+static int pci_bios_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+ int result;
+ u32 data;
+
+ if (!value)
+ BUG();
+
+ result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, &data);
+
+ *value = (u8)data;
+
+ return result;
+}
+
+static int pci_bios_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+ int result;
+ u32 data;
+
+ if (!value)
+ BUG();
+
+ result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, &data);
+
+ *value = (u16)data;
+
+ return result;
+}
+
+static int pci_bios_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+ if (!value)
+ BUG();
+
+ return pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_bios_write_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+ return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_bios_write_config_word(struct pci_dev *dev, int where, u16 value)
+{
+ return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_bios_write_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+ return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+
+/*
+ * Function table for BIOS32 access
+ */
+
+static struct pci_ops pci_bios_access = {
+ pci_bios_read_config_byte,
+ pci_bios_read_config_word,
+ pci_bios_read_config_dword,
+ pci_bios_write_config_byte,
+ pci_bios_write_config_word,
+ pci_bios_write_config_dword
+};
+
+/*
+ * Try to find PCI BIOS.
+ */
+
+static struct pci_ops * __devinit pci_find_bios(void)
+{
+ union bios32 *check;
+ unsigned char sum;
+ int i, length;
+
+ /*
+ * Follow the standard procedure for locating the BIOS32 Service
+ * directory by scanning the permissible address range from
+ * 0xe0000 through 0xfffff for a valid BIOS32 structure.
+ */
+
+ for (check = (union bios32 *) __va(0xe0000);
+ check <= (union bios32 *) __va(0xffff0);
+ ++check) {
+ if (check->fields.signature != BIOS32_SIGNATURE)
+ continue;
+ length = check->fields.length * 16;
+ if (!length)
+ continue;
+ sum = 0;
+ for (i = 0; i < length ; ++i)
+ sum += check->chars[i];
+ if (sum != 0)
+ continue;
+ if (check->fields.revision != 0) {
+ printk("PCI: unsupported BIOS32 revision %d at 0x%p\n",
+ check->fields.revision, check);
+ continue;
+ }
+ DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check);
+ if (check->fields.entry >= 0x100000) {
+ printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check);
+ return NULL;
+ } else {
+ unsigned long bios32_entry = check->fields.entry;
+ DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
+ bios32_indirect.address = bios32_entry + PAGE_OFFSET;
+ if (check_pcibios())
+ return &pci_bios_access;
+ }
+		break;	/* Hopefully there cannot be more than one BIOS32... */
+ }
+
+ return NULL;
+}
+
+/*
+ * Sort the device list according to PCI BIOS. Nasty hack, but since some
+ * fool forgot to define the `correct' device order in the PCI BIOS specs
+ * and we want to be (possibly bug-to-bug ;-]) compatible with older kernels
+ * which used BIOS ordering, we are bound to do this...
+ */
+
+static void __devinit pcibios_sort(void)
+{
+ LIST_HEAD(sorted_devices);
+ struct list_head *ln;
+ struct pci_dev *dev, *d;
+ int idx, found;
+ unsigned char bus, devfn;
+
+ DBG("PCI: Sorting device list...\n");
+ while (!list_empty(&pci_devices)) {
+ ln = pci_devices.next;
+ dev = pci_dev_g(ln);
+ idx = found = 0;
+ while (pci_bios_find_device(dev->vendor, dev->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) {
+ idx++;
+ for (ln=pci_devices.next; ln != &pci_devices; ln=ln->next) {
+ d = pci_dev_g(ln);
+ if (d->bus->number == bus && d->devfn == devfn) {
+ list_del(&d->global_list);
+ list_add_tail(&d->global_list, &sorted_devices);
+ if (d == dev)
+ found = 1;
+ break;
+ }
+ }
+ if (ln == &pci_devices) {
+ printk(KERN_WARNING "PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn);
+ /*
+ * We must not continue scanning as several buggy BIOSes
+ * return garbage after the last device. Grr.
+ */
+ break;
+ }
+ }
+ if (!found) {
+ printk(KERN_WARNING "PCI: Device %02x:%02x not found by BIOS\n",
+ dev->bus->number, dev->devfn);
+ list_del(&dev->global_list);
+ list_add_tail(&dev->global_list, &sorted_devices);
+ }
+ }
+ list_splice(&sorted_devices, &pci_devices);
+}
+
+/*
+ * BIOS Functions for IRQ Routing
+ */
+
+struct irq_routing_options {
+ u16 size;
+ struct irq_info *table;
+ u16 segment;
+} __attribute__((packed));
+
+struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void)
+{
+ struct irq_routing_options opt;
+ struct irq_routing_table *rt = NULL;
+ int ret, map;
+ unsigned long page;
+
+ if (!pci_bios_present)
+ return NULL;
+ page = __get_free_page(GFP_KERNEL);
+ if (!page)
+ return NULL;
+ opt.table = (struct irq_info *) page;
+ opt.size = PAGE_SIZE;
+ opt.segment = __KERNEL_DS;
+
+ DBG("PCI: Fetching IRQ routing table... ");
+ __asm__("push %%es\n\t"
+ "push %%ds\n\t"
+ "pop %%es\n\t"
+ "lcall *(%%esi); cld\n\t"
+ "pop %%es\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret),
+ "=b" (map)
+ : "0" (PCIBIOS_GET_ROUTING_OPTIONS),
+ "1" (0),
+ "D" ((long) &opt),
+ "S" (&pci_indirect));
+ DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map);
+ if (ret & 0xff00)
+ printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff);
+ else if (opt.size) {
+ rt = kmalloc(sizeof(struct irq_routing_table) + opt.size, GFP_KERNEL);
+ if (rt) {
+ memset(rt, 0, sizeof(struct irq_routing_table));
+ rt->size = opt.size + sizeof(struct irq_routing_table);
+ rt->exclusive_irqs = map;
+ memcpy(rt->slots, (void *) page, opt.size);
+ printk(KERN_INFO "PCI: Using BIOS Interrupt Routing Table\n");
+ }
+ }
+ free_page(page);
+ return rt;
+}
+
+
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq)
+{
+ int ret;
+
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret)
+ : "0" (PCIBIOS_SET_PCI_HW_INT),
+ "b" ((dev->bus->number << 8) | dev->devfn),
+ "c" ((irq << 8) | (pin + 10)),
+ "S" (&pci_indirect));
+ return !(ret & 0xff00);
+}
+
+#endif
+
+/*
+ * Several buggy motherboards address only 16 devices and mirror
+ * them to the next 16 IDs. We try to detect this `feature' on all
+ * primary buses (those containing host bridges as they are
+ * expected to be unique) and remove the ghost devices.
+ */
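+/*
+ * Example: with mirror = PCI_DEVFN(16,0) = 0x80, a real device at
+ * devfn 0x08 would show a ghost at devfn 0x88 with identical IDs,
+ * class and resources.
+ */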
+
+static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
+{
+ struct list_head *ln, *mn;
+ struct pci_dev *d, *e;
+ int mirror = PCI_DEVFN(16,0);
+ int seen_host_bridge = 0;
+ int i;
+
+ DBG("PCI: Scanning for ghost devices on bus %d\n", b->number);
+ for (ln=b->devices.next; ln != &b->devices; ln=ln->next) {
+ d = pci_dev_b(ln);
+ if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+ seen_host_bridge++;
+ for (mn=ln->next; mn != &b->devices; mn=mn->next) {
+ e = pci_dev_b(mn);
+ if (e->devfn != d->devfn + mirror ||
+ e->vendor != d->vendor ||
+ e->device != d->device ||
+ e->class != d->class)
+ continue;
+ for(i=0; i<PCI_NUM_RESOURCES; i++)
+ if (e->resource[i].start != d->resource[i].start ||
+ e->resource[i].end != d->resource[i].end ||
+ e->resource[i].flags != d->resource[i].flags)
+ continue;
+ break;
+ }
+ if (mn == &b->devices)
+ return;
+ }
+ if (!seen_host_bridge)
+ return;
+ printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number);
+
+ ln = &b->devices;
+ while (ln->next != &b->devices) {
+ d = pci_dev_b(ln->next);
+ if (d->devfn >= mirror) {
+ list_del(&d->global_list);
+ list_del(&d->bus_list);
+ kfree(d);
+ } else
+ ln = ln->next;
+ }
+}
+
+/*
+ * Discover remaining PCI buses in case there are peer host bridges.
+ * We use the number of last PCI bus provided by the PCI BIOS.
+ */
+static void __devinit pcibios_fixup_peer_bridges(void)
+{
+ int n;
+ struct pci_bus bus;
+ struct pci_dev dev;
+ u16 l;
+
+ if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff)
+ return;
+ DBG("PCI: Peer bridge fixup\n");
+ for (n=0; n <= pcibios_last_bus; n++) {
+ if (pci_bus_exists(&pci_root_buses, n))
+ continue;
+ bus.number = n;
+ bus.ops = pci_root_ops;
+ dev.bus = &bus;
+ for(dev.devfn=0; dev.devfn<256; dev.devfn += 8)
+ if (!pci_read_config_word(&dev, PCI_VENDOR_ID, &l) &&
+ l != 0x0000 && l != 0xffff) {
+ DBG("Found device at %02x:%02x [%04x]\n", n, dev.devfn, l);
+ printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n);
+ pci_scan_bus(n, pci_root_ops, NULL);
+ break;
+ }
+ }
+}
+
+/*
+ * Exceptions for specific devices. Usually work-arounds for fatal design flaws.
+ */
+
+static void __devinit pci_fixup_i450nx(struct pci_dev *d)
+{
+ /*
+ * i450NX -- Find and scan all secondary buses on all PXB's.
+ */
+ int pxb, reg;
+ u8 busno, suba, subb;
+#ifdef CONFIG_MULTIQUAD
+ int quad = BUS2QUAD(d->bus->number);
+#endif
+ printk("PCI: Searching for i450NX host bridges on %s\n", d->slot_name);
+ reg = 0xd0;
+ for(pxb=0; pxb<2; pxb++) {
+ pci_read_config_byte(d, reg++, &busno);
+ pci_read_config_byte(d, reg++, &suba);
+ pci_read_config_byte(d, reg++, &subb);
+ DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
+ if (busno)
+ pci_scan_bus(QUADLOCAL2BUS(quad,busno), pci_root_ops, NULL); /* Bus A */
+ if (suba < subb)
+ pci_scan_bus(QUADLOCAL2BUS(quad,suba+1), pci_root_ops, NULL); /* Bus B */
+ }
+ pcibios_last_bus = -1;
+}
+
+static void __devinit pci_fixup_i450gx(struct pci_dev *d)
+{
+ /*
+ * i450GX and i450KX -- Find and scan all secondary buses.
+ * (called separately for each PCI bridge found)
+ */
+ u8 busno;
+ pci_read_config_byte(d, 0x4a, &busno);
+ printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", d->slot_name, busno);
+ pci_scan_bus(busno, pci_root_ops, NULL);
+ pcibios_last_bus = -1;
+}
+
+static void __devinit pci_fixup_umc_ide(struct pci_dev *d)
+{
+ /*
+	 * UM8886BF IDE controller sets region type bits incorrectly,
+	 * therefore the regions look like memory despite being I/O.
+ */
+ int i;
+
+ printk(KERN_WARNING "PCI: Fixing base address flags for device %s\n", d->slot_name);
+ for(i=0; i<4; i++)
+ d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO;
+}
+
+static void __devinit pci_fixup_ncr53c810(struct pci_dev *d)
+{
+ /*
+ * NCR 53C810 returns class code 0 (at least on some systems).
+ * Fix class to be PCI_CLASS_STORAGE_SCSI
+ */
+ if (!d->class) {
+ printk("PCI: fixing NCR 53C810 class code for %s\n", d->slot_name);
+ d->class = PCI_CLASS_STORAGE_SCSI << 8;
+ }
+}
+
+static void __devinit pci_fixup_ide_bases(struct pci_dev *d)
+{
+ int i;
+
+ /*
+ * PCI IDE controllers use non-standard I/O port decoding, respect it.
+ */
+ if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE)
+ return;
+ DBG("PCI: IDE base address fixup for %s\n", d->slot_name);
+ for(i=0; i<4; i++) {
+ struct resource *r = &d->resource[i];
+ if ((r->start & ~0x80) == 0x374) {
+ r->start |= 2;
+ r->end = r->start;
+ }
+ }
+}
+
+static void __devinit pci_fixup_ide_trash(struct pci_dev *d)
+{
+ int i;
+
+ /*
+	 * There exist PCI IDE controllers which have utter garbage
+	 * in the first four base registers. Ignore it.
+ */
+ DBG("PCI: IDE base address trash cleared for %s\n", d->slot_name);
+ for(i=0; i<4; i++)
+ d->resource[i].start = d->resource[i].end = d->resource[i].flags = 0;
+}
+
+static void __devinit pci_fixup_latency(struct pci_dev *d)
+{
+ /*
+ * SiS 5597 and 5598 chipsets require latency timer set to
+ * at most 32 to avoid lockups.
+ */
+ DBG("PCI: Setting max latency to 32\n");
+ pcibios_max_latency = 32;
+}
+
+static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d)
+{
+ /*
+ * PIIX4 ACPI device: hardwired IRQ9
+ */
+ d->irq = 9;
+}
+
+/*
+ * Addresses issues with problems in the memory write queue timer in
+ * certain VIA Northbridges. This bugfix is per VIA's specifications,
+ * except for the KL133/KM133: clearing bit 5 on those Northbridges seems
+ * to trigger a bug in its integrated ProSavage video card, which
+ * causes screen corruption. We only clear bits 6 and 7 for that chipset,
+ * until VIA can provide us with definitive information on why screen
+ * corruption occurs, and what exactly those bits do.
+ *
+ * VIA 8363,8622,8361 Northbridges:
+ * - bits 5, 6, 7 at offset 0x55 need to be turned off
+ * VIA 8367 (KT266x) Northbridges:
+ * - bits 5, 6, 7 at offset 0x95 need to be turned off
+ * VIA 8363 rev 0x81/0x84 (KL133/KM133) Northbridges:
+ * - bits 6, 7 at offset 0x55 need to be turned off
+ */
+
+#define VIA_8363_KL133_REVISION_ID 0x81
+#define VIA_8363_KM133_REVISION_ID 0x84
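+/* Example: with the default mask 0x1f, a register value of 0xe0 has
+   v & ~mask = 0xe0 set, so it is rewritten as 0xe0 & 0x1f = 0x00. */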
+
+static void __init pci_fixup_via_northbridge_bug(struct pci_dev *d)
+{
+ u8 v;
+ u8 revision;
+ int where = 0x55;
+ int mask = 0x1f; /* clear bits 5, 6, 7 by default */
+
+ pci_read_config_byte(d, PCI_REVISION_ID, &revision);
+
+ if (d->device == PCI_DEVICE_ID_VIA_8367_0) {
+		/* Fix PCI bus latency issues caused by a NB BIOS error:
+		   it appears the bug-free^Wreduced KT266x BIOS forces
+		   NB latency to zero. */
+ pci_write_config_byte(d, PCI_LATENCY_TIMER, 0);
+
+ where = 0x95; /* the memory write queue timer register is
+ different for the KT266x's: 0x95 not 0x55 */
+ } else if (d->device == PCI_DEVICE_ID_VIA_8363_0 &&
+ (revision == VIA_8363_KL133_REVISION_ID ||
+ revision == VIA_8363_KM133_REVISION_ID)) {
+ mask = 0x3f; /* clear only bits 6 and 7; clearing bit 5
+ causes screen corruption on the KL133/KM133 */
+ }
+
+ pci_read_config_byte(d, where, &v);
+ if (v & ~mask) {
+ printk("Disabling VIA memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \
+ d->device, revision, where, v, mask, v & mask);
+ v &= mask;
+ pci_write_config_byte(d, where, v);
+ }
+}
+
+/*
+ * For some reason Intel decided that certain parts of their
+ * 815, 845 and some other chipsets must look like PCI-to-PCI bridges
+ * while they are obviously not. The 82801 family (AA, AB, BAM/CAM,
+ * BA/CA/DB and E) PCI bridges are actually HUB-to-PCI ones, according
+ * to Intel terminology. These devices forward all addresses from the
+ * system to the PCI bus no matter what their window settings are, so
+ * they are "transparent" (or subtractive-decoding) from the programmer's
+ * point of view.
+ */
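+/*
+ * E.g. an 82801 hub-to-PCI bridge with device ID 0x244e satisfies
+ * (dev->device & 0xff00) == 0x2400 below and is marked transparent.
+ */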
+static void __init pci_fixup_transparent_bridge(struct pci_dev *dev)
+{
+ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
+ (dev->device & 0xff00) == 0x2400)
+ dev->transparent = 1;
+}
+
+struct pci_fixup pcibios_fixups[] = {
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5513, pci_fixup_ide_trash },
+ { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci_fixup_piix4_acpi },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810 },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge },
+ { 0 }
+};
+
+/*
+ * Called after each bus is probed, but before its children
+ * are examined.
+ */
+
+void __devinit pcibios_fixup_bus(struct pci_bus *b)
+{
+ pcibios_fixup_ghosts(b);
+ pci_read_bridge_bases(b);
+}
+
+
+void __devinit pcibios_config_init(void)
+{
+ /*
+ * Try all known PCI access methods. Note that we support using
+ * both PCI BIOS and direct access, with a preference for direct.
+ */
+
+#ifdef CONFIG_PCI_DIRECT
+ struct pci_ops *tmp = NULL;
+#endif
+
+#ifdef CONFIG_PCI_BIOS
+ if ((pci_probe & PCI_PROBE_BIOS)
+ && ((pci_root_ops = pci_find_bios()))) {
+ pci_probe |= PCI_BIOS_SORT;
+ pci_bios_present = 1;
+ pci_config_read = pci_bios_read;
+ pci_config_write = pci_bios_write;
+ }
+#endif
+
+#ifdef CONFIG_PCI_DIRECT
+ if ((pci_probe & (PCI_PROBE_CONF1 | PCI_PROBE_CONF2))
+ && (tmp = pci_check_direct())) {
+ pci_root_ops = tmp;
+ if (pci_root_ops == &pci_direct_conf1) {
+ pci_config_read = pci_conf1_read;
+ pci_config_write = pci_conf1_write;
+ }
+ else {
+ pci_config_read = pci_conf2_read;
+ pci_config_write = pci_conf2_write;
+ }
+ }
+#endif
+
+ return;
+}
+
+void __init pcibios_init(void)
+{
+ int quad;
+
+ if (!pci_root_ops)
+ pcibios_config_init();
+ if (!pci_root_ops) {
+ printk(KERN_WARNING "PCI: System does not support PCI\n");
+ return;
+ }
+
+ printk(KERN_INFO "PCI: Probing PCI hardware\n");
+ pci_root_bus = pci_scan_bus(0, pci_root_ops, NULL);
+ if (clustered_apic_mode && (numnodes > 1)) {
+ for (quad = 1; quad < numnodes; ++quad) {
+ printk("Scanning PCI bus %d for quad %d\n",
+ QUADLOCAL2BUS(quad,0), quad);
+ pci_scan_bus(QUADLOCAL2BUS(quad,0),
+ pci_root_ops, NULL);
+ }
+ }
+
+ pcibios_irq_init();
+ pcibios_fixup_peer_bridges();
+ pcibios_fixup_irqs();
+ pcibios_resource_survey();
+
+#ifdef CONFIG_PCI_BIOS
+ if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
+ pcibios_sort();
+#endif
+}
+
+char * __devinit pcibios_setup(char *str)
+{
+ if (!strcmp(str, "off")) {
+ pci_probe = 0;
+ return NULL;
+ }
+#ifdef CONFIG_PCI_BIOS
+ else if (!strcmp(str, "bios")) {
+ pci_probe = PCI_PROBE_BIOS;
+ return NULL;
+ } else if (!strcmp(str, "nobios")) {
+ pci_probe &= ~PCI_PROBE_BIOS;
+ return NULL;
+ } else if (!strcmp(str, "nosort")) {
+ pci_probe |= PCI_NO_SORT;
+ return NULL;
+ } else if (!strcmp(str, "biosirq")) {
+ pci_probe |= PCI_BIOS_IRQ_SCAN;
+ return NULL;
+ }
+#endif
+#ifdef CONFIG_PCI_DIRECT
+ else if (!strcmp(str, "conf1")) {
+ pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS;
+ return NULL;
+ }
+ else if (!strcmp(str, "conf2")) {
+ pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS;
+ return NULL;
+ }
+#endif
+ else if (!strcmp(str, "rom")) {
+ pci_probe |= PCI_ASSIGN_ROMS;
+ return NULL;
+ } else if (!strcmp(str, "assign-busses")) {
+ pci_probe |= PCI_ASSIGN_ALL_BUSSES;
+ return NULL;
+ } else if (!strncmp(str, "irqmask=", 8)) {
+ pcibios_irq_mask = simple_strtol(str+8, NULL, 0);
+ return NULL;
+ } else if (!strncmp(str, "lastbus=", 8)) {
+ pcibios_last_bus = simple_strtol(str+8, NULL, 0);
+ return NULL;
+ }
+ return str;
+}
+
+unsigned int pcibios_assign_all_busses(void)
+{
+ return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+ int err;
+
+ if ((err = pcibios_enable_resources(dev, mask)) < 0)
+ return err;
+ pcibios_enable_irq(dev);
+ return 0;
+}
diff --git a/xen/arch/i386/process.c b/xen/arch/i386/process.c
new file mode 100644
index 0000000000..3c048d72bf
--- /dev/null
+++ b/xen/arch/i386/process.c
@@ -0,0 +1,418 @@
+/*
+ * linux/arch/i386/kernel/process.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+#define __KERNEL_SYSCALLS__
+#include <xeno/config.h>
+#include <xeno/lib.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/smp.h>
+#include <asm/ptrace.h>
+#include <xeno/delay.h>
+#include <xeno/interrupt.h>
+#include <asm/mc146818rtc.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+
+#include <xeno/irq.h>
+#include <xeno/event.h>
+
+asmlinkage void ret_from_newdomain(void) __asm__("ret_from_newdomain");
+
+int hlt_counter;
+
+void disable_hlt(void)
+{
+ hlt_counter++;
+}
+
+void enable_hlt(void)
+{
+ hlt_counter--;
+}
+
+/*
+ * We use this if we don't have any better
+ * idle routine..
+ */
+static void default_idle(void)
+{
+ if (!hlt_counter) {
+ __cli();
+ if (!current->hyp_events && !softirq_pending(smp_processor_id()))
+ safe_halt();
+ else
+ __sti();
+ }
+}
+
+/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+void cpu_idle (void)
+{
+ int cpu = smp_processor_id();
+
+ ASSERT(current->domain == IDLE_DOMAIN_ID);
+
+ current->has_cpu = 1;
+ (void)wake_up(current);
+ schedule();
+
+ /*
+     * Declare CPU setup done to the boot processor; the memory
+     * barrier ensures the state is visible before continuing.
+ */
+ smp_mb();
+ init_idle();
+
+ for ( ; ; )
+ {
+ while (!current->hyp_events && !softirq_pending(cpu))
+ default_idle();
+ do_hyp_events();
+ do_softirq();
+ }
+}
+
+static long no_idt[2];
+static int reboot_mode;
+int reboot_thru_bios = 0;
+
+#ifdef CONFIG_SMP
+int reboot_smp = 0;
+static int reboot_cpu = -1;
+/* shamelessly grabbed from lib/vsprintf.c for readability */
+#define is_digit(c) ((c) >= '0' && (c) <= '9')
+#endif
+
+
+static inline void kb_wait(void)
+{
+ int i;
+
+ for (i=0; i<0x10000; i++)
+ if ((inb_p(0x64) & 0x02) == 0)
+ break;
+}
+
+
+void machine_restart(char * __unused)
+{
+#ifdef CONFIG_SMP
+ int cpuid;
+
+ cpuid = GET_APIC_ID(apic_read(APIC_ID));
+
+ if (reboot_smp) {
+
+		/* check to see if reboot_cpu is valid;
+		   if it's not, default to the BSP */
+ if ((reboot_cpu == -1) ||
+ (reboot_cpu > (NR_CPUS -1)) ||
+ !(phys_cpu_present_map & (1<<cpuid)))
+ reboot_cpu = boot_cpu_physical_apicid;
+
+		reboot_smp = 0; /* use this as a flag so we only go through this once */
+		/* Re-run this function on the other CPUs: they will fall
+		   through this section since we have cleared reboot_smp,
+		   and do the reboot if they are the correct CPU;
+		   otherwise they halt. */
+ if (reboot_cpu != cpuid)
+ smp_call_function((void *)machine_restart , NULL, 1, 0);
+ }
+
+	/* if reboot_cpu is still -1, then we want a traditional reboot,
+	   and if we are not running on the reboot_cpu, halt */
+ if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
+ for (;;)
+ __asm__ __volatile__ ("hlt");
+ }
+ /*
+ * Stop all CPUs and turn off local APICs and the IO-APIC, so
+ * other OSs see a clean IRQ state.
+ */
+ smp_send_stop();
+ disable_IO_APIC();
+#endif
+
+ if(!reboot_thru_bios) {
+ /* rebooting needs to touch the page at absolute addr 0 */
+ *((unsigned short *)__va(0x472)) = reboot_mode;
+ for (;;) {
+ int i;
+ for (i=0; i<100; i++) {
+ kb_wait();
+ udelay(50);
+ outb(0xfe,0x64); /* pulse reset low */
+ udelay(50);
+ }
+ /* That didn't work - force a triple fault.. */
+ __asm__ __volatile__("lidt %0": :"m" (no_idt));
+ __asm__ __volatile__("int3");
+ }
+ }
+
+ panic("Need to reinclude BIOS reboot code\n");
+}
+
+void machine_halt(void)
+{
+ machine_restart(0);
+}
+
+void machine_power_off(void)
+{
+ machine_restart(0);
+}
+
+extern void show_trace(unsigned long* esp);
+
+void show_regs(struct pt_regs * regs)
+{
+ unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+
+ printk("\n");
+ printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id());
+ if (regs->xcs & 3)
+ printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
+ printk(" EFLAGS: %08lx\n",regs->eflags);
+ printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+ regs->eax,regs->ebx,regs->ecx,regs->edx);
+ printk("ESI: %08lx EDI: %08lx EBP: %08lx",
+ regs->esi, regs->edi, regs->ebp);
+ printk(" DS: %04x ES: %04x\n",
+ 0xffff & regs->xds,0xffff & regs->xes);
+
+ __asm__("movl %%cr0, %0": "=r" (cr0));
+ __asm__("movl %%cr2, %0": "=r" (cr2));
+ __asm__("movl %%cr3, %0": "=r" (cr3));
+ /* This could fault if %cr4 does not exist */
+ __asm__("1: movl %%cr4, %0 \n"
+ "2: \n"
+ ".section __ex_table,\"a\" \n"
+ ".long 1b,2b \n"
+ ".previous \n"
+ : "=r" (cr4): "0" (0));
+ printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
+ show_trace(&regs->esp);
+}
+
+/*
+ * No need to lock the MM as we are the last user
+ */
+void release_segments(struct mm_struct *mm)
+{
+#if 0
+	void * ldt = mm->context.segments;
+
+ /*
+ * free the LDT
+ */
+ if (ldt) {
+		mm->context.segments = NULL;
+ clear_LDT();
+ vfree(ldt);
+ }
+#endif
+}
+
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+ /* nothing to do ... */
+}
+
+void flush_thread(void)
+{
+ struct task_struct *tsk = current;
+
+ memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+ /*
+ * Forget coprocessor state..
+ */
+ clear_fpu(tsk);
+ tsk->flags &= ~PF_DONEFPUINIT;
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+#if 0
+ if (dead_task->mm) {
+ void * ldt = dead_task->mm.context.segments;
+
+ // temporary debugging check
+ if (ldt) {
+ printk("WARNING: dead process %8s still has LDT? <%p>\n",
+ dead_task->comm, ldt);
+ BUG();
+ }
+ }
+#endif
+}
+
+/*
+ * We do not have to muck with descriptors here; that is
+ * done in switch_mm() as needed.
+ */
+void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
+{
+#if 0
+ struct mm_struct * old_mm;
+ void *old_ldt, *ldt;
+
+ ldt = NULL;
+ old_mm = current->mm;
+ if (old_mm && (old_ldt = old_mm->context.segments) != NULL) {
+ /*
+ * Completely new LDT, we initialize it from the parent:
+ */
+ ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+ if (!ldt)
+ printk(KERN_WARNING "ldt allocation failed\n");
+ else
+ memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
+ }
+ new_mm->context.segments = ldt;
+ new_mm->context.cpuvalid = ~0UL; /* valid on all CPUs - they can't have stale data */
+#endif
+}
+
+
+void new_thread(struct task_struct *p,
+ unsigned long start_pc,
+ unsigned long start_stack,
+ unsigned long start_info)
+{
+ struct pt_regs * regs;
+
+ regs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1;
+ memset(regs, 0, sizeof(*regs));
+
+ /*
+ * Initial register values:
+ * DS,ES,FS,GS = __GUEST_DS
+ * CS:EIP = __GUEST_CS:start_pc
+ * SS:ESP = __GUEST_DS:start_stack
+ * ESI = start_info
+ * [EAX,EBX,ECX,EDX,EDI,EBP are zero]
+ */
+ p->thread.fs = p->thread.gs = __GUEST_DS;
+ regs->xds = regs->xes = regs->xss = __GUEST_DS;
+ regs->xcs = __GUEST_CS;
+ regs->eip = start_pc;
+ regs->esp = start_stack;
+ regs->esi = start_info;
+
+ p->thread.esp = (unsigned long) regs;
+ p->thread.esp0 = (unsigned long) (regs+1);
+
+ p->thread.eip = (unsigned long) ret_from_newdomain;
+
+ __save_flags(regs->eflags);
+ regs->eflags |= X86_EFLAGS_IF;
+
+ /* No fast trap at start of day. */
+ SET_DEFAULT_FAST_TRAP(&p->thread);
+}
+
+
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+ __asm__("movl %0,%%db" #register \
+ : /* no output */ \
+ :"r" (thread->debugreg[register]))
+
+/*
+ * switch_to(x,y) should switch tasks from x to y.
+ *
+ * We fsave/fwait so that an exception goes off at the right time
+ * (as a call from the fsave or fwait in effect) rather than to
+ * the wrong process. Lazy FP saving no longer makes any sense
+ * with modern CPUs, and this simplifies a lot of things (SMP
+ * and UP become the same).
+ *
+ * NOTE! We used to use the x86 hardware context switching. The
+ * reason for not using it any more becomes apparent when you
+ * try to recover gracefully from saved state that is no longer
+ * valid (stale segment register values in particular). With the
+ * hardware task-switch, there is no way to fix up bad state in
+ * a reasonable manner.
+ *
+ * The fact that Intel documents the hardware task-switching to
+ * be slow is largely a red herring - this code is not noticeably
+ * faster. However, there _is_ some room for improvement here,
+ * so the performance issues may eventually be a valid point.
+ * More important, however, is the fact that this allows us much
+ * more flexibility.
+ */
+/* NB. prev_p passed in %eax, next_p passed in %edx */
+void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+{
+ struct thread_struct *prev = &prev_p->thread,
+ *next = &next_p->thread;
+ struct tss_struct *tss = init_tss + smp_processor_id();
+
+ unlazy_fpu(prev_p);
+
+ /* Switch the fast-trap handler. */
+ CLEAR_FAST_TRAP(&prev_p->thread);
+ SET_FAST_TRAP(&next_p->thread);
+
+ tss->esp0 = next->esp0;
+ tss->esp1 = next->esp1;
+ tss->ss1 = next->ss1;
+
+ /*
+ * Save away %fs and %gs. No need to save %es and %ds, as
+ * those are always kernel segments while inside the kernel.
+ */
+ asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs));
+ asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
+
+ /* Switch GDT and LDT. */
+ __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt));
+ __load_LDT(next_p->mm.ldt_sel);
+
+ /*
+ * Restore %fs and %gs.
+ */
+ loadsegment(fs, next->fs);
+ loadsegment(gs, next->gs);
+
+ /*
+ * Now maybe reload the debug registers
+ */
+ if (next->debugreg[7]){
+ loaddebug(next, 0);
+ loaddebug(next, 1);
+ loaddebug(next, 2);
+ loaddebug(next, 3);
+ /* no 4 and 5 */
+ loaddebug(next, 6);
+ loaddebug(next, 7);
+ }
+
+}
diff --git a/xen/arch/i386/rwlock.c b/xen/arch/i386/rwlock.c
new file mode 100644
index 0000000000..3b9b689c8a
--- /dev/null
+++ b/xen/arch/i386/rwlock.c
@@ -0,0 +1,33 @@
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+
+#if defined(CONFIG_SMP)
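+/*
+ * Out-of-line slow paths for the inline read/write lock fast paths.
+ * The fast path has already adjusted the lock word, so each routine
+ * first undoes that adjustment, spins until the lock looks free, and
+ * retries. RW_LOCK_BIAS is the resting value meaning "no readers, no
+ * writers"; the lock address is passed in %eax.
+ */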
+asm(
+"
+.align 4
+.globl __write_lock_failed
+__write_lock_failed:
+ " LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)
+1: rep; nop
+ cmpl $" RW_LOCK_BIAS_STR ",(%eax)
+ jne 1b
+
+ " LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)
+ jnz __write_lock_failed
+ ret
+
+
+.align 4
+.globl __read_lock_failed
+__read_lock_failed:
+ lock ; incl (%eax)
+1: rep; nop
+ cmpl $1,(%eax)
+ js 1b
+
+ lock ; decl (%eax)
+ js __read_lock_failed
+ ret
+"
+);
+#endif
diff --git a/xen/arch/i386/setup.c b/xen/arch/i386/setup.c
new file mode 100644
index 0000000000..f4f62c2b02
--- /dev/null
+++ b/xen/arch/i386/setup.c
@@ -0,0 +1,375 @@
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/lib.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+#include <asm/bitops.h>
+#include <asm/smp.h>
+#include <asm/processor.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <asm/desc.h>
+#include <asm/domain_page.h>
+
+struct cpuinfo_x86 boot_cpu_data = { 0 };
+/* Lots of nice things, since we only target PPro+. */
+unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE;
+unsigned long wait_init_idle;
+
+/* Basic page table for each CPU in the system. */
+l2_pgentry_t *idle_pg_table[NR_CPUS] = { idle0_pg_table };
+
+/* for asm/domain_page.h, map_domain_page() */
+unsigned long *mapcache[NR_CPUS];
+
+/* Standard routine to see if a specific EFLAGS flag is changeable. */
+static inline int flag_is_changeable_p(u32 flag)
+{
+ u32 f1, f2;
+
+ asm("pushfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "movl %0,%1\n\t"
+ "xorl %2,%0\n\t"
+ "pushl %0\n\t"
+ "popfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "popfl\n\t"
+ : "=&r" (f1), "=&r" (f2)
+ : "ir" (flag));
+
+ return ((f1^f2) & flag) != 0;
+}
+
+/* Probe for the CPUID instruction */
+static int __init have_cpuid_p(void)
+{
+ return flag_is_changeable_p(X86_EFLAGS_ID);
+}
+
+void __init get_cpu_vendor(struct cpuinfo_x86 *c)
+{
+ char *v = c->x86_vendor_id;
+
+ if (!strcmp(v, "GenuineIntel"))
+ c->x86_vendor = X86_VENDOR_INTEL;
+ else if (!strcmp(v, "AuthenticAMD"))
+ c->x86_vendor = X86_VENDOR_AMD;
+ else if (!strcmp(v, "CyrixInstead"))
+ c->x86_vendor = X86_VENDOR_CYRIX;
+ else if (!strcmp(v, "UMC UMC UMC "))
+ c->x86_vendor = X86_VENDOR_UMC;
+ else if (!strcmp(v, "CentaurHauls"))
+ c->x86_vendor = X86_VENDOR_CENTAUR;
+ else if (!strcmp(v, "NexGenDriven"))
+ c->x86_vendor = X86_VENDOR_NEXGEN;
+ else if (!strcmp(v, "RiseRiseRise"))
+ c->x86_vendor = X86_VENDOR_RISE;
+ else if (!strcmp(v, "GenuineTMx86") ||
+ !strcmp(v, "TransmetaCPU"))
+ c->x86_vendor = X86_VENDOR_TRANSMETA;
+ else
+ c->x86_vendor = X86_VENDOR_UNKNOWN;
+}
+
+static void __init init_intel(struct cpuinfo_x86 *c)
+{
+ /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */
+ if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 )
+ clear_bit(X86_FEATURE_SEP, &c->x86_capability);
+}
+
+static void __init init_amd(struct cpuinfo_x86 *c)
+{
+ /* Bit 31 in normal CPUID is used for a nonstandard 3DNow! ID;
+ 3DNow! is identified by bit 31 in extended CPUID (1*32+31) anyway */
+ clear_bit(0*32+31, &c->x86_capability);
+
+ switch(c->x86)
+ {
+ case 5:
+ panic("AMD K6 is not supported.\n");
+ case 6: /* An Athlon/Duron. We can trust the BIOS probably */
+ break;
+ }
+}
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __init identify_cpu(struct cpuinfo_x86 *c)
+{
+ extern int opt_noht, opt_noacpi;
+ int junk, i;
+ u32 xlvl, tfms;
+
+ c->x86_vendor = X86_VENDOR_UNKNOWN;
+ c->cpuid_level = -1; /* CPUID not detected */
+ c->x86_model = c->x86_mask = 0; /* So far unknown... */
+ c->x86_vendor_id[0] = '\0'; /* Unset */
+ memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+ if ( !have_cpuid_p() )
+ panic("Ancient processors not supported\n");
+
+ /* Get vendor name */
+ cpuid(0x00000000, &c->cpuid_level,
+ (int *)&c->x86_vendor_id[0],
+ (int *)&c->x86_vendor_id[8],
+ (int *)&c->x86_vendor_id[4]);
+
+ get_cpu_vendor(c);
+
+ if ( c->cpuid_level == 0 )
+ panic("Decrepit CPUID not supported\n");
+
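+ /* EAX from CPUID leaf 1 packs family in bits 11:8, model in
+ bits 7:4, and stepping ("mask") in bits 3:0. */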
+ cpuid(0x00000001, &tfms, &junk, &junk,
+ &c->x86_capability[0]);
+ c->x86 = (tfms >> 8) & 15;
+ c->x86_model = (tfms >> 4) & 15;
+ c->x86_mask = tfms & 15;
+
+ /* AMD-defined flags: level 0x80000001 */
+ xlvl = cpuid_eax(0x80000000);
+ if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+ if ( xlvl >= 0x80000001 )
+ c->x86_capability[1] = cpuid_edx(0x80000001);
+ }
+
+ /* Transmeta-defined flags: level 0x80860001 */
+ xlvl = cpuid_eax(0x80860000);
+ if ( (xlvl & 0xffff0000) == 0x80860000 ) {
+ if ( xlvl >= 0x80860001 )
+ c->x86_capability[2] = cpuid_edx(0x80860001);
+ }
+
+ printk("CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
+ c->x86_capability[0],
+ c->x86_capability[1],
+ c->x86_capability[2],
+ c->x86_vendor);
+
+ switch ( c->x86_vendor ) {
+ case X86_VENDOR_INTEL:
+ init_intel(c);
+ break;
+ case X86_VENDOR_AMD:
+ init_amd(c);
+ break;
+ default:
+ panic("Only support Intel processors (P6+)\n");
+ }
+
+ if ( opt_noht )
+ {
+ opt_noacpi = 1; /* Virtual CPUs only appear in ACPI tables. */
+ clear_bit(X86_FEATURE_HT, &c->x86_capability[0]);
+ }
+
+ printk("CPU caps: %08x %08x %08x %08x\n",
+ c->x86_capability[0],
+ c->x86_capability[1],
+ c->x86_capability[2],
+ c->x86_capability[3]);
+
+ /*
+ * On SMP, boot_cpu_data holds the common feature set between
+ * all CPUs; so make sure that we indicate which features are
+ * common between the CPUs. The first time this routine gets
+ * executed, c == &boot_cpu_data.
+ */
+ if ( c != &boot_cpu_data ) {
+ /* AND the already accumulated flags with these */
+ for ( i = 0 ; i < NCAPINTS ; i++ )
+ boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+ }
+}
+
+
+unsigned long cpu_initialized;
+void __init cpu_init(void)
+{
+ int nr = smp_processor_id();
+ struct tss_struct * t = &init_tss[nr];
+ l2_pgentry_t *pl2e;
+
+ if ( test_and_set_bit(nr, &cpu_initialized) )
+ panic("CPU#%d already initialized!!!\n", nr);
+ printk("Initializing CPU#%d\n", nr);
+
+ /* Set up GDT and IDT. */
+ SET_GDT_ENTRIES(current, DEFAULT_GDT_ENTRIES);
+ SET_GDT_ADDRESS(current, DEFAULT_GDT_ADDRESS);
+ __asm__ __volatile__("lgdt %0": "=m" (*current->mm.gdt));
+ __asm__ __volatile__("lidt %0": "=m" (idt_descr));
+
+ /* No nested task. */
+ __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
+
+ /* Ensure FPU gets initialised for each domain. */
+ stts();
+
+ /* Set up and load the per-CPU TSS and LDT. */
+ t->ss0 = __HYPERVISOR_DS;
+ t->esp0 = current->thread.esp0;
+ set_tss_desc(nr,t);
+ load_TR(nr);
+ __asm__ __volatile__("lldt %%ax"::"a" (0));
+
+ /* Clear all 6 debug registers. */
+#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
+ CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
+#undef CD
+
+ /* Install correct page table. */
+ __asm__ __volatile__ ("movl %%eax,%%cr3"
+ : : "a" (pagetable_val(current->mm.pagetable)));
+
+ /* Set up mapping cache for domain pages. */
+ pl2e = idle_pg_table[nr] + (MAPCACHE_VIRT_START >> L2_PAGETABLE_SHIFT);
+ mapcache[nr] = (unsigned long *)get_free_page(GFP_KERNEL);
+ clear_page(mapcache[nr]);
+ *pl2e = mk_l2_pgentry(__pa(mapcache[nr]) | PAGE_HYPERVISOR);
+}
+
+static void __init do_initcalls(void)
+{
+ initcall_t *call;
+
+ call = &__initcall_start;
+ do {
+ (*call)();
+ call++;
+ } while (call < &__initcall_end);
+}
+
+/*
+ * IBM-compatible BIOSes place drive info tables at initial interrupt
+ * vectors 0x41 and 0x46. These are in the form of 16-bit-mode far pointers.
+ */
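+/*
+ * Illustrative example of the far-pointer arithmetic below (made-up
+ * values): a vector entry of segment 0x9fc0, offset 0x0010 decodes to
+ * linear address (0x9fc0 << 4) + 0x0010 = 0x9fc10.
+ */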
+struct drive_info_struct { unsigned char dummy[32]; } drive_info;
+void get_bios_driveinfo(void)
+{
+ unsigned long seg, off, tab1, tab2;
+
+ off = (unsigned long)*(unsigned short *)(4*0x41+0);
+ seg = (unsigned long)*(unsigned short *)(4*0x41+2);
+ tab1 = (seg<<4) + off;
+
+ off = (unsigned long)*(unsigned short *)(4*0x46+0);
+ seg = (unsigned long)*(unsigned short *)(4*0x46+2);
+ tab2 = (seg<<4) + off;
+
+ printk("Reading BIOS drive-info tables at 0x%05lx and 0x%05lx\n",
+ tab1, tab2);
+
+ memcpy(drive_info.dummy+ 0, (char *)tab1, 16);
+ memcpy(drive_info.dummy+16, (char *)tab2, 16);
+}
+
+
+unsigned long pci_mem_start = 0x10000000;
+
+void __init start_of_day(void)
+{
+ extern void trap_init(void);
+ extern void init_IRQ(void);
+ extern void time_init(void);
+ extern void softirq_init(void);
+ extern void timer_bh(void);
+ extern void tqueue_bh(void);
+ extern void immediate_bh(void);
+ extern void init_timervecs(void);
+ extern void disable_pit(void);
+ extern void ac_timer_init(void);
+ extern int setup_network_devices(void);
+ extern void net_init(void);
+ extern void initialize_block_io(void);
+ extern void initialize_keytable(void);
+ extern void initialize_serial(void);
+ extern void initialize_keyboard(void);
+
+ unsigned long low_mem_size;
+
+ /*
+ * We do this early, but tables are in the lowest 1MB (usually
+ * 0xfe000-0xfffff). Therefore they're unlikely to ever get clobbered.
+ */
+ get_bios_driveinfo();
+
+ /* Tell the PCI layer not to allocate too close to the RAM area.. */
+ low_mem_size = ((max_page << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
+ if ( low_mem_size > pci_mem_start ) pci_mem_start = low_mem_size;
+
+ identify_cpu(&boot_cpu_data); /* get CPU type info */
+ if ( cpu_has_fxsr ) set_in_cr4(X86_CR4_OSFXSR);
+ if ( cpu_has_xmm ) set_in_cr4(X86_CR4_OSXMMEXCPT);
+ find_smp_config(); /* find ACPI tables */
+ smp_alloc_memory(); /* trampoline which other CPUs jump at */
+ paging_init(); /* not much here now, but sets up fixmap */
+ if ( smp_found_config ) get_smp_config();
+ domain_init();
+ scheduler_init();
+ trap_init();
+ init_IRQ(); /* installs simple interrupt wrappers. Starts HZ clock. */
+ time_init(); /* installs software handler for HZ clock. */
+ softirq_init();
+ init_timervecs();
+ init_bh(TIMER_BH, timer_bh);
+ init_bh(TQUEUE_BH, tqueue_bh);
+ init_bh(IMMEDIATE_BH, immediate_bh);
+ init_apic_mappings(); /* make APICs addressable in our pagetables. */
+
+#ifndef CONFIG_SMP
+ APIC_init_uniprocessor();
+#else
+ smp_boot_cpus(); /*
+ * Does loads of stuff, including kicking the local
+ * APIC, and the IO APIC after other CPUs are booted.
+ * Each IRQ is preferably handled by IO-APIC, but
+ * fall thru to 8259A if we have to (but slower).
+ */
+#endif
+ initialize_keytable(); /* call back handling for key codes */
+
+ disable_pit(); /* not needed anymore */
+ ac_timer_init(); /* init accurate timers */
+ init_xeno_time(); /* initialise the time */
+ schedulers_start(); /* start scheduler for each CPU */
+
+ sti();
+
+ zap_low_mappings();
+ kmem_cache_init();
+ kmem_cache_sizes_init(max_page);
+#ifdef CONFIG_PCI
+ pci_init();
+#endif
+ do_initcalls();
+
+
+ initialize_serial(); /* setup serial 'driver' (for debugging) */
+ initialize_keyboard(); /* setup keyboard (also for debugging) */
+
+ if ( !setup_network_devices() )
+ panic("Must have a network device!\n");
+ net_init(); /* initializes virtual network system. */
+ initialize_block_io(); /* setup block devices */
+
+
+#ifdef CONFIG_SMP
+ wait_init_idle = cpu_online_map;
+ clear_bit(smp_processor_id(), &wait_init_idle);
+ smp_threads_ready = 1;
+ smp_commence(); /* Tell other CPUs that state of the world is stable. */
+ while (wait_init_idle)
+ {
+ cpu_relax();
+ barrier();
+ }
+#endif
+}
diff --git a/xen/arch/i386/smp.c b/xen/arch/i386/smp.c
new file mode 100644
index 0000000000..008d1aa83a
--- /dev/null
+++ b/xen/arch/i386/smp.c
@@ -0,0 +1,578 @@
+/*
+ * Intel SMP support routines.
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ */
+
+#include <xeno/irq.h>
+#include <xeno/sched.h>
+#include <xeno/delay.h>
+#include <xeno/spinlock.h>
+#include <asm/smp.h>
+#include <asm/mc146818rtc.h>
+#include <asm/pgalloc.h>
+#include <asm/smpboot.h>
+
+/*
+ * Some notes on x86 processor bugs affecting SMP operation:
+ *
+ * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
+ * The Linux implications for SMP are handled as follows:
+ *
+ * Pentium III / [Xeon]
+ * None of the E1AP-E3AP errata are visible to the user.
+ *
+ * E1AP. see PII A1AP
+ * E2AP. see PII A2AP
+ * E3AP. see PII A3AP
+ *
+ * Pentium II / [Xeon]
+ * None of the A1AP-A3AP errata are visible to the user.
+ *
+ * A1AP. see PPro 1AP
+ * A2AP. see PPro 2AP
+ * A3AP. see PPro 7AP
+ *
+ * Pentium Pro
+ * None of 1AP-9AP errata are visible to the normal user,
+ * except occasional delivery of 'spurious interrupt' as trap #15.
+ * This is very rare and a non-problem.
+ *
+ * 1AP. Linux maps APIC as non-cacheable
+ * 2AP. worked around in hardware
+ * 3AP. fixed in C0 and above steppings microcode update.
+ * Linux does not use excessive STARTUP_IPIs.
+ * 4AP. worked around in hardware
+ * 5AP. symmetric IO mode (normal Linux operation) not affected.
+ * 'noapic' mode has vector 0xf filled out properly.
+ * 6AP. 'noapic' mode might be affected - fixed in later steppings
+ * 7AP. We do not assume writes to the LVT deasserting IRQs
+ * 8AP. We do not enable low power mode (deep sleep) during MP bootup
+ * 9AP. We do not use mixed mode
+ *
+ * Pentium
+ * There is a marginal case where REP MOVS on 100MHz SMP
+ * machines with B stepping processors can fail. XXX should provide
+ * an L1cache=Writethrough or L1cache=off option.
+ *
+ * B stepping CPUs may hang. There are hardware workarounds
+ * for this. We warn about it in case your board doesn't have the
+ * workarounds. Basically that's so I can tell anyone with a B stepping
+ * CPU and SMP problems "tough".
+ *
+ * Specific items [From Pentium Processor Specification Update]
+ *
+ * 1AP. Linux doesn't use remote read
+ * 2AP. Linux doesn't trust APIC errors
+ * 3AP. We work around this
+ * 4AP. Linux never generated 3 interrupts of the same priority
+ * to cause a lost local interrupt.
+ * 5AP. Remote read is never used
+ * 6AP. not affected - worked around in hardware
+ * 7AP. not affected - worked around in hardware
+ * 8AP. worked around in hardware - we get explicit CS errors if not
+ * 9AP. only 'noapic' mode affected. Might generate spurious
+ * interrupts, we log only the first one and count the
+ * rest silently.
+ * 10AP. not affected - worked around in hardware
+ * 11AP. Linux reads the APIC between writes to avoid this, as per
+ * the documentation. Make sure you preserve this as it affects
+ * the C stepping chips too.
+ * 12AP. not affected - worked around in hardware
+ * 13AP. not affected - worked around in hardware
+ * 14AP. we always deassert INIT during bootup
+ * 15AP. not affected - worked around in hardware
+ * 16AP. not affected - worked around in hardware
+ * 17AP. not affected - worked around in hardware
+ * 18AP. not affected - worked around in hardware
+ * 19AP. not affected - worked around in BIOS
+ *
+ * If this sounds worrying, believe me: these bugs are either ___RARE___
+ * or are signal timing bugs worked around in hardware, and there's
+ * nothing of note from C stepping upwards.
+ */
+
+/* The 'big kernel lock' */
+spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;
+
+struct tlb_state cpu_tlbstate[NR_CPUS] = {[0 ... NR_CPUS-1] = { 0 }};
+
+/*
+ * the following functions deal with sending IPIs between CPUs.
+ *
+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ */
+
+static inline int __prepare_ICR (unsigned int shortcut, int vector)
+{
+ return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+}
+
+static inline int __prepare_ICR2 (unsigned int mask)
+{
+ return SET_APIC_DEST_FIELD(mask);
+}
+
+static inline void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+ /*
+ * Subtle. In the case of the 'never do double writes' workaround
+ * we have to lock out interrupts to be safe. As we don't care
+ * about the value read, we use an atomic rmw access to avoid costly
+ * cli/sti. Otherwise we use an even cheaper single atomic write
+ * to the APIC.
+ */
+ unsigned int cfg;
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ /*
+ * No need to touch the target chip field
+ */
+ cfg = __prepare_ICR(shortcut, vector);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write_around(APIC_ICR, cfg);
+}
+
+void send_IPI_self(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
+static inline void send_IPI_mask_bitmask(int mask, int vector)
+{
+ unsigned long cfg;
+ unsigned long flags;
+
+ __save_flags(flags);
+ __cli();
+
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ /*
+ * prepare target chip field
+ */
+ cfg = __prepare_ICR2(mask);
+ apic_write_around(APIC_ICR2, cfg);
+
+ /*
+ * program the ICR
+ */
+ cfg = __prepare_ICR(0, vector);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write_around(APIC_ICR, cfg);
+
+ __restore_flags(flags);
+}
+
+static inline void send_IPI_mask_sequence(int mask, int vector)
+{
+ unsigned long cfg, flags;
+ unsigned int query_cpu, query_mask;
+
+ __save_flags(flags);
+ __cli();
+
+ for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
+ query_mask = 1 << query_cpu;
+ if (query_mask & mask) {
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ /*
+ * prepare target chip field
+ */
+ cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu));
+ apic_write_around(APIC_ICR2, cfg);
+
+ /*
+ * program the ICR
+ */
+ cfg = __prepare_ICR(0, vector);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write_around(APIC_ICR, cfg);
+ }
+ }
+ __restore_flags(flags);
+}
+
+static inline void send_IPI_mask(int mask, int vector)
+{
+ send_IPI_mask_bitmask(mask, vector);
+}
+
+static inline void send_IPI_allbutself(int vector)
+{
+ /*
+ * if there are no other CPUs in the system then
+ * we get an APIC send error if we try to broadcast.
+ * Thus we have to avoid sending IPIs in this case.
+ */
+ if (!(smp_num_cpus > 1))
+ return;
+
+ __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
+}
+
+static inline void send_IPI_all(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
+}
+
+/*
+ * Smarter SMP flushing macros.
+ * c/o Linus Torvalds.
+ *
+ * These mean you can really definitely utterly forget about
+ * writing to user space from interrupts. (It's not allowed anyway.)
+ *
+ * Optimizations Manfred Spraul <manfred@colorfullife.com>
+ */
+
+static volatile unsigned long flush_cpumask;
+static struct mm_struct * flush_mm;
+static unsigned long flush_va;
+static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED;
+#define FLUSH_ALL 0xffffffff
+
+/*
+ * We cannot call mmdrop() because we are in interrupt context,
+ * instead update mm.cpu_vm_mask.
+ */
+static inline void leave_mm (unsigned long cpu)
+{
+ if (cpu_tlbstate[cpu].state == TLBSTATE_OK)
+ BUG();
+ clear_bit(cpu, &cpu_tlbstate[cpu].active_mm->cpu_vm_mask);
+}
+
+/*
+ *
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+ * 1) switch_mm() either 1a) or 1b)
+ * 1a) thread switch to a different mm
+ * 1a1) clear_bit(cpu, &old_mm.cpu_vm_mask);
+ * Stop ipi delivery for the old mm. This is not synchronized with
+ * the other cpus, but smp_invalidate_interrupt ignores flush ipis
+ * for the wrong mm, and in the worst case we perform a superfluous
+ * tlb flush.
+ * 1a2) set cpu_tlbstate to TLBSTATE_OK
+ * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ * was in lazy tlb mode.
+ * 1a3) update cpu_tlbstate[].active_mm
+ * Now cpu0 accepts tlb flushes for the new mm.
+ * 1a4) set_bit(cpu, &new_mm.cpu_vm_mask);
+ * Now the other cpus will send tlb flush ipis.
+ * 1a5) change cr3.
+ * 1b) thread switch without mm change
+ * cpu_tlbstate[].active_mm is correct, cpu0 already handles
+ * flush ipis.
+ * 1b1) set cpu_tlbstate to TLBSTATE_OK
+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+ * Atomically set the bit [other cpus will start sending flush ipis],
+ * and test the bit.
+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+ * 2) switch %%esp, ie current
+ *
+ * The interrupt must handle 2 special cases:
+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+ * runs in kernel space, the cpu could load tlb entries for user space
+ * pages.
+ *
+ * The good news is that cpu_tlbstate is local to each cpu, no
+ * write/read ordering problems.
+ */
+
+/*
+ * TLB flush IPI:
+ *
+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ */
+
+asmlinkage void smp_invalidate_interrupt (void)
+{
+ unsigned long cpu = smp_processor_id();
+
+ if (!test_bit(cpu, &flush_cpumask))
+ return;
+ /*
+ * This was a BUG() but until someone can quote me the
+ * line from the intel manual that guarantees an IPI to
+ * multiple CPUs is retried _only_ on the erroring CPUs
+ * it's staying as a return.
+ *
+ * BUG();
+ */
+
+ if (flush_mm == cpu_tlbstate[cpu].active_mm) {
+ if (cpu_tlbstate[cpu].state == TLBSTATE_OK) {
+ if (flush_va == FLUSH_ALL)
+ local_flush_tlb();
+ else
+ __flush_tlb_one(flush_va);
+ } else
+ leave_mm(cpu);
+ }
+ ack_APIC_irq();
+ clear_bit(cpu, &flush_cpumask);
+}
+
+static void flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
+ unsigned long va)
+{
+ /*
+ * A couple of (to be removed) sanity checks:
+ *
+ * - we do not send IPIs to not-yet booted CPUs.
+ * - current CPU must not be in mask
+ * - mask must exist :)
+ */
+ if (!cpumask)
+ BUG();
+ if ((cpumask & cpu_online_map) != cpumask)
+ BUG();
+ if (cpumask & (1 << smp_processor_id()))
+ BUG();
+ if (!mm)
+ BUG();
+
+ /*
+ * I'm not happy about this global shared spinlock in the
+ * MM hot path, but we'll see how contended it is.
+ * Temporarily this turns IRQs off, so that lockups are
+ * detected by the NMI watchdog.
+ */
+ spin_lock(&tlbstate_lock);
+
+ flush_mm = mm;
+ flush_va = va;
+ atomic_set_mask(cpumask, &flush_cpumask);
+ /*
+ * We have to send the IPI only to
+ * CPUs affected.
+ */
+ send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+
+ while (flush_cpumask)
+ /* nothing. lockup detection does not belong here */;
+
+ flush_mm = NULL;
+ flush_va = 0;
+ spin_unlock(&tlbstate_lock);
+}
+
+void flush_tlb_current_task(void)
+{
+ struct mm_struct *mm = &current->mm;
+ unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
+
+ local_flush_tlb();
+ if (cpu_mask)
+ flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+}
+
+void flush_tlb_mm (struct mm_struct * mm)
+{
+ unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
+
+ if (current->active_mm == mm)
+ local_flush_tlb();
+ if (cpu_mask)
+ flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+}
+
+#if 0
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
+
+ if (current->active_mm == mm) {
+ if(current->mm)
+ __flush_tlb_one(va);
+ else
+ leave_mm(smp_processor_id());
+ }
+
+ if (cpu_mask)
+ flush_tlb_others(cpu_mask, mm, va);
+}
+#endif
+
+static inline void do_flush_tlb_all_local(void)
+{
+ unsigned long cpu = smp_processor_id();
+
+ __flush_tlb_all();
+ if (cpu_tlbstate[cpu].state == TLBSTATE_LAZY)
+ leave_mm(cpu);
+}
+
+static void flush_tlb_all_ipi(void* info)
+{
+ do_flush_tlb_all_local();
+}
+
+void flush_tlb_all(void)
+{
+ smp_call_function(flush_tlb_all_ipi, NULL, 1, 1);
+
+ do_flush_tlb_all_local();
+}
+
+void smp_send_event_check_mask(unsigned long cpu_mask)
+{
+ send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR);
+}
+
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ */
+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
+
+struct call_data_struct {
+ void (*func) (void *info);
+ void *info;
+ atomic_t started;
+ atomic_t finished;
+ int wait;
+};
+
+static struct call_data_struct * call_data;
+
+/*
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+ */
+
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+ int wait)
+/*
+ * [SUMMARY] Run a function on all other CPUs.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <nonatomic> currently unused.
+ * <wait> If true, wait (atomically) until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code. Does not return until
+ * remote CPUs are nearly ready to execute <<func>> or have already executed it.
+ *
+ * You must not call this function with disabled interrupts, from a
+ * hardware interrupt handler, or from bottom halves.
+ */
+{
+ struct call_data_struct data;
+ int cpus = smp_num_cpus-1;
+
+ if (!cpus)
+ return 0;
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ spin_lock(&call_lock);
+ call_data = &data;
+ wmb();
+ /* Send a message to all other CPUs and wait for them to respond */
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ barrier();
+
+ if (wait)
+ while (atomic_read(&data.finished) != cpus)
+ barrier();
+
+ spin_unlock(&call_lock);
+
+ return 0;
+}
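+
+/*
+ * Hypothetical usage sketch (not an actual caller in this file): run a
+ * fast, non-blocking handler on all other CPUs and wait, so that any
+ * stack-based argument stays live until every remote CPU is done:
+ *
+ *     static void drain_foo(void *info) { ... }
+ *     ...
+ *     smp_call_function(drain_foo, NULL, 1, 1);
+ */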
+
+static void stop_this_cpu (void * dummy)
+{
+ /*
+ * Remove this CPU:
+ */
+ clear_bit(smp_processor_id(), &cpu_online_map);
+ __cli();
+ disable_local_APIC();
+ for(;;) __asm__("hlt");
+}
+
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+
+void smp_send_stop(void)
+{
+ smp_call_function(stop_this_cpu, NULL, 1, 0);
+ smp_num_cpus = 1;
+
+ __cli();
+ disable_local_APIC();
+ __sti();
+}
+
+/*
+ * Nothing to do, as all the work is done automatically when
+ * we return from the interrupt.
+ */
+asmlinkage void smp_event_check_interrupt(void)
+{
+ ack_APIC_irq();
+}
+
+asmlinkage void smp_call_function_interrupt(void)
+{
+ void (*func) (void *info) = call_data->func;
+ void *info = call_data->info;
+ int wait = call_data->wait;
+
+ ack_APIC_irq();
+ /*
+ * Notify initiating CPU that I've grabbed the data and am
+ * about to execute the function
+ */
+ mb();
+ atomic_inc(&call_data->started);
+ /*
+ * At this point the info structure may be out of scope unless wait==1
+ */
+ (*func)(info);
+ if (wait) {
+ mb();
+ atomic_inc(&call_data->finished);
+ }
+}
+
diff --git a/xen/arch/i386/smpboot.c b/xen/arch/i386/smpboot.c
new file mode 100644
index 0000000000..0955db82f3
--- /dev/null
+++ b/xen/arch/i386/smpboot.c
@@ -0,0 +1,960 @@
+/*
+ * x86 SMP booting functions
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Much of the core SMP work is based on previous work by Thomas Radke, to
+ * whom a great many thanks are extended.
+ *
+ * Thanks to Intel for making available several different Pentium,
+ * Pentium Pro and Pentium-II/Xeon MP machines.
+ * Original development of Linux SMP code supported by Caldera.
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ *
+ * Fixes
+ * Felix Koop : NR_CPUS used properly
+ * Jose Renau : Handle single CPU case.
+ * Alan Cox : By repeated request 8) - Total BogoMIP report.
+ * Greg Wright : Fix for kernel stacks panic.
+ * Erich Boleyn : MP v1.4 and additional changes.
+ * Matthias Sattler : Changes for 2.1 kernel map.
+ * Michel Lespinasse : Changes for 2.1 kernel map.
+ * Michael Chastain : Change trampoline.S to gnu as.
+ * Alan Cox : Dumb bug: 'B' step PPro's are fine
+ * Ingo Molnar : Added APIC timers, based on code
+ * from Jose Renau
+ * Ingo Molnar : various cleanups and rewrites
+ * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs
+ * Martin J. Bligh : Added support for multi-quad systems
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/irq.h>
+#include <asm/pgalloc.h>
+#include <asm/mc146818rtc.h>
+#include <asm/smpboot.h>
+#include <xeno/smp.h>
+#include <asm/msr.h>
+#include <asm/system.h>
+#include <xeno/sched.h>
+#include <xeno/delay.h>
+#include <xeno/lib.h>
+
+/* Set if we find a B stepping CPU */
+static int smp_b_stepping;
+
+/* Setup configured maximum number of CPUs to activate */
+static int max_cpus = -1;
+
+/* Total count of live CPUs */
+int smp_num_cpus = 1;
+
+/* Bitmask of currently online CPUs */
+unsigned long cpu_online_map;
+
+static volatile unsigned long cpu_callin_map;
+static volatile unsigned long cpu_callout_map;
+
+/* Per CPU bogomips and other parameters */
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+
+/* Set when the idlers are all forked */
+int smp_threads_ready;
+
+/*
+ * Trampoline 80x86 program as an array.
+ */
+
+extern unsigned char trampoline_data [];
+extern unsigned char trampoline_end [];
+static unsigned char *trampoline_base;
+
+/*
+ * Currently trivial. Write the real->protected mode
+ * bootstrap into the page concerned. The caller
+ * has made sure it's suitably aligned.
+ */
+
+static unsigned long __init setup_trampoline(void)
+{
+ memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
+ return virt_to_phys(trampoline_base);
+}
+
+/*
+ * We are called very early to get the low memory for the
+ * SMP bootup trampoline page.
+ */
+void __init smp_alloc_memory(void)
+{
+ /*
+ * Has to be in very low memory so we can execute
+ * real-mode AP code.
+ */
+ trampoline_base = __va(0x90000);
+}
+
+/*
+ * The bootstrap kernel entry code has set these up. Save them for
+ * a given CPU
+ */
+
+void __init smp_store_cpu_info(int id)
+{
+ struct cpuinfo_x86 *c = cpu_data + id;
+
+ *c = boot_cpu_data;
+ c->pte_quick = 0;
+ c->pmd_quick = 0;
+ c->pgd_quick = 0;
+ c->pgtable_cache_sz = 0;
+ identify_cpu(c);
+ /*
+ * Mask B, Pentium, but not Pentium MMX
+ */
+ if (c->x86_vendor == X86_VENDOR_INTEL &&
+ c->x86 == 5 &&
+ c->x86_mask >= 1 && c->x86_mask <= 4 &&
+ c->x86_model <= 3)
+ /*
+ * Remember we have B step Pentia with bugs
+ */
+ smp_b_stepping = 1;
+}
+
+/*
+ * Architecture specific routine called by the kernel just before init is
+ * fired off. This allows the BP to have everything in order [we hope].
+ * At the end of this all the APs will hit the system scheduling and off
+ * we go. Each AP will load the system gdt's and jump through the kernel
+ * init into idle(). At this point the scheduler will one day take over
+ * and give them jobs to do. smp_callin is a standard routine
+ * we use to track CPUs as they power up.
+ */
+
+static atomic_t smp_commenced = ATOMIC_INIT(0);
+
+void __init smp_commence(void)
+{
+ /*
+ * Lets the callins below out of their loop.
+ */
+ Dprintk("Setting commenced=1, go go go\n");
+
+ wmb();
+ atomic_set(&smp_commenced,1);
+}
+
+/*
+ * TSC synchronization.
+ *
+ * We first check whether all CPUs have their TSCs synchronized,
+ * then we print a warning if not, and always resync.
+ */
+
+static atomic_t tsc_start_flag = ATOMIC_INIT(0);
+static atomic_t tsc_count_start = ATOMIC_INIT(0);
+static atomic_t tsc_count_stop = ATOMIC_INIT(0);
+static unsigned long long tsc_values[NR_CPUS];
+
+#define NR_LOOPS 5
+
+/*
+ * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit
+ * multiplication. Not terribly optimized but we need it at boot time only
+ * anyway.
+ *
+ * result == a / b
+ * == (a1 + a2*(2^32)) / b
+ * == a1/b + a2*(2^32/b)
+ * == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b
+ * ^---- (this multiplication can overflow)
+ */
+
+static unsigned long long div64 (unsigned long long a, unsigned long b0)
+{
+ unsigned int a1, a2;
+ unsigned long long res;
+
+ a1 = ((unsigned int*)&a)[0];
+ a2 = ((unsigned int*)&a)[1];
+
+ res = a1/b0 +
+ (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
+ a2 / b0 +
+ (a2 * (0xffffffff % b0)) / b0;
+
+ return res;
+}
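+
+/*
+ * Worked example of the decomposition above: a = 2*2^32 + 10, b0 = 5.
+ *   a1/b0 = 10/5 = 2
+ *   a2*((2^32-1)/b0) = 2*858993459 = 1717986918
+ *   a2/b0 = 2/5 = 0
+ *   (a2*((2^32-1) % b0))/b0 = (2*0)/5 = 0
+ * Sum = 1717986920, matching 8589934602/5 truncated.
+ */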
+
+static void __init synchronize_tsc_bp (void)
+{
+ int i;
+ unsigned long long t0;
+ unsigned long long sum, avg;
+ long long delta;
+ int buggy = 0;
+
+ printk("checking TSC synchronization across CPUs: ");
+
+ atomic_set(&tsc_start_flag, 1);
+ wmb();
+
+ /*
+ * We loop a few times to get a primed instruction cache,
+ * then the last pass is more or less synchronized and
+ * the BP and APs set their cycle counters to zero all at
+ * once. This reduces the chance of having random offsets
+ * between the processors, and guarantees that the maximum
+ * delay between the cycle counters is never bigger than
+ * the latency of information-passing (cachelines) between
+ * two CPUs.
+ */
+ for (i = 0; i < NR_LOOPS; i++) {
+ /*
+ * all APs synchronize but they loop on '== num_cpus'
+ */
+ while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
+ atomic_set(&tsc_count_stop, 0);
+ wmb();
+ /*
+ * this lets the APs save their current TSC:
+ */
+ atomic_inc(&tsc_count_start);
+
+ rdtscll(tsc_values[smp_processor_id()]);
+ /*
+ * We clear the TSC in the last loop:
+ */
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ /*
+ * Wait for all APs to leave the synchronization point:
+ */
+ while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
+ atomic_set(&tsc_count_start, 0);
+ wmb();
+ atomic_inc(&tsc_count_stop);
+ }
+
+ sum = 0;
+ for (i = 0; i < smp_num_cpus; i++) {
+ t0 = tsc_values[i];
+ sum += t0;
+ }
+ avg = div64(sum, smp_num_cpus);
+
+ sum = 0;
+ for (i = 0; i < smp_num_cpus; i++) {
+ delta = tsc_values[i] - avg;
+ if (delta < 0)
+ delta = -delta;
+ /*
+ * We report clock differences bigger than 2 microseconds.
+ */
+ if (delta > 2*ticks_per_usec) {
+ long realdelta;
+ if (!buggy) {
+ buggy = 1;
+ printk("\n");
+ }
+ realdelta = div64(delta, ticks_per_usec);
+ if (tsc_values[i] < avg)
+ realdelta = -realdelta;
+
+ printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
+ i, realdelta);
+ }
+
+ sum += delta;
+ }
+ if (!buggy)
+ printk("passed.\n");
+}
+
+static void __init synchronize_tsc_ap (void)
+{
+ int i;
+
+ /*
+ * smp_num_cpus is not necessarily known at the time
+ * this gets called, so we first wait for the BP to
+ * finish SMP initialization:
+ */
+ while (!atomic_read(&tsc_start_flag)) mb();
+
+ for (i = 0; i < NR_LOOPS; i++) {
+ atomic_inc(&tsc_count_start);
+ while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();
+
+ rdtscll(tsc_values[smp_processor_id()]);
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ atomic_inc(&tsc_count_stop);
+ while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
+ }
+}
+#undef NR_LOOPS
+
+static atomic_t init_deasserted;
+
+void __init smp_callin(void)
+{
+ int cpuid, phys_id, i;
+
+ /*
+ * If woken up by an INIT in an 82489DX configuration
+ * we may get here before an INIT-deassert IPI reaches
+ * our local APIC. We have to wait for the IPI or we'll
+ * lock up on an APIC access.
+ */
+ while (!atomic_read(&init_deasserted));
+
+ /*
+ * (This works even if the APIC is not enabled.)
+ */
+ phys_id = GET_APIC_ID(apic_read(APIC_ID));
+ cpuid = smp_processor_id();
+ if (test_and_set_bit(cpuid, &cpu_online_map)) {
+ printk("huh, phys CPU#%d, CPU#%d already present??\n",
+ phys_id, cpuid);
+ BUG();
+ }
+ Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+
+ /*
+ * STARTUP IPIs are fragile beasts as they might sometimes
+ * trigger some glue motherboard logic. We therefore keep the
+ * APIC bus silent for up to two seconds here, which generously
+ * overestimates the time the boot CPU spends sending the up to
+ * 2 STARTUP IPIs. This should be enough.
+ */
+
+ for ( i = 0; i < 200; i++ )
+ {
+ if ( test_bit(cpuid, &cpu_callout_map) ) break;
+ mdelay(10);
+ }
+
+ if (!test_bit(cpuid, &cpu_callout_map)) {
+ printk("BUG: CPU%d started up but did not get a callout!\n",
+ cpuid);
+ BUG();
+ }
+
+ /*
+ * the boot CPU has finished the init stage and is spinning
+ * on callin_map until we finish. We are free to set up this
+ * CPU, first the APIC. (this is probably redundant on most
+ * boards)
+ */
+
+ Dprintk("CALLIN, before setup_local_APIC().\n");
+
+ setup_local_APIC();
+
+ __sti();
+
+#ifdef CONFIG_MTRR
+ /*
+ * Must be done before calibration delay is computed
+ */
+ mtrr_init_secondary_cpu ();
+#endif
+
+ Dprintk("Stack at about %p\n",&cpuid);
+
+ /*
+ * Save our processor parameters
+ */
+ smp_store_cpu_info(cpuid);
+
+ /*
+ * Allow the master to continue.
+ */
+ set_bit(cpuid, &cpu_callin_map);
+
+ /*
+ * Synchronize the TSC with the BP
+ */
+ synchronize_tsc_ap();
+}
+
+int cpucount;
+
+/*
+ * Activate a secondary processor.
+ */
+int __init start_secondary(void *unused)
+{
+ unsigned int cpu = smp_processor_id();
+ /* 6 bytes suitable for passing to LIDT instruction. */
+ unsigned char idt_load[6];
+
+ extern void cpu_init(void);
+
+ /*
+ * Don't put anything before smp_callin(); SMP
+ * booting is so fragile that we want to limit the
+ * things done here to the bare minimum.
+ */
+ cpu_init();
+ smp_callin();
+
+ while (!atomic_read(&smp_commenced))
+ rep_nop();
+
+ /*
+ * At this point, the boot CPU has fully initialised the IDT. It is
+ * now safe to make ourselves a private copy.
+ */
+ idt_tables[cpu] = kmalloc(IDT_ENTRIES*8, GFP_KERNEL);
+ memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*8);
+ *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*8)-1;
+ *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
+ __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
+
+ /*
+ * Low-memory mappings have been cleared; flush them from the local
+ * TLBs too.
+ */
+ local_flush_tlb();
+
+ cpu_idle();
+ BUG();
+
+ return 0;
+}
+
+/*
+ * Everything has been set up for the secondary
+ * CPUs - they just need to reload everything
+ * from the task structure.
+ * This function must not return.
+ */
+void __init initialize_secondary(void)
+{
+ /*
+ * We don't actually need to load the full TSS,
+ * basically just the stack pointer and the eip.
+ */
+ asm volatile(
+ "movl %0,%%esp\n\t"
+ "jmp *%1"
+ :
+ :"r" (current->thread.esp),"r" (current->thread.eip));
+}
+
+extern struct {
+ void * esp;
+ unsigned short ss;
+} stack_start;
+
+/* which physical APIC ID maps to which logical CPU number */
+volatile int physical_apicid_2_cpu[MAX_APICID];
+/* which logical CPU number maps to which physical APIC ID */
+volatile int cpu_2_physical_apicid[NR_CPUS];
+
+/* which logical APIC ID maps to which logical CPU number */
+volatile int logical_apicid_2_cpu[MAX_APICID];
+/* which logical CPU number maps to which logical APIC ID */
+volatile int cpu_2_logical_apicid[NR_CPUS];
+
+static inline void init_cpu_to_apicid(void)
+/* Initialize all maps between cpu number and apicids */
+{
+ int apicid, cpu;
+
+ for (apicid = 0; apicid < MAX_APICID; apicid++) {
+ physical_apicid_2_cpu[apicid] = -1;
+ logical_apicid_2_cpu[apicid] = -1;
+ }
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ cpu_2_physical_apicid[cpu] = -1;
+ cpu_2_logical_apicid[cpu] = -1;
+ }
+}
+
+static inline void map_cpu_to_boot_apicid(int cpu, int apicid)
+/*
+ * set up a mapping between cpu and apicid. Uses logical apicids for multiquad,
+ * else physical apic ids
+ */
+{
+ physical_apicid_2_cpu[apicid] = cpu;
+ cpu_2_physical_apicid[cpu] = apicid;
+}
+
+static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid)
+/*
+ * undo a mapping between cpu and apicid. Uses logical apicids for multiquad,
+ * else physical apic ids
+ */
+{
+ physical_apicid_2_cpu[apicid] = -1;
+ cpu_2_physical_apicid[cpu] = -1;
+}
+
+#if APIC_DEBUG
+static inline void inquire_remote_apic(int apicid)
+{
+ int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+ char *names[] = { "ID", "VERSION", "SPIV" };
+ int timeout, status;
+
+ printk("Inquiring remote APIC #%d...\n", apicid);
+
+ for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+ printk("... APIC #%d %s: ", apicid, names[i]);
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+
+ timeout = 0;
+ do {
+ udelay(100);
+ status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
+ } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
+
+ switch (status) {
+ case APIC_ICR_RR_VALID:
+ status = apic_read(APIC_RRR);
+ printk("%08x\n", status);
+ break;
+ default:
+ printk("failed\n");
+ }
+ }
+}
+#endif
+
+
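+/*
+ * Bring a secondary CPU out of reset with the MP-spec sequence used
+ * below: assert INIT, deassert it, then (for integrated APICs only)
+ * send up to two STARTUP IPIs whose vector field points at the
+ * page-aligned real-mode trampoline.
+ */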
+static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip)
+{
+ unsigned long send_status = 0, accept_status = 0;
+ int maxlvt, timeout, num_starts, j;
+
+ Dprintk("Asserting INIT.\n");
+
+ /*
+ * Turn INIT on target chip
+ */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /*
+ * Send IPI
+ */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+ | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ mdelay(10);
+
+ Dprintk("Deasserting INIT.\n");
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Send IPI */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ atomic_set(&init_deasserted, 1);
+
+ /*
+ * Should we send STARTUP IPIs ?
+ *
+ * Determine this based on the APIC version.
+ * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+ */
+ if (APIC_INTEGRATED(apic_version[phys_apicid]))
+ num_starts = 2;
+ else
+ num_starts = 0;
+
+ /*
+ * Run STARTUP IPI loop.
+ */
+ Dprintk("#startup loops: %d.\n", num_starts);
+
+ maxlvt = get_maxlvt();
+
+ for (j = 1; j <= num_starts; j++) {
+ Dprintk("Sending STARTUP #%d.\n",j);
+
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ Dprintk("After apic_write.\n");
+
+ /*
+ * STARTUP IPI
+ */
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Boot on the stack */
+ /* Kick the second */
+ apic_write_around(APIC_ICR, APIC_DM_STARTUP
+ | (start_eip >> 12));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(300);
+
+ Dprintk("Startup point 1.\n");
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(200);
+ /*
+ * Due to the Pentium erratum 3AP.
+ */
+ if (maxlvt > 3) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ }
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ if (send_status || accept_status)
+ break;
+ }
+ Dprintk("After Startup.\n");
+
+ if (send_status)
+ printk("APIC never delivered???\n");
+ if (accept_status)
+ printk("APIC delivery error (%lx).\n", accept_status);
+
+ return (send_status | accept_status);
+}
+
+extern unsigned long cpu_initialized;
+
+static void __init do_boot_cpu (int apicid)
+/*
+ * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
+ * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
+ */
+{
+ struct task_struct *idle;
+ unsigned long boot_error = 0;
+ int timeout, cpu;
+ unsigned long start_eip;
+ l2_pgentry_t *pagetable;
+
+ cpu = ++cpucount;
+ /*
+ * We can't use kernel_thread since we must avoid rescheduling the child.
+ */
+ if ( (idle = do_newdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
+ panic("failed 'newdomain' for CPU %d", cpu);
+
+ pagetable = (void *)get_free_page(GFP_KERNEL);
+ memcpy(pagetable, idle0_pg_table, PAGE_SIZE);
+ idle_pg_table[cpu] = pagetable;
+ idle->mm.pagetable = mk_pagetable(__pa(pagetable));
+
+ map_cpu_to_boot_apicid(cpu, apicid);
+
+ idle->thread.esp = idle->thread.esp0 = (unsigned long)idle + THREAD_SIZE;
+ idle->thread.eip = (unsigned long) start_secondary;
+
+ SET_DEFAULT_FAST_TRAP(&idle->thread);
+
+ /* start_eip had better be page-aligned! */
+ start_eip = setup_trampoline();
+
+ /* So we see what's up */
+ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+ stack_start.esp = (void *) (1024+PAGE_SIZE+(char *)idle-__PAGE_OFFSET);
+
+ /*
+ * This grunge runs the startup process for
+ * the targeted processor.
+ */
+
+ atomic_set(&init_deasserted, 0);
+
+ Dprintk("Setting warm reset code and vector.\n");
+
+ CMOS_WRITE(0xa, 0xf);
+ local_flush_tlb();
+ Dprintk("1.\n");
+ *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+ Dprintk("2.\n");
+ *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
+ Dprintk("3.\n");
+
+ /*
+ * Be paranoid about clearing APIC errors.
+ */
+ if (APIC_INTEGRATED(apic_version[apicid])) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ }
+
+ /*
+ * Status is now clean
+ */
+ boot_error = 0;
+
+ /*
+ * Starting actual IPI sequence...
+ */
+
+ boot_error = wakeup_secondary_via_INIT(apicid, start_eip);
+
+ if (!boot_error) {
+ /*
+ * allow APs to start initializing.
+ */
+ Dprintk("Before Callout %d.\n", cpu);
+ set_bit(cpu, &cpu_callout_map);
+ Dprintk("After Callout %d.\n", cpu);
+
+ /*
+ * Wait 5s total for a response
+ */
+ for (timeout = 0; timeout < 50000; timeout++) {
+ if (test_bit(cpu, &cpu_callin_map))
+ break; /* It has booted */
+ udelay(100);
+ }
+
+ if (test_bit(cpu, &cpu_callin_map)) {
+ /* number CPUs logically, starting from 1 (BSP is 0) */
+ printk("CPU%d has booted.\n", cpu);
+ } else {
+ boot_error= 1;
+ if (*((volatile unsigned char *)phys_to_virt(8192))
+ == 0xA5)
+ /* trampoline started but...? */
+ printk("Stuck ??\n");
+ else
+ /* trampoline code not run */
+ printk("Not responding.\n");
+#if APIC_DEBUG
+ inquire_remote_apic(apicid);
+#endif
+ }
+ }
+ if (boot_error) {
+ /* Try to put things back the way they were before ... */
+ unmap_cpu_to_boot_apicid(cpu, apicid);
+ clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
+ clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
+ clear_bit(cpu, &cpu_online_map); /* was set in smp_callin() */
+ cpucount--;
+ }
+
+ /* mark "stuck" area as not stuck */
+ *((volatile unsigned long *)phys_to_virt(8192)) = 0;
+}
+
+
+/*
+ * Cycle through the processors sending APIC IPIs to boot each.
+ */
+
+static int boot_cpu_logical_apicid;
+/* Where the IO area was mapped on multiquad, always 0 otherwise */
+void *xquad_portio = NULL;
+
+void __init smp_boot_cpus(void)
+{
+ int apicid, bit;
+
+#ifdef CONFIG_MTRR
+ /* Must be done before other processors booted */
+ mtrr_init_boot_cpu ();
+#endif
+ /* Initialize the logical to physical CPU number mapping */
+ init_cpu_to_apicid();
+
+ /*
+ * Setup boot CPU information
+ */
+ smp_store_cpu_info(0); /* Final full version of the data */
+ printk("CPU%d booted\n", 0);
+
+ /*
+ * We have the boot CPU online for sure.
+ */
+ set_bit(0, &cpu_online_map);
+ boot_cpu_logical_apicid = logical_smp_processor_id();
+ map_cpu_to_boot_apicid(0, boot_cpu_apicid);
+
+ /*
+ * If we couldn't find an SMP configuration at boot time,
+ * get out of here now!
+ */
+ if (!smp_found_config) {
+ printk("SMP motherboard not detected.\n");
+ io_apic_irqs = 0;
+ cpu_online_map = phys_cpu_present_map = 1;
+ smp_num_cpus = 1;
+ if (APIC_init_uniprocessor())
+ printk("Local APIC not detected."
+ " Using dummy APIC emulation.\n");
+ goto smp_done;
+ }
+
+ /*
+ * Should not be necessary because the MP table should list the boot
+ * CPU too, but we do it for the sake of robustness anyway.
+ */
+ if (!test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) {
+ printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+ boot_cpu_physical_apicid);
+ phys_cpu_present_map |= (1 << hard_smp_processor_id());
+ }
+
+ /*
+ * If we couldn't find a local APIC, then get out of here now!
+ */
+ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
+ !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
+ printk("BIOS bug, local APIC #%d not detected!...\n",
+ boot_cpu_physical_apicid);
+ printk("... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
+ io_apic_irqs = 0;
+ cpu_online_map = phys_cpu_present_map = 1;
+ smp_num_cpus = 1;
+ goto smp_done;
+ }
+
+ verify_local_APIC();
+
+ /*
+ * If SMP should be disabled, then really disable it!
+ */
+ if (!max_cpus) {
+ smp_found_config = 0;
+ printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+ io_apic_irqs = 0;
+ cpu_online_map = phys_cpu_present_map = 1;
+ smp_num_cpus = 1;
+ goto smp_done;
+ }
+
+ connect_bsp_APIC();
+ setup_local_APIC();
+
+ if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
+ BUG();
+
+ /*
+ * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
+ *
+ * In clustered apic mode, phys_cpu_present_map is constructed thus:
+ * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
+ * clustered apic ID.
+ */
+ Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
+
+ for (bit = 0; bit < NR_CPUS; bit++) {
+ apicid = cpu_present_to_apicid(bit);
+ /*
+ * Don't even attempt to start the boot CPU!
+ */
+ if (apicid == boot_cpu_apicid)
+ continue;
+
+ if (!(phys_cpu_present_map & (1 << bit)))
+ continue;
+ if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
+ continue;
+
+ do_boot_cpu(apicid);
+
+ /*
+ * Make sure we unmap all failed CPUs
+ */
+ if ((boot_apicid_to_cpu(apicid) == -1) &&
+ (phys_cpu_present_map & (1 << bit)))
+ printk("CPU #%d not responding - cannot use it.\n",
+ apicid);
+ }
+
+ /*
+ * Cleanup possible dangling ends...
+ */
+ /*
+ * Install writable page 0 entry to set BIOS data area.
+ */
+ local_flush_tlb();
+
+ /*
+ * Paranoid: Set warm reset code and vector here back
+ * to default values.
+ */
+ CMOS_WRITE(0, 0xf);
+
+ *((volatile long *) phys_to_virt(0x467)) = 0;
+
+ if (!cpucount) {
+ printk("Error: only one processor found.\n");
+ } else {
+ printk("Total of %d processors activated.\n", cpucount+1);
+ }
+ smp_num_cpus = cpucount + 1;
+
+ if (smp_b_stepping)
+ printk("WARNING: SMP operation may"
+ " be unreliable with B stepping processors.\n");
+ Dprintk("Boot done.\n");
+
+ /*
+ * Here we can be sure that there is an IO-APIC in the system. Let's
+ * go and set it up:
+ */
+ if ( nr_ioapics ) setup_IO_APIC();
+
+ /* Set up all local APIC timers in the system. */
+ setup_APIC_clocks();
+
+ /* Synchronize the TSC with the AP(s). */
+ if ( cpucount ) synchronize_tsc_bp();
+
+ smp_done:
+ ;
+}
diff --git a/xen/arch/i386/time.c b/xen/arch/i386/time.c
new file mode 100644
index 0000000000..773c4cfeb3
--- /dev/null
+++ b/xen/arch/i386/time.c
@@ -0,0 +1,434 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: i386/time.c
+ * Author:
+ * Changes:
+ *
+ * Date: Jan 2003
+ *
+ * Environment: Xen Hypervisor
+ * Description: modified version of Linux' time.c
+ * implements system and wall clock time.
+ * based on freebsd's implementation.
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+/*
+ * linux/arch/i386/kernel/time.c
+ *
+ * Copyright (C) 1991, 1992, 1995 Linus Torvalds
+ */
+
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/lib.h>
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/time.h>
+#include <xeno/ac_timer.h>
+
+#include <asm/io.h>
+#include <xeno/smp.h>
+#include <xeno/irq.h>
+#include <asm/msr.h>
+#include <asm/mpspec.h>
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/mc146818rtc.h>
+
+#ifdef TIME_TRACE
+#define TRC(_x) _x
+#else
+#define TRC(_x)
+#endif
+
+unsigned long cpu_khz; /* Detected as we calibrate the TSC */
+unsigned long ticks_per_usec; /* TSC ticks per microsecond. */
+
+spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+
+int timer_ack=0;
+extern spinlock_t i8259A_lock;
+static inline void do_timer_interrupt(int irq,
+ void *dev_id, struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_IO_APIC
+ if (timer_ack) {
+ /*
+ * Subtle: when I/O APICs are used we have to ack the timer IRQ manually
+ * to reset the IRR bit for do_slow_gettimeoffset(). This will also
+ * deassert NMI lines for the watchdog if run on an 82489DX-based
+ * system.
+ */
+ spin_lock(&i8259A_lock);
+ outb(0x0c, 0x20);
+ /* Ack the IRQ; AEOI will end it automatically. */
+ inb(0x20);
+ spin_unlock(&i8259A_lock);
+ }
+#endif
+ do_timer(regs);
+}
+
+/*
+ * This is only temporary. Once the APIC is up and running, this
+ * timer interrupt is turned off.
+ */
+static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ do_timer_interrupt(irq, NULL, regs);
+}
+
+static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0,
+ "timer", NULL, NULL};
+
+/* ------ Calibrate the TSC -------
+ * Return processor ticks per second / CALIBRATE_FRAC.
+ */
+
+#define CLOCK_TICK_RATE 1193180 /* system crystal frequency (Hz) */
+#define CALIBRATE_FRAC 20 /* calibrate over 50ms */
+#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
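+/*
+ * Worked example: the PIT input clock is 1193180 Hz, so CALIBRATE_LATCH
+ * = (1193180+10)/20 = 59659 PIT ticks, i.e. one full countdown of
+ * channel 2 takes 1/20 s = 50 ms.
+ */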
+
+static unsigned long __init calibrate_tsc(void)
+{
+ /* Set the Gate high, disable speaker */
+ outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+ /*
+ * Now let's take care of CTC channel 2
+ *
+ * Set the Gate high, program CTC channel 2 for mode 0 (interrupt on
+ * terminal count), binary count, and load CALIBRATE_LATCH (LSB then MSB)
+ * to begin the countdown.
+ */
+ outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */
+ outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */
+ outb(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */
+
+ {
+ unsigned long startlow, starthigh;
+ unsigned long endlow, endhigh;
+ unsigned long count;
+
+ rdtsc(startlow,starthigh);
+ count = 0;
+ do {
+ count++;
+ } while ((inb(0x61) & 0x20) == 0);
+ rdtsc(endlow,endhigh);
+
+ /* Error: ECTCNEVERSET */
+ if (count <= 1)
+ goto bad_ctc;
+
+ /* 64-bit subtract - gcc just messes up with long longs */
+ __asm__("subl %2,%0\n\t"
+ "sbbl %3,%1"
+ :"=a" (endlow), "=d" (endhigh)
+ :"g" (startlow), "g" (starthigh),
+ "0" (endlow), "1" (endhigh));
+
+ /* Error: ECPUTOOFAST */
+ if (endhigh)
+ goto bad_ctc;
+
+ return endlow;
+ }
+
+ /*
+ * The CTC wasn't reliable: we got a hit on the very first read, or the CPU
+ * was so fast/slow that the quotient wouldn't fit in 32 bits..
+ */
+ bad_ctc:
+ return 0;
+}
+
+/***************************************************************************
+ * CMOS Timer functions
+ ***************************************************************************/
+
+/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
+ * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
+ * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
+ *
+ * [For the Julian calendar (which was used in Russia before 1917,
+ * Britain & colonies before 1752, anywhere else before 1582,
+ * and is still in use by some communities) leave out the
+ * -year/100+year/400 terms, and add 10.]
+ *
+ * This algorithm was first published by Gauss (I think).
+ *
+ * WARNING: this function will overflow on 2106-02-07 06:28:16 on
+ * machines where long is 32-bit! (However, as time_t is signed, we
+ * will already get problems at other places on 2038-01-19 03:14:08)
+ */
+static inline unsigned long
+mktime (unsigned int year, unsigned int mon,
+ unsigned int day, unsigned int hour,
+ unsigned int min, unsigned int sec)
+{
+ if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */
+ mon += 12; /* Puts Feb last since it has leap day */
+ year -= 1;
+ }
+ return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)+
+ year*365 - 719499
+ )*24 + hour /* now have hours */
+ )*60 + min /* now have minutes */
+ )*60 + sec; /* finally seconds */
+}
+
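+/*
+ * Sanity check (worked example): mktime(1970,1,1,0,0,0) should be 0.
+ * mon=1 becomes 11 and year becomes 1969; then
+ * 1969/4 - 1969/100 + 1969/400 + 367*11/12 + 1 = 492-19+4+336+1 = 814,
+ * 814 + 1969*365 = 719499, and 719499 - 719499 = 0 days, hence 0 seconds.
+ */
+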
+static unsigned long get_cmos_time(void)
+{
+ unsigned int year, mon, day, hour, min, sec;
+ int i;
+
+ spin_lock(&rtc_lock);
+ /* The Linux interpretation of the CMOS clock register contents:
+ * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
+ * RTC registers show the second which has precisely just started.
+ * Let's hope other operating systems interpret the RTC the same way.
+ */
+ /* read RTC exactly on falling edge of update flag */
+ for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... */
+ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
+ break;
+ for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms */
+ if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
+ break;
+ do { /* Isn't this overkill ? UIP above should guarantee consistency */
+ sec = CMOS_READ(RTC_SECONDS);
+ min = CMOS_READ(RTC_MINUTES);
+ hour = CMOS_READ(RTC_HOURS);
+ day = CMOS_READ(RTC_DAY_OF_MONTH);
+ mon = CMOS_READ(RTC_MONTH);
+ year = CMOS_READ(RTC_YEAR);
+ } while (sec != CMOS_READ(RTC_SECONDS));
+ if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ {
+ BCD_TO_BIN(sec);
+ BCD_TO_BIN(min);
+ BCD_TO_BIN(hour);
+ BCD_TO_BIN(day);
+ BCD_TO_BIN(mon);
+ BCD_TO_BIN(year);
+ }
+ spin_unlock(&rtc_lock);
+ if ((year += 1900) < 1970)
+ year += 100;
+ printk(".... CMOS Clock: %02d/%02d/%04d %02d:%02d:%02d\n",
+ day, mon, year, hour, min, sec);
+ return mktime(year, mon, day, hour, min, sec);
+}
+
+/***************************************************************************
+ * Time
+ * XXX RN: Will be able to remove some of the locking once the time is
+ * updated by the APIC on only one CPU.
+ ***************************************************************************/
+
+static spinlock_t stime_lock;
+static u32 st_scale_f;
+static u32 st_scale_i;
+u32 stime_pcc; /* cycle counter value at last timer irq */
+s_time_t stime_now; /* time in ns at last timer IRQ */
+
+s_time_t get_s_time(void)
+{
+ unsigned long flags;
+ u32 delta_tsc, low, pcc;
+ u64 delta;
+ s_time_t now;
+
+ spin_lock_irqsave(&stime_lock, flags);
+
+ pcc = stime_pcc;
+ now = stime_now;
+
+ /* only use bottom 32bits of TSC. This should be sufficient */
+ rdtscl(low);
+ delta_tsc = low - pcc;
+ delta = ((u64)delta_tsc * st_scale_f);
+ delta >>= 32;
+ delta += ((u64)delta_tsc * st_scale_i);
+
+ spin_unlock_irqrestore(&stime_lock, flags);
+
+ return now + delta;
+}
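+
+/*
+ * The scale factors hold ns-per-TSC-tick as a 32.32 fixed-point value
+ * (scale = 2^32 * 10^9 / cpu_freq, set up in init_xeno_time()), so
+ * delta_ns = delta_tsc*st_scale_i + (((u64)delta_tsc*st_scale_f) >> 32).
+ * E.g. a hypothetical 2 GHz TSC gives 0.5 ns/tick: st_scale_i = 0,
+ * st_scale_f = 0x80000000.
+ */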
+
+
+/* Wall Clock time */
+static spinlock_t wctime_lock;
+struct timeval wall_clock_time; /* wall clock time at last update */
+s_time_t wctime_st; /* system time at last update */
+
+void do_gettimeofday(struct timeval *tv)
+{
+ unsigned long flags;
+ unsigned long usec, sec;
+
+ spin_lock_irqsave(&wctime_lock, flags);
+ usec = ((unsigned long)(NOW() - wctime_st))/1000;
+ sec = wall_clock_time.tv_sec;
+ usec += wall_clock_time.tv_usec;
+ spin_unlock_irqrestore(&wctime_lock, flags);
+
+ while (usec >= 1000000) {
+ usec -= 1000000;
+ sec++;
+ }
+ tv->tv_sec = sec;
+ tv->tv_usec = usec;
+}
+
+void do_settimeofday(struct timeval *tv)
+{
+ printk("XXX: do_settimeofday not implemented\n");
+}
+
+/***************************************************************************
+ * Update times
+ ***************************************************************************/
+
+/* Update a domain's notion of time. */
+void update_dom_time(shared_info_t *si)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&stime_lock, flags);
+ si->system_time = stime_now;
+ si->st_timestamp = stime_pcc;
+ spin_unlock_irqrestore(&stime_lock, flags);
+
+ spin_lock_irqsave(&wctime_lock, flags);
+ si->tv_sec = wall_clock_time.tv_sec;
+ si->tv_usec = wall_clock_time.tv_usec;
+ si->wc_timestamp = wctime_st;
+ si->wc_version++;
+ spin_unlock_irqrestore(&wctime_lock, flags);
+
+ TRC(printk(" 0x%08X%08X\n", (u32)(wctime_st>>32), (u32)wctime_st));
+}
+
+/*
+ * Update the hypervisor's notion of time.
+ * This is done periodically on its own timer.
+ */
+static struct ac_timer update_timer;
+static void update_time(unsigned long foo)
+{
+ unsigned long flags;
+ u32 new_pcc;
+ s_time_t new_st;
+ unsigned long usec;
+
+ new_st = NOW();
+ rdtscl(new_pcc);
+
+ /* Update system time. */
+ spin_lock_irqsave(&stime_lock, flags);
+ stime_now = new_st;
+ stime_pcc = new_pcc;
+ /* Don't re-enable IRQs until we release wctime_lock. */
+ spin_unlock(&stime_lock);
+
+ /* Update wall clock time. */
+ spin_lock(&wctime_lock);
+ usec = ((unsigned long)(new_st - wctime_st))/1000;
+ usec += wall_clock_time.tv_usec;
+ while (usec >= 1000000) {
+ usec -= 1000000;
+ wall_clock_time.tv_sec++;
+ }
+ wall_clock_time.tv_usec = usec;
+ wctime_st = new_st;
+ spin_unlock_irqrestore(&wctime_lock, flags);
+
+ TRC(printk("TIME[%02d] update time: stime_now=%lld now=%lld,wct=%ld:%ld\n",
+ smp_processor_id(), stime_now, new_st, wall_clock_time.tv_sec,
+ wall_clock_time.tv_usec));
+
+ /* Reload the timer. */
+ again:
+ update_timer.expires = new_st + MILLISECS(200);
+ if(add_ac_timer(&update_timer) == 1)
+ goto again;
+}
+
+/***************************************************************************
+ * Init Xeno Time
+ * This has to be done after all CPUs have been booted
+ ***************************************************************************/
+int __init init_xeno_time()
+{
+ int cpu = smp_processor_id();
+ u32 cpu_cycle; /* time of one CPU cycle in picoseconds */
+ u64 scale; /* scale factor */
+
+ spin_lock_init(&stime_lock);
+ spin_lock_init(&wctime_lock);
+
+ printk("Init Time[%02d]:\n", cpu);
+
+ /* System Time */
+ cpu_cycle = (u32) (1000000000LL/cpu_khz); /* in picoseconds */
+ scale = 1000000000LL << 32;
+ scale /= cpu_freq;
+ st_scale_f = scale & 0xffffffff;
+ st_scale_i = scale >> 32;
+
+ /* Wall Clock time */
+ wall_clock_time.tv_sec = get_cmos_time();
+ wall_clock_time.tv_usec = 0;
+
+ /* set starting times */
+ stime_now = (s_time_t)0;
+ rdtscl(stime_pcc);
+ wctime_st = NOW();
+
+ /* start timer to update time periodically */
+ init_ac_timer(&update_timer);
+ update_timer.function = &update_time;
+ update_time(0);
+
+ printk(".... System Time: %lldns\n", NOW());
+ printk(".....cpu_cycle: %u ps\n", cpu_cycle);
+ printk(".... st_scale_f: %X\n", st_scale_f);
+ printk(".... st_scale_i: %X\n", st_scale_i);
+ printk(".... stime_pcc: %u\n", stime_pcc);
+
+ printk(".... Wall Clock: %lds %ldus\n", wall_clock_time.tv_sec,
+ wall_clock_time.tv_usec);
+ printk(".... wctime_st: %lld\n", wctime_st);
+
+ return 0;
+}
+
+
+/***************************************************************************
+ * Init
+ ***************************************************************************/
+
+void __init time_init(void)
+{
+ unsigned long ticks_per_frac = calibrate_tsc();
+
+ if ( !ticks_per_frac )
+ panic("Error calibrating TSC\n");
+
+ ticks_per_usec = ticks_per_frac / (1000000/CALIBRATE_FRAC);
+ cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC);
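+ /*
+ * Worked example: a (hypothetical) 500 MHz CPU counts 25,000,000 TSC
+ * ticks in the 50 ms calibration window, giving ticks_per_usec =
+ * 25000000/50000 = 500 and cpu_khz = 25000000/50 = 500000.
+ */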
+
+ printk("Detected %lu.%03lu MHz processor.\n",
+ cpu_khz / 1000, cpu_khz % 1000);
+
+ setup_irq(0, &irq0);
+}
diff --git a/xen/arch/i386/trampoline.S b/xen/arch/i386/trampoline.S
new file mode 100644
index 0000000000..f0beef725a
--- /dev/null
+++ b/xen/arch/i386/trampoline.S
@@ -0,0 +1,54 @@
+/*
+ *
+ * Trampoline.S Derived from Setup.S by Linus Torvalds
+ *
+ * 4 Jan 1997 Michael Chastain: changed to gnu as.
+ *
+ * Entry: CS:IP point to the start of our code; we are
+ * in real mode with no stack, and the rest of the
+ * trampoline page is free for our stack and anything
+ * else we need.
+ *
+ * On entry to trampoline_data, the processor is in real mode
+ * with 16-bit addressing and 16-bit data. CS has some value
+ * and IP is zero. Thus, data addresses need to be absolute
+ * (no relocation) and are taken with regard to r_base.
+ */
+
+#include <xeno/config.h>
+#include <asm/page.h>
+
+.data
+
+.code16
+
+ENTRY(trampoline_data)
+r_base = .
+ mov %cs, %ax # Code and data in the same place
+ mov %ax, %ds
+
+ movl $0xA5A5A5A5, %ebx # Flag an SMP trampoline
+ cli # We should be safe anyway
+
+ movl $0xA5A5A5A5, trampoline_data - r_base
+
+ lidt idt_48 - r_base # load idt with 0, 0
+ lgdt gdt_48 - r_base # load gdt with whatever is appropriate
+
+ xor %ax, %ax
+ inc %ax # protected mode (PE) bit
+ lmsw %ax # into protected mode
+ jmp flush_instr
+flush_instr:
+ ljmpl $__HYPERVISOR_CS, $(MONITOR_BASE)-__PAGE_OFFSET
+
+idt_48:
+ .word 0 # idt limit = 0
+ .word 0, 0 # idt base = 0L
+
+gdt_48:
+ .word 0x0800 # gdt limit = 2048, 256 GDT entries
+ .long gdt_table-__PAGE_OFFSET # gdt base = gdt (first SMP CPU)
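+
+# A GDT pseudo-descriptor is a 16-bit limit followed by a 32-bit base:
+# the 0x0800 limit covers 256 eight-byte descriptors (2048 bytes), and
+# the base must be a physical address since paging is not yet enabled,
+# hence the -__PAGE_OFFSET adjustment.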
+
+.globl SYMBOL_NAME(trampoline_end)
+SYMBOL_NAME_LABEL(trampoline_end)
diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c
new file mode 100644
index 0000000000..5fe0858ba3
--- /dev/null
+++ b/xen/arch/i386/traps.c
@@ -0,0 +1,696 @@
+/*
+ * linux/arch/i386/traps.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/sched.h>
+#include <xeno/lib.h>
+#include <xeno/errno.h>
+#include <asm/ptrace.h>
+#include <xeno/delay.h>
+#include <xeno/spinlock.h>
+#include <xeno/irq.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/desc.h>
+#include <asm/debugreg.h>
+#include <asm/smp.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
+
+#define GTBF_TRAP 1
+#define GTBF_TRAP_NOCODE 2
+#define GTBF_TRAP_CR2 4
+struct guest_trap_bounce {
+ unsigned long error_code; /* 0 */
+ unsigned long cr2; /* 4 */
+ unsigned short flags; /* 8 */
+ unsigned short cs; /* 10 */
+ unsigned long eip; /* 12 */
+} guest_trap_bounce[NR_CPUS] = { { 0 } };
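+
+/*
+ * The byte offsets noted above (0/4/8/10/12) suggest this layout is also
+ * consumed from assembly (presumably entry.S), so the fields must not be
+ * reordered.
+ */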
+
+asmlinkage int hypervisor_call(void);
+asmlinkage void lcall7(void);
+asmlinkage void lcall27(void);
+
+/* Master table, and the one used by CPU0. */
+struct desc_struct idt_table[256] = { {0, 0}, };
+/* All other CPUs have their own copy. */
+struct desc_struct *idt_tables[NR_CPUS] = { 0 };
+
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void nmi(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
+asmlinkage void double_fault(void);
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void simd_coprocessor_error(void);
+asmlinkage void alignment_check(void);
+asmlinkage void spurious_interrupt_bug(void);
+asmlinkage void machine_check(void);
+
+int kstack_depth_to_print = 8*20;
+
+static inline int kernel_text_address(unsigned long addr)
+{
+ if (addr >= (unsigned long) &_stext &&
+ addr <= (unsigned long) &_etext)
+ return 1;
+ return 0;
+
+}
+
+void show_trace(unsigned long * stack)
+{
+ int i;
+ unsigned long addr;
+
+ if (!stack)
+ stack = (unsigned long*)&stack;
+
+ printk("Call Trace: ");
+ i = 1;
+ while (((long) stack & (THREAD_SIZE-1)) != 0) {
+ addr = *stack++;
+ if (kernel_text_address(addr)) {
+ if (i && ((i % 6) == 0))
+ printk("\n ");
+ printk("[<%08lx>] ", addr);
+ i++;
+ }
+ }
+ printk("\n");
+}
+
+void show_trace_task(struct task_struct *tsk)
+{
+ unsigned long esp = tsk->thread.esp;
+
+ /* User space on another CPU? */
+ if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
+ return;
+ show_trace((unsigned long *)esp);
+}
+
+void show_stack(unsigned long * esp)
+{
+ unsigned long *stack;
+ int i;
+
+ // debugging aid: "show_stack(NULL);" prints the
+ // back trace for this cpu.
+
+ if(esp==NULL)
+ esp=(unsigned long*)&esp;
+
+ printk("Stack trace from ESP=%p:\n", esp);
+
+ stack = esp;
+ for(i=0; i < kstack_depth_to_print; i++) {
+ if (((long) stack & (THREAD_SIZE-1)) == 0)
+ break;
+ if (i && ((i % 8) == 0))
+ printk("\n ");
+ if ( kernel_text_address(*stack) )
+ printk("[%08lx] ", *stack++);
+ else
+ printk("%08lx ", *stack++);
+ }
+ printk("\n");
+ //show_trace(esp);
+}
+
+void show_registers(struct pt_regs *regs)
+{
+ unsigned long esp;
+ unsigned short ss;
+
+ esp = (unsigned long) (&regs->esp);
+ ss = __HYPERVISOR_DS;
+ if ( regs->xcs & 3 )
+ {
+ esp = regs->esp;
+ ss = regs->xss & 0xffff;
+ }
+
+ printk("CPU: %d\nEIP: %04x:[<%08lx>] \nEFLAGS: %08lx\n",
+ smp_processor_id(), 0xffff & regs->xcs, regs->eip, regs->eflags);
+ printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
+ regs->eax, regs->ebx, regs->ecx, regs->edx);
+ printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
+ regs->esi, regs->edi, regs->ebp, esp);
+ printk("ds: %04x es: %04x ss: %04x\n",
+ regs->xds & 0xffff, regs->xes & 0xffff, ss);
+
+ show_stack(&regs->esp);
+}
+
+
+spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
+
+void die(const char * str, struct pt_regs * regs, long err)
+{
+ spin_lock_irq(&die_lock);
+ printk("%s: %04lx,%04lx\n", str, err >> 16, err & 0xffff);
+ show_registers(regs);
+ spin_unlock_irq(&die_lock);
+ panic("HYPERVISOR DEATH!!\n");
+}
+
+static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
+{
+ if (!(3 & regs->xcs)) die(str, regs, err);
+}
+
+static void inline do_trap(int trapnr, char *str,
+ struct pt_regs * regs,
+ long error_code, int use_error_code)
+{
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+ trap_info_t *ti;
+ unsigned long addr, fixup;
+
+ if (!(regs->xcs & 3))
+ goto fault_in_hypervisor;
+
+ ti = current->thread.traps + trapnr;
+ if ( trapnr == 14 )
+ {
+ /* page fault pushes %cr2 */
+ gtb->flags = GTBF_TRAP_CR2;
+ __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (gtb->cr2) : );
+ }
+ else
+ {
+ gtb->flags = use_error_code ? GTBF_TRAP : GTBF_TRAP_NOCODE;
+ }
+ gtb->error_code = error_code;
+ gtb->cs = ti->cs;
+ gtb->eip = ti->address;
+ return;
+
+ fault_in_hypervisor:
+
+ if ( (fixup = search_exception_table(regs->eip)) != 0 )
+ {
+ regs->eip = fixup;
+ return;
+ }
+
+ __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );
+
+ if ( (trapnr == 14) && (addr >= PAGE_OFFSET) )
+ {
+ unsigned long page;
+ unsigned long *pde;
+ pde = (unsigned long *)idle_pg_table[smp_processor_id()];
+ page = pde[addr >> L2_PAGETABLE_SHIFT];
+ printk("*pde = %08lx\n", page);
+ if ( page & _PAGE_PRESENT )
+ {
+ page &= PAGE_MASK;
+ page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
+ printk(" *pte = %08lx\n", page);
+ }
+ }
+
+ show_registers(regs);
+ panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
+ "[error_code=%08x]\n"
+ "Faulting linear address might be %08lx\n",
+ smp_processor_id(), trapnr, str,
+ error_code, addr);
+}
+
+#define DO_ERROR_NOCODE(trapnr, str, name) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+do_trap(trapnr, str, regs, error_code, 0); \
+}
+
+#define DO_ERROR(trapnr, str, name) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+do_trap(trapnr, str, regs, error_code, 1); \
+}
+
+DO_ERROR_NOCODE( 0, "divide error", divide_error)
+DO_ERROR_NOCODE( 3, "int3", int3)
+DO_ERROR_NOCODE( 4, "overflow", overflow)
+DO_ERROR_NOCODE( 5, "bounds", bounds)
+DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
+DO_ERROR_NOCODE( 7, "device not available", device_not_available)
+DO_ERROR( 8, "double fault", double_fault)
+DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR(10, "invalid TSS", invalid_TSS)
+DO_ERROR(11, "segment not present", segment_not_present)
+DO_ERROR(12, "stack segment", stack_segment)
+DO_ERROR(14, "page fault", page_fault)
+/* Vector 15 reserved by Intel */
+DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
+DO_ERROR(17, "alignment check", alignment_check)
+DO_ERROR_NOCODE(18, "machine check", machine_check)
+DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
+
+asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
+{
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+ trap_info_t *ti;
+ unsigned long fixup;
+
+ /* Fatal if the fault occurred in ring 0, or resulted from an interrupt. */
+ if (!(regs->xcs & 3) || (error_code & 1))
+ goto gp_in_kernel;
+
+ /*
+ * Cunning trick to allow arbitrary "INT n" handling.
+ *
+ * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
+ * instruction from trapping to the appropriate vector, when that might not
+ * be expected by Xen or the guest OS. For example, that entry might be for
+ * a fault handler (unlike traps, faults don't increment EIP), or might
+ * expect an error code on the stack (which a software trap never
+ * provides), or might be a hardware interrupt handler that doesn't like
+ * being called spuriously.
+ *
+ * Instead, a GPF occurs with the faulting IDT vector in the error code.
+ * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
+ * clear to indicate that it's a software fault, not hardware.
+ *
+ * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
+ * okay because they can only be triggered by an explicit DPL-checked
+ * instruction. The DPL specified by the guest OS for these vectors is NOT
+ * CHECKED!!
+ */
+ if ( (error_code & 3) == 2 )
+ {
+ /* This fault must be due to an <INT n> instruction. */
+ ti = current->thread.traps + (error_code>>3);
+ if ( ti->dpl >= (regs->xcs & 3) )
+ {
+ gtb->flags = GTBF_TRAP_NOCODE;
+ gtb->cs = ti->cs;
+ gtb->eip = ti->address;
+ regs->eip += 2;
+ return;
+ }
+ }
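+
+ /*
+ * Worked example: a guest 'int $0x80' hitting a DPL-0 IDT entry faults
+ * with error_code = (0x80 << 3) | 2 = 0x402: bit 1 flags an IDT-related
+ * fault, error_code>>3 recovers vector 0x80, and 'regs->eip += 2' steps
+ * over the two-byte INT instruction before bouncing to the guest.
+ */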
+
+ /* Pass on GPF as is. */
+ ti = current->thread.traps + 13;
+ gtb->flags = GTBF_TRAP;
+ gtb->error_code = error_code;
+ gtb->cs = ti->cs;
+ gtb->eip = ti->address;
+ return;
+
+ gp_in_kernel:
+ if ( (fixup = search_exception_table(regs->eip)) != 0 )
+ {
+ regs->eip = fixup;
+ return;
+ }
+
+ die("general protection fault", regs, error_code);
+}
+
+static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
+{
+ printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
+ printk("You probably have a hardware problem with your RAM chips\n");
+
+ /* Clear and disable the memory parity error line. */
+ reason = (reason & 0xf) | 4;
+ outb(reason, 0x61);
+}
+
+static void io_check_error(unsigned char reason, struct pt_regs * regs)
+{
+ unsigned long i;
+
+ printk("NMI: IOCK error (debug interrupt?)\n");
+ show_registers(regs);
+
+ /* Re-enable the IOCK line, wait for a few seconds */
+ reason = (reason & 0xf) | 8;
+ outb(reason, 0x61);
+ i = 2000;
+ while (--i) udelay(1000);
+ reason &= ~8;
+ outb(reason, 0x61);
+}
+
+static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+{
+ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
+ printk("Dazed and confused, but trying to continue\n");
+ printk("Do you have a strange power saving mode enabled?\n");
+}
+
+asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
+{
+ unsigned char reason = inb(0x61);
+
+ if (!(reason & 0xc0)) {
+ unknown_nmi_error(reason, regs);
+ return;
+ }
+ if (reason & 0x80)
+ mem_parity_error(reason, regs);
+ if (reason & 0x40)
+ io_check_error(reason, regs);
+ /*
+ * Reassert NMI in case it became active meanwhile
+ * as it's edge-triggered.
+ */
+ outb(0x8f, 0x70);
+ inb(0x71); /* dummy */
+ outb(0x0f, 0x70);
+ inb(0x71); /* dummy */
+}
+
+asmlinkage void math_state_restore(struct pt_regs *regs, long error_code)
+{
+ /* Prevent recursion. */
+ clts();
+
+ if ( !(current->flags & PF_USEDFPU) )
+ {
+ if ( current->flags & PF_DONEFPUINIT )
+ restore_fpu(current);
+ else
+ init_fpu();
+ current->flags |= PF_USEDFPU; /* So we fnsave on switch_to() */
+ }
+
+ if ( current->flags & PF_GUEST_STTS )
+ {
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+ gtb->flags = GTBF_TRAP_NOCODE;
+ gtb->cs = current->thread.traps[7].cs;
+ gtb->eip = current->thread.traps[7].address;
+ current->flags &= ~PF_GUEST_STTS;
+ }
+}
+
+
+asmlinkage void do_debug(struct pt_regs * regs, long error_code)
+{
+ unsigned int condition;
+ struct task_struct *tsk = current;
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+
+ __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
+
+ /* Mask out spurious debug traps due to lazy DR7 setting */
+ if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
+ (tsk->thread.debugreg[7] == 0) )
+ {
+ __asm__("movl %0,%%db7" : : "r" (0));
+ return;
+ }
+
+ if ( (regs->xcs & 3) == 0 )
+ {
+ /* Clear TF just for absolute sanity. */
+ regs->eflags &= ~EF_TF;
+ /*
+ * Basically, we ignore watchpoints when they trigger in
+ * the hypervisor. This may happen when a buffer is passed
+ * to us which previously had a watchpoint set on it.
+ * No need to bump EIP; the only faulting trap is an
+ * instruction breakpoint, which can't happen to us.
+ */
+ return;
+ }
+
+ /* Save debug status register where guest OS can peek at it */
+ tsk->thread.debugreg[6] = condition;
+
+ gtb->flags = GTBF_TRAP_NOCODE;
+ gtb->cs = tsk->thread.traps[1].cs;
+ gtb->eip = tsk->thread.traps[1].address;
+}
+
+
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs,
+ long error_code)
+{ /* nothing */ }
+
+
+#define _set_gate(gate_addr,type,dpl,addr) \
+do { \
+ int __d0, __d1; \
+ __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
+ "movw %4,%%dx\n\t" \
+ "movl %%eax,%0\n\t" \
+ "movl %%edx,%1" \
+ :"=m" (*((long *) (gate_addr))), \
+ "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
+ :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
+ "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
+} while (0)
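+
+/*
+ * Gate 'type' values below follow the IA-32 descriptor encoding: 14 is a
+ * 32-bit interrupt gate (IF cleared on entry), 15 a 32-bit trap gate (IF
+ * unchanged), 12 a 32-bit call gate; 0x8000 sets the Present bit, and
+ * 'dpl' selects the privilege level allowed to invoke the gate.
+ */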
+
+
+/*
+ * This needs to use 'idt_table' rather than 'idt', and
+ * thus use the _nonmapped_ version of the IDT, as the
+ * Pentium F0 0F bugfix can have resulted in the mapped
+ * IDT being write-protected.
+ */
+void set_intr_gate(unsigned int n, void *addr)
+{
+ _set_gate(idt_table+n,14,0,addr);
+}
+
+static void __init set_trap_gate(unsigned int n, void *addr)
+{
+ _set_gate(idt_table+n,15,0,addr);
+}
+
+static void __init set_system_gate(unsigned int n, void *addr)
+{
+ _set_gate(idt_table+n,15,3,addr);
+}
+
+static void __init set_call_gate(void *a, void *addr)
+{
+ _set_gate(a,12,3,addr);
+}
+
+#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
+ *((gate_addr)+1) = ((base) & 0xff000000) | \
+ (((base) & 0x00ff0000)>>16) | \
+ ((limit) & 0xf0000) | \
+ ((dpl)<<13) | \
+ (0x00408000) | \
+ ((type)<<8); \
+ *(gate_addr) = (((base) & 0x0000ffff)<<16) | \
+ ((limit) & 0x0ffff); }
+
+#define _set_tssldt_desc(n,addr,limit,type) \
+__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
+ "movw %%ax,2(%2)\n\t" \
+ "rorl $16,%%eax\n\t" \
+ "movb %%al,4(%2)\n\t" \
+ "movb %4,5(%2)\n\t" \
+ "movb $0,6(%2)\n\t" \
+ "movb %%ah,7(%2)\n\t" \
+ "rorl $16,%%eax" \
+ : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))
+
+void set_tss_desc(unsigned int n, void *addr)
+{
+ _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 235, 0x89);
+}
+
+void __init trap_init(void)
+{
+ set_trap_gate(0,&divide_error);
+ set_trap_gate(1,&debug);
+ set_intr_gate(2,&nmi);
+ set_system_gate(3,&int3); /* usable from all privilege levels */
+ set_system_gate(4,&overflow); /* usable from all privilege levels */
+ set_trap_gate(5,&bounds);
+ set_trap_gate(6,&invalid_op);
+ set_trap_gate(7,&device_not_available);
+ set_trap_gate(8,&double_fault);
+ set_trap_gate(9,&coprocessor_segment_overrun);
+ set_trap_gate(10,&invalid_TSS);
+ set_trap_gate(11,&segment_not_present);
+ set_trap_gate(12,&stack_segment);
+ set_trap_gate(13,&general_protection);
+ set_intr_gate(14,&page_fault);
+ set_trap_gate(15,&spurious_interrupt_bug);
+ set_trap_gate(16,&coprocessor_error);
+ set_trap_gate(17,&alignment_check);
+ set_trap_gate(18,&machine_check);
+ set_trap_gate(19,&simd_coprocessor_error);
+
+ /* Only ring 1 can access monitor services. */
+ _set_gate(idt_table+HYPERVISOR_CALL_VECTOR,15,1,&hypervisor_call);
+
+ /* CPU0 uses the master IDT. */
+ idt_tables[0] = idt_table;
+
+ /*
+ * Should be a barrier for any external CPU state.
+ */
+ {
+ extern void cpu_init(void);
+ cpu_init();
+ }
+}
+
+
+long do_set_trap_table(trap_info_t *traps)
+{
+ trap_info_t cur;
+ trap_info_t *dst = current->thread.traps;
+
+ /*
+ * I'm removing the next line, since it seems more intuitive to use this
+ * as an interface to incrementally update a domain's trap table. Clearing
+ * out old entries automatically is rather antisocial!
+ */
+ /*memset(dst, 0, sizeof(*dst) * 256);*/
+
+ for ( ; ; )
+ {
+ if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;
+ if ( (cur.cs & 3) == 0 ) return -EPERM;
+ if ( cur.address == 0 ) break;
+ memcpy(dst+cur.vector, &cur, sizeof(cur));
+ traps++;
+ }
+
+ return(0);
+}
+
+
+long do_set_fast_trap(int idx)
+{
+ trap_info_t *ti;
+
+ /* Index 0 is special: it disables fast traps. */
+ if ( idx == 0 )
+ {
+ CLEAR_FAST_TRAP(&current->thread);
+ SET_DEFAULT_FAST_TRAP(&current->thread);
+ return 0;
+ }
+
+ /*
+ * We only fast-trap vectors 0x20-0x2f, and vector 0x80.
+ * The former range is used by Windows and MS-DOS.
+ * Vector 0x80 is used by Linux and the BSD variants.
+ */
+ if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) ) return -1;
+
+ ti = current->thread.traps + idx;
+
+ CLEAR_FAST_TRAP(&current->thread);
+
+ current->thread.fast_trap_idx = idx;
+ current->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
+ current->thread.fast_trap_desc.b =
+ (ti->address & 0xffff0000) | 0x8f00 | (ti->dpl&3)<<13;
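+ /*
+ * The two words above hand-build an IDT trap-gate descriptor: .a holds
+ * the selector (bits 16-31) and offset bits 0-15; .b holds offset bits
+ * 16-31, with 0x8f00 setting Present and type 0xF (32-bit trap gate)
+ * and the guest's DPL in bits 13-14.
+ */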
+
+ SET_FAST_TRAP(&current->thread);
+
+ return 0;
+}
+
+
+long do_fpu_taskswitch(void)
+{
+ current->flags |= PF_GUEST_STTS;
+ stts();
+ return 0;
+}
+
+
+long do_set_debugreg(int reg, unsigned long value)
+{
+ int i;
+
+ switch ( reg )
+ {
+ case 0:
+ if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ __asm__ ( "movl %0, %%db0" : : "r" (value) );
+ break;
+ case 1:
+ if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ __asm__ ( "movl %0, %%db1" : : "r" (value) );
+ break;
+ case 2:
+ if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ __asm__ ( "movl %0, %%db2" : : "r" (value) );
+ break;
+ case 3:
+ if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ __asm__ ( "movl %0, %%db3" : : "r" (value) );
+ break;
+ case 6:
+ /*
+ * DR6: Bits 4-11,16-31 reserved (set to 1).
+ * Bit 12 reserved (set to 0).
+ */
+ value &= 0xffffefff; /* reserved bits => 0 */
+ value |= 0xffff0ff0; /* reserved bits => 1 */
+ __asm__ ( "movl %0, %%db6" : : "r" (value) );
+ break;
+ case 7:
+ /*
+ * DR7: Bit 10 reserved (set to 1).
+ * Bits 11-12,14-15 reserved (set to 0).
+ * Privileged bits:
+ * GD (bit 13): must be 0.
+ * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
+ * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
+ */
+ /* DR7 == 0 => debugging disabled for this domain. */
+ if ( value != 0 )
+ {
+ value &= 0xffff27ff; /* reserved bits => 0 */
+ value |= 0x00000400; /* reserved bits => 1 */
+ if ( (value & (1<<13)) != 0 ) return -EPERM;
+ for ( i = 0; i < 16; i += 2 )
+ if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
+ }
+ __asm__ ( "movl %0, %%db7" : : "r" (value) );
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ current->thread.debugreg[reg] = value;
+ return 0;
+}
+
+unsigned long do_get_debugreg(int reg)
+{
+ if ( (reg < 0) || (reg > 7) ) return -EINVAL;
+ return current->thread.debugreg[reg];
+}
diff --git a/xen/arch/i386/usercopy.c b/xen/arch/i386/usercopy.c
new file mode 100644
index 0000000000..56322f1b56
--- /dev/null
+++ b/xen/arch/i386/usercopy.c
@@ -0,0 +1,190 @@
+/*
+ * User address space access functions.
+ * The non inlined parts of asm-i386/uaccess.h are here.
+ *
+ * Copyright 1997 Andi Kleen <ak@muc.de>
+ * Copyright 1997 Linus Torvalds
+ */
+#include <linux/config.h>
+#include <asm/uaccess.h>
+//#include <asm/mmx.h>
+
+#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS
+
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+ if (access_ok(VERIFY_WRITE, to, n))
+ {
+ if(n<512)
+ __copy_user(to,from,n);
+ else
+ mmx_copy_user(to,from,n);
+ }
+ return n;
+}
+
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+ if (access_ok(VERIFY_READ, from, n))
+ {
+ if(n<512)
+ __copy_user_zeroing(to,from,n);
+ else
+ mmx_copy_user_zeroing(to, from, n);
+ }
+ else
+ memset(to, 0, n);
+ return n;
+}
+
+#else
+
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+ prefetch(from);
+ if (access_ok(VERIFY_WRITE, to, n))
+ __copy_user(to,from,n);
+ return n;
+}
+
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+ prefetchw(to);
+ if (access_ok(VERIFY_READ, from, n))
+ __copy_user_zeroing(to,from,n);
+ else
+ memset(to, 0, n);
+ return n;
+}
+
+#endif
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+
+#define __do_strncpy_from_user(dst,src,count,res) \
+do { \
+ int __d0, __d1, __d2; \
+ __asm__ __volatile__( \
+ " testl %1,%1\n" \
+ " jz 2f\n" \
+ "0: lodsb\n" \
+ " stosb\n" \
+ " testb %%al,%%al\n" \
+ " jz 1f\n" \
+ " decl %1\n" \
+ " jnz 0b\n" \
+ "1: subl %1,%0\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl %5,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ ".previous" \
+ : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
+ "=&D" (__d2) \
+ : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
+ : "memory"); \
+} while (0)
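+
+/*
+ * The .fixup/__ex_table pair implements the usual fault-recovery scheme:
+ * '.long 0b,3b' maps the potentially-faulting 'lodsb' at label 0 to the
+ * fixup code at label 3; on a fault, search_exception_table() (used by
+ * the trap handlers in traps.c) redirects EIP there.
+ */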
+
+long
+__strncpy_from_user(char *dst, const char *src, long count)
+{
+ long res;
+ __do_strncpy_from_user(dst, src, count, res);
+ return res;
+}
+
+long
+strncpy_from_user(char *dst, const char *src, long count)
+{
+ long res = -EFAULT;
+ if (access_ok(VERIFY_READ, src, 1))
+ __do_strncpy_from_user(dst, src, count, res);
+ return res;
+}
+
+
+/*
+ * Zero Userspace
+ */
+
+#define __do_clear_user(addr,size) \
+do { \
+ int __d0; \
+ __asm__ __volatile__( \
+ "0: rep; stosl\n" \
+ " movl %2,%0\n" \
+ "1: rep; stosb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: lea 0(%2,%0,4),%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,2b\n" \
+ ".previous" \
+ : "=&c"(size), "=&D" (__d0) \
+ : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
+} while (0)
+
+unsigned long
+clear_user(void *to, unsigned long n)
+{
+ if (access_ok(VERIFY_WRITE, to, n))
+ __do_clear_user(to, n);
+ return n;
+}
+
+unsigned long
+__clear_user(void *to, unsigned long n)
+{
+ __do_clear_user(to, n);
+ return n;
+}
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+
+long strnlen_user(const char *s, long n)
+{
+ unsigned long mask = -__addr_ok(s);
+ unsigned long res, tmp;
+
+ __asm__ __volatile__(
+ " testl %0, %0\n"
+ " jz 3f\n"
+ " andl %0,%%ecx\n"
+ "0: repne; scasb\n"
+ " setne %%al\n"
+ " subl %%ecx,%0\n"
+ " addl %0,%%eax\n"
+ "1:\n"
+ ".section .fixup,\"ax\"\n"
+ "2: xorl %%eax,%%eax\n"
+ " jmp 1b\n"
+ "3: movb $1,%%al\n"
+ " jmp 1b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 0b,2b\n"
+ ".previous"
+ :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp)
+ :"0" (n), "1" (s), "2" (0), "3" (mask)
+ :"cc");
+ return res & mask;
+}
diff --git a/xen/arch/i386/xeno.lds b/xen/arch/i386/xeno.lds
new file mode 100644
index 0000000000..5947ebada5
--- /dev/null
+++ b/xen/arch/i386/xeno.lds
@@ -0,0 +1,87 @@
+/* ld script to make i386 Linux kernel
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
+ */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(start)
+SECTIONS
+{
+ . = 0xFC400000 + 0x100000;
+ _text = .; /* Text and read-only data */
+ .text : {
+ *(.text)
+ *(.fixup)
+ *(.gnu.warning)
+ } = 0x9090
+ .text.lock : { *(.text.lock) } /* out-of-line lock text */
+
+ _etext = .; /* End of text section */
+
+ .rodata : { *(.rodata) *(.rodata.*) }
+ .kstrtab : { *(.kstrtab) }
+
+ . = ALIGN(16); /* Exception table */
+ __start___ex_table = .;
+ __ex_table : { *(__ex_table) }
+ __stop___ex_table = .;
+
+ __start___ksymtab = .; /* Kernel symbol table */
+ __ksymtab : { *(__ksymtab) }
+ __stop___ksymtab = .;
+
+ __start___kallsyms = .; /* All kernel symbols */
+ __kallsyms : { *(__kallsyms) }
+ __stop___kallsyms = .;
+
+ .data : { /* Data */
+ *(.data)
+ CONSTRUCTORS
+ }
+
+ _edata = .; /* End of data section */
+
+ . = ALIGN(8192); /* init_task */
+ .data.init_task : { *(.data.init_task) }
+
+ . = ALIGN(4096); /* Init code and data */
+ __init_begin = .;
+ .text.init : { *(.text.init) }
+ .data.init : { *(.data.init) }
+ . = ALIGN(16);
+ __setup_start = .;
+ .setup.init : { *(.setup.init) }
+ __setup_end = .;
+ __initcall_start = .;
+ .initcall.init : { *(.initcall.init) }
+ __initcall_end = .;
+ . = ALIGN(4096);
+ __init_end = .;
+
+ . = ALIGN(4096);
+ .data.page_aligned : { *(.data.idt) }
+
+ . = ALIGN(32);
+ .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+ __bss_start = .; /* BSS */
+ .bss : {
+ *(.bss)
+ }
+ _end = . ;
+
+ /* Sections to be discarded */
+ /DISCARD/ : {
+ *(.text.exit)
+ *(.data.exit)
+ *(.exitcall.exit)
+ }
+
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+}
diff --git a/xen/common/Makefile b/xen/common/Makefile
new file mode 100644
index 0000000000..12f1f7d2e9
--- /dev/null
+++ b/xen/common/Makefile
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o common.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen/common/ac_timer.c b/xen/common/ac_timer.c
new file mode 100644
index 0000000000..8f65ff7093
--- /dev/null
+++ b/xen/common/ac_timer.c
@@ -0,0 +1,335 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: ac_timer.c
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: Accurate timer for the Hypervisor
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/lib.h>
+#include <xeno/config.h>
+#include <xeno/smp.h>
+#include <xeno/init.h>
+
+#include <xeno/time.h>
+#include <xeno/ac_timer.h>
+#include <xeno/keyhandler.h>
+
+#include <asm/system.h>
+#include <asm/desc.h>
+
+
+#undef AC_TIMER_TRACE
+#undef AC_TIMER_STATS
+
+#ifdef AC_TIMER_TRACE
+#define TRC(_x) _x
+#else
+#define TRC(_x)
+#endif
+
+/*
+ * We pull handlers off the timer list this far in future,
+ * rather than reprogramming the time hardware.
+ */
+#define TIMER_SLOP (50*1000) /* ns */
+
+/* A timer list per CPU */
+typedef struct ac_timers_st
+{
+ spinlock_t lock;
+ struct list_head timers;
+ struct ac_timer *prev, *curr;
+} __cacheline_aligned ac_timers_t;
+static ac_timers_t ac_timers[NR_CPUS];
+
+#ifdef AC_TIMER_STATS
+#define BUCKETS 1000
+#define MAX_STATS
+typedef struct act_stats_st
+{
+ u32 count;
+ u32 times[2*(BUCKETS)];
+} __cacheline_aligned act_stats_t;
+static act_stats_t act_stats[NR_CPUS];
+
+#endif
+
+/* local prototypes */
+static int detach_ac_timer(struct ac_timer *timer);
+/*static void ac_timer_debug(unsigned long);*/
+
+/*
+ * add a timer.
+ * return value:
+ * 0: success
+ * 1: failure, timer in the past or timeout value too small
+ * -1: failure, timer uninitialised
+ */
+int add_ac_timer(struct ac_timer *timer)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ s_time_t now;
+
+ /* make sure timeout value is in the future */
+ now = NOW();
+ TRC(printk("ACT [%02d] add(): now=%lld timo=%lld\n",
+ cpu, now, timer->expires));
+ if (timer->expires <= now) {
+ printk("ACT[%02d] add_ac_timer: now=0x%08X%08X > expire=0x%08X%08X\n",
+ cpu, (u32)(now>>32), (u32)now,
+ (u32)(timer->expires>>32), (u32)timer->expires);
+ return 1;
+ }
+ spin_lock_irqsave(&ac_timers[cpu].lock, flags);
+ /*
+ * Add timer to the list. If it gets added to the front we have to
+ * reprogram the timer hardware.
+ */
+ if (list_empty(&ac_timers[cpu].timers)) {
+ /* Reprogram and add to head of list */
+ if (!reprogram_ac_timer(timer->expires)) {
+ /* failed */
+ printk("ACT [%02d] add(): add at head failed\n", cpu);
+ spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+ return 1;
+ }
+ list_add(&timer->timer_list, &ac_timers[cpu].timers);
+ TRC(printk("ACT [%02d] add(0x%08X%08X): added at head\n", cpu,
+ (u32)(timer->expires>>32), (u32)timer->expires));
+ } else {
+ struct list_head *pos;
+ struct ac_timer *t;
+ for (pos = ac_timers[cpu].timers.next;
+ pos != &ac_timers[cpu].timers;
+ pos = pos->next) {
+ t = list_entry(pos, struct ac_timer, timer_list);
+ if (t->expires > timer->expires)
+ break;
+ }
+
+ if (pos->prev == &ac_timers[cpu].timers) {
+ /* added to head, reprogram timer */
+ if (!reprogram_ac_timer(timer->expires)) {
+ /* failed */
+ TRC(printk("ACT [%02d] add(): add at head failed\n", cpu));
+ spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+ return 1;
+ }
+ list_add (&(timer->timer_list), pos->prev);
+ TRC(printk("ACT [%02d] add(0x%08X%08X): added at head\n", cpu,
+ (u32)(timer->expires>>32), (u32)timer->expires));
+ } else {
+ list_add (&(timer->timer_list), pos->prev);
+ TRC(printk("ACT [%02d] add(0x%08X%08X): add < exp=0x%08X%08X\n",
+ cpu,
+ (u32)(timer->expires>>32), (u32)timer->expires,
+ (u32)(t->expires>>32), (u32)t->expires));
+ }
+ }
+ spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+ return 0;
+}
+
+/*
+ * remove a timer
+ * return values:
+ * 0: success
+ * -1: bogus timer
+ */
+static int detach_ac_timer(struct ac_timer *timer)
+{
+ TRC(int cpu = smp_processor_id());
+ TRC(printk("ACT [%02d] detach(): \n", cpu));
+ list_del(&timer->timer_list);
+ timer->timer_list.next = NULL;
+ return 0;
+}
+
+/*
+ * remove a timer
+ * return values:
+ * 0: success
+ * -1: bogus timer
+ */
+int rem_ac_timer(struct ac_timer *timer)
+{
+ int cpu = smp_processor_id();
+ int res;
+ unsigned long flags;
+
+ TRC(printk("ACT [%02d] remove(): timo=%lld \n", cpu, timer->expires));
+
+ spin_lock_irqsave(&ac_timers[cpu].lock, flags);
+ res = detach_ac_timer(timer);
+ spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+
+ return res;
+}
+
+/*
+ * modify a timer, i.e., set a new timeout value
+ * return value:
+ * 0: success
+ * -1: error
+ */
+int mod_ac_timer(struct ac_timer *timer, s_time_t new_time)
+{
+ if (rem_ac_timer(timer) != 0)
+ return -1;
+ timer->expires = new_time;
+ if (add_ac_timer(timer) != 0)
+ return -1;
+ return 0;
+}
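+
+/*
+ * Typical usage sketch (modelled on update_time() in arch/i386/time.c;
+ * 'my_handler' is a placeholder name):
+ *
+ * init_ac_timer(&t);
+ * t.function = my_handler;
+ * t.expires = NOW() + MILLISECS(200);
+ * if (add_ac_timer(&t) == 1)
+ * ... deadline already passed: recompute t.expires and retry ...
+ */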
+
+/*
+ * do_ac_timer
+ * deal with timeouts and run the handlers
+ */
+void do_ac_timer(void)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ struct ac_timer *t;
+
+ spin_lock_irqsave(&ac_timers[cpu].lock, flags);
+
+ do_timer_again:
+
+ TRC(printk("ACT [%02d] do(): now=%lld\n", cpu, NOW()));
+
+ /* Sanity: is the timer list empty? */
+ if ( list_empty(&ac_timers[cpu].timers) )
+ printk("ACT[%02d] do_ac_timer(): timer irq without timer\n", cpu);
+
+#ifdef AC_TIMER_STATS
+ if ( !list_empty(&ac_timers[cpu].timers) )
+ {
+ s32 diff;
+ u32 i;
+ /* Measure against the timer at the head of the list. */
+ t = list_entry(ac_timers[cpu].timers.next,
+ struct ac_timer, timer_list);
+ diff = ((s32)(NOW() - t->expires)) / 1000; /* delta in us */
+ if (diff < -BUCKETS)
+ diff = -BUCKETS;
+ else if (diff > BUCKETS)
+ diff = BUCKETS;
+ act_stats[cpu].times[diff+BUCKETS]++;
+ act_stats[cpu].count++;
+
+ if (act_stats[cpu].count >= 5000) {
+ printk("ACT Stats\n");
+ for (i=0; i < 2*BUCKETS; i++) {
+ if (act_stats[cpu].times[i] != 0)
+ printk("ACT [%02d]: %3dus: %5d\n",
+ cpu,i-BUCKETS, act_stats[cpu].times[i]);
+ act_stats[cpu].times[i]=0;
+ }
+ act_stats[cpu].count = 0;
+ printk("\n");
+ }
+ }
+#endif
+
+ /* Handle all timeouts in the near future. */
+ while ( !list_empty(&ac_timers[cpu].timers) )
+ {
+ t = list_entry(ac_timers[cpu].timers.next,
+ struct ac_timer, timer_list);
+ if ( t->expires > (NOW() + TIMER_SLOP) ) break;
+ detach_ac_timer(t);
+ spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+ if ( t->function != NULL ) t->function(t->data);
+ spin_lock_irqsave(&ac_timers[cpu].lock, flags);
+ }
+
+ /* If list not empty then reprogram timer to new head of list */
+ if ( !list_empty(&ac_timers[cpu].timers) )
+ {
+ t = list_entry(ac_timers[cpu].timers.next,
+ struct ac_timer, timer_list);
+ if ( t->expires > 0 )
+ {
+ TRC(printk("ACT [%02d] do(): reprog timo=%lld\n",cpu,t->expires));
+ if ( !reprogram_ac_timer(t->expires) )
+ {
+ TRC(printk("ACT [%02d] do(): again\n", cpu));
+ goto do_timer_again;
+ }
+ }
+ }
+
+ spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
+ TRC(printk("ACT [%02d] do(): end\n", cpu));
+}
+
+/*
+ * debug dump_queue
+ * arguments: queue head, name of queue
+ */
+static void dump_tqueue(struct list_head *queue, char *name)
+{
+ struct list_head *list;
+ int loop = 0;
+ struct ac_timer *t;
+
+ printk ("QUEUE %s %lx n: %lx, p: %lx\n", name, (unsigned long)queue,
+ (unsigned long) queue->next, (unsigned long) queue->prev);
+ list_for_each (list, queue) {
+ t = list_entry(list, struct ac_timer, timer_list);
+ printk (" %s %d : %lx ex=0x%08X%08X %lu n: %lx, p: %lx\n",
+ name, loop++,
+ (unsigned long)list,
+ (u32)(t->expires>>32), (u32)t->expires, t->data,
+ (unsigned long)list->next, (unsigned long)list->prev);
+ }
+ return;
+}
+
+
+static void dump_timerq(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ u_long flags;
+ s_time_t now = NOW();
+
+ printk("Dumping ac_timer queues for cpu 0: NOW=0x%08X%08X\n",
+ (u32)(now>>32), (u32)now);
+
+ spin_lock_irqsave(&ac_timers[0].lock, flags);
+ dump_tqueue(&ac_timers[0].timers, "ac_time");
+ spin_unlock_irqrestore(&ac_timers[0].lock, flags);
+ printk("\n");
+ return;
+}
+
+
+void __init ac_timer_init(void)
+{
+ int i;
+
+ printk ("ACT: Initialising Accurate timers\n");
+
+ for (i = 0; i < NR_CPUS; i++)
+ {
+ INIT_LIST_HEAD(&ac_timers[i].timers);
+ spin_lock_init(&ac_timers[i].lock);
+ }
+
+ add_key_handler('a', dump_timerq, "dump ac_timer queues");
+}
diff --git a/xen/common/block.c b/xen/common/block.c
new file mode 100644
index 0000000000..851b3b544c
--- /dev/null
+++ b/xen/common/block.c
@@ -0,0 +1,22 @@
+/* block.c
+ *
+ * ring data structures for buffering messages between the hypervisor and
+ * guest OSes.
+ *
+ */
+
+#include <hypervisor-ifs/block.h>
+#include <xeno/lib.h>
+
+/*
+ * create_block_ring
+ *
+ * domain:
+ *
+ * allocates space for a particular domain's block io ring.
+ */
+blk_ring_t *create_block_ring(int domain)
+{
+ printk ("XEN create block ring <not implemented>");
+ return (blk_ring_t *)NULL;
+}
diff --git a/xen/common/brlock.c b/xen/common/brlock.c
new file mode 100644
index 0000000000..e2bccec6a7
--- /dev/null
+++ b/xen/common/brlock.c
@@ -0,0 +1,69 @@
+/*
+ *
+ * linux/lib/brlock.c
+ *
+ * 'Big Reader' read-write spinlocks. See linux/brlock.h for details.
+ *
+ * Copyright 2000, Ingo Molnar <mingo@redhat.com>
+ * Copyright 2000, David S. Miller <davem@redhat.com>
+ */
+
+#include <linux/config.h>
+
+#ifdef CONFIG_SMP
+
+#include <linux/sched.h>
+#include <linux/brlock.h>
+
+#ifdef __BRLOCK_USE_ATOMICS
+
+brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX] =
+ { [0 ... NR_CPUS-1] = { [0 ... __BR_IDX_MAX-1] = RW_LOCK_UNLOCKED } };
+
+void __br_write_lock (enum brlock_indices idx)
+{
+ int i;
+
+ for (i = 0; i < smp_num_cpus; i++)
+ write_lock(&__brlock_array[cpu_logical_map(i)][idx]);
+}
+
+void __br_write_unlock (enum brlock_indices idx)
+{
+ int i;
+
+ for (i = 0; i < smp_num_cpus; i++)
+ write_unlock(&__brlock_array[cpu_logical_map(i)][idx]);
+}
+
+#else /* ! __BRLOCK_USE_ATOMICS */
+
+brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX] =
+ { [0 ... NR_CPUS-1] = { [0 ... __BR_IDX_MAX-1] = 0 } };
+
+struct br_wrlock __br_write_locks[__BR_IDX_MAX] =
+ { [0 ... __BR_IDX_MAX-1] = { SPIN_LOCK_UNLOCKED } };
+
+void __br_write_lock (enum brlock_indices idx)
+{
+ int i;
+
+again:
+ spin_lock(&__br_write_locks[idx].lock);
+ for (i = 0; i < smp_num_cpus; i++)
+ if (__brlock_array[cpu_logical_map(i)][idx] != 0) {
+ spin_unlock(&__br_write_locks[idx].lock);
+ barrier();
+ cpu_relax();
+ goto again;
+ }
+}
+
+void __br_write_unlock (enum brlock_indices idx)
+{
+ spin_unlock(&__br_write_locks[idx].lock);
+}
+
+#endif /* __BRLOCK_USE_ATOMICS */
+
+#endif /* CONFIG_SMP */
diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c
new file mode 100644
index 0000000000..1d43f641ba
--- /dev/null
+++ b/xen/common/dom0_ops.c
@@ -0,0 +1,150 @@
+/******************************************************************************
+ * dom0_ops.c
+ *
+ * Process command requests from domain-0 guest OS.
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/dom0_ops.h>
+#include <xeno/sched.h>
+#include <xeno/event.h>
+#include <asm/domain_page.h>
+
+extern unsigned int alloc_new_dom_mem(struct task_struct *, unsigned int);
+
+static unsigned int get_domnr(void)
+{
+ struct task_struct *p = &idle0_task;
+ unsigned long dom_mask = 0;
+ read_lock_irq(&tasklist_lock);
+ do {
+ if ( is_idle_task(p) ) continue;
+ set_bit(p->domain, &dom_mask);
+ }
+ while ( (p = p->next_task) != &idle0_task );
+ read_unlock_irq(&tasklist_lock);
+ return (dom_mask == ~0UL) ? 0 : ffz(dom_mask);
+}
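+
+/*
+ * get_domnr() returns the lowest unused domain id: each live task sets
+ * its id's bit in dom_mask and ffz() finds the first zero bit. A return
+ * of 0 means all ids are taken; DOM0_NEWDOMAIN below treats 0 as an
+ * allocation failure.
+ */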
+
+static void build_page_list(struct task_struct *p)
+{
+ unsigned long *list;
+ unsigned long curr;
+ struct list_head *list_ent;
+
+ curr = list_entry(p->pg_head.next, struct pfn_info, list) - frame_table;
+ list = (unsigned long *)map_domain_mem(curr << PAGE_SHIFT);
+
+ list_for_each(list_ent, &p->pg_head)
+ {
+ *list++ = list_entry(list_ent, struct pfn_info, list) - frame_table;
+
+ if( ((unsigned long)list & ~PAGE_MASK) == 0 )
+ {
+ struct list_head *ent = frame_table[curr].list.next;
+ curr = list_entry(ent, struct pfn_info, list) - frame_table;
+ unmap_domain_mem(list-1);
+ list = (unsigned long *)map_domain_mem(curr << PAGE_SHIFT);
+ }
+ }
+
+ unmap_domain_mem(list);
+}
+
+long do_dom0_op(dom0_op_t *u_dom0_op)
+{
+ long ret = 0;
+ dom0_op_t op;
+
+ if ( current->domain != 0 )
+ return -EPERM;
+
+ if ( copy_from_user(&op, u_dom0_op, sizeof(op)) )
+ return -EFAULT;
+
+ switch ( op.cmd )
+ {
+
+ case DOM0_STARTDOM:
+ {
+ struct task_struct * p = find_domain_by_id(op.u.meminfo.domain);
+ if ( (ret = final_setup_guestos(p, &op.u.meminfo)) != 0 )
+ {
+ p->state = TASK_DYING;
+ release_task(p);
+ break;
+ }
+ wake_up(p);
+ reschedule(p);
+ ret = p->domain;
+ }
+ break;
+
+ case DOM0_NEWDOMAIN:
+ {
+ struct task_struct *p;
+ static unsigned int pro = 0;
+ unsigned int dom = get_domnr();
+ ret = -ENOMEM;
+ if ( dom == 0 ) break;
+ pro = (pro+1) % smp_num_cpus;
+ p = do_newdomain(dom, pro);
+ if ( p == NULL ) break;
+
+ ret = alloc_new_dom_mem(p, op.u.newdomain.memory_kb);
+ if ( ret != 0 ) break;
+
+ build_page_list(p);
+
+ ret = p->domain;
+
+ op.u.newdomain.domain = ret;
+ op.u.newdomain.pg_head =
+ list_entry(p->pg_head.next, struct pfn_info, list) -
+ frame_table;
+ copy_to_user(u_dom0_op, &op, sizeof(op));
+ }
+ break;
+
+ case DOM0_KILLDOMAIN:
+ {
+ unsigned int dom = op.u.killdomain.domain;
+ if ( dom == IDLE_DOMAIN_ID )
+ {
+ ret = -EPERM;
+ }
+ else
+ {
+ ret = kill_other_domain(dom);
+ }
+ }
+ break;
+
+ case DOM0_GETMEMLIST:
+ {
+ int i;
+ unsigned long pfn = op.u.getmemlist.start_pfn;
+ unsigned long *buffer = op.u.getmemlist.buffer;
+ struct list_head *list_ent;
+
+ for ( i = 0; i < op.u.getmemlist.num_pfns; i++ )
+ {
+ /* XXX We trust DOM0 to give us a safe buffer. XXX */
+ *buffer++ = pfn;
+ list_ent = frame_table[pfn].list.next;
+ pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
+ }
+ }
+ break;
+
+ default:
+ ret = -ENOSYS;
+
+ }
+
+ return ret;
+}
diff --git a/xen/common/domain.c b/xen/common/domain.c
new file mode 100644
index 0000000000..5e862ada6d
--- /dev/null
+++ b/xen/common/domain.c
@@ -0,0 +1,590 @@
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/lib.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/mm.h>
+#include <xeno/skbuff.h>
+#include <xeno/interrupt.h>
+#include <xeno/delay.h>
+#include <xeno/event.h>
+#include <xeno/time.h>
+#include <xeno/dom0_ops.h>
+#include <asm/io.h>
+#include <asm/domain_page.h>
+#include <asm/flushtlb.h>
+#include <asm/msr.h>
+#include <xeno/multiboot.h>
+#include <xeno/blkdev.h>
+
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
+
+extern int nr_mods;
+extern module_t *mod;
+extern unsigned char *cmdline;
+
+rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;
+
+/*
+ * create a new domain
+ */
+struct task_struct *do_newdomain(unsigned int dom_id, unsigned int cpu)
+{
+ int retval;
+ struct task_struct *p = NULL;
+ unsigned long flags;
+
+ retval = -ENOMEM;
+ p = alloc_task_struct();
+ if (!p) goto newdomain_out;
+ memset(p, 0, sizeof(*p));
+
+ p->domain = dom_id;
+ p->processor = cpu;
+
+ spin_lock_init(&p->blk_ring_lock);
+
+ p->shared_info = (void *)get_free_page(GFP_KERNEL);
+ memset(p->shared_info, 0, PAGE_SIZE);
+ SHARE_PFN_WITH_DOMAIN(virt_to_page(p->shared_info), dom_id);
+
+ init_blkdev_info(p);
+
+ SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
+ SET_GDT_ADDRESS(p, DEFAULT_GDT_ADDRESS);
+
+ p->addr_limit = USER_DS;
+ p->state = TASK_UNINTERRUPTIBLE;
+ p->active_mm = &p->mm;
+ p->num_net_vifs = 0;
+
+ p->net_ring_base = (net_ring_t *)(p->shared_info + 1);
+ INIT_LIST_HEAD(&p->pg_head);
+ p->tot_pages = 0;
+ write_lock_irqsave(&tasklist_lock, flags);
+ SET_LINKS(p);
+ write_unlock_irqrestore(&tasklist_lock, flags);
+
+ newdomain_out:
+ return(p);
+}
+
+/* Get a pointer to the specified domain. Consider replacing this
+ * with a hash lookup later.
+ *
+ * Also, kill_other_domain should call this instead of scanning on its own.
+ */
+struct task_struct *find_domain_by_id(unsigned int dom)
+{
+ struct task_struct *p = &idle0_task;
+
+ read_lock_irq(&tasklist_lock);
+ do {
+ if ( (p->domain == dom) ) {
+ read_unlock_irq(&tasklist_lock);
+ return (p);
+ }
+ } while ( (p = p->next_task) != &idle0_task );
+ read_unlock_irq(&tasklist_lock);
+
+ return 0;
+}
+
+
+void kill_domain_with_errmsg(const char *err)
+{
+ printk("DOM%d FATAL ERROR: %s\n",
+ current->domain, err);
+ kill_domain();
+}
+
+
+/* Kill the currently executing domain. */
+void kill_domain(void)
+{
+ if ( current->domain == 0 )
+ {
+ extern void machine_restart(char *);
+ printk("Domain 0 killed: rebooting machine!\n");
+ machine_restart(0);
+ }
+
+ printk("Killing domain %d\n", current->domain);
+ current->state = TASK_DYING;
+ schedule();
+ BUG(); /* never get here */
+}
+
+
+long kill_other_domain(unsigned int dom)
+{
+ struct task_struct *p = &idle0_task;
+ unsigned long cpu_mask = 0;
+ long ret = -ESRCH;
+
+ read_lock_irq(&tasklist_lock);
+ do {
+ if ( p->domain == dom )
+ {
+ cpu_mask = mark_guest_event(p, _EVENT_DIE);
+ ret = 0;
+ break;
+ }
+ }
+ while ( (p = p->next_task) != &idle0_task );
+ read_unlock_irq(&tasklist_lock);
+
+ hyp_event_notify(cpu_mask);
+
+ return ret;
+}
+
+
+unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes)
+{
+ struct list_head *temp;
+ struct pfn_info *pf;
+ unsigned int alloc_pfns;
+ unsigned int req_pages;
+ unsigned long flags;
+
+ /* how many pages do we need to alloc? */
+ req_pages = kbytes >> (PAGE_SHIFT - 10);
+
+ spin_lock_irqsave(&free_list_lock, flags);
+
+ /* is there enough mem to serve the request? */
+ if ( req_pages > free_pfns )
+ {
+ spin_unlock_irqrestore(&free_list_lock, flags);
+ return -1;
+ }
+
+ /* allocate pages and build a thread through frame_table */
+ temp = free_list.next;
+ for ( alloc_pfns = 0; alloc_pfns < req_pages; alloc_pfns++ )
+ {
+ pf = list_entry(temp, struct pfn_info, list);
+ pf->flags |= p->domain;
+ pf->type_count = pf->tot_count = 0;
+ temp = temp->next;
+ list_del(&pf->list);
+ list_add_tail(&pf->list, &p->pg_head);
+ free_pfns--;
+ }
+
+ spin_unlock_irqrestore(&free_list_lock, flags);
+
+ p->tot_pages = req_pages;
+
+ return 0;
+}
+
+
+void free_all_dom_mem(struct task_struct *p)
+{
+ struct list_head *list_ent, *tmp;
+
+ list_for_each_safe(list_ent, tmp, &p->pg_head)
+ {
+ struct pfn_info *pf = list_entry(list_ent, struct pfn_info, list);
+ pf->type_count = pf->tot_count = pf->flags = 0;
+ list_del(list_ent);
+ list_add(list_ent, &free_list);
+ }
+
+ p->tot_pages = 0;
+}
+
+
+/* Release resources belonging to task @p. */
+void release_task(struct task_struct *p)
+{
+ ASSERT(p->state == TASK_DYING);
+ ASSERT(!p->has_cpu);
+ write_lock_irq(&tasklist_lock);
+ REMOVE_LINKS(p);
+ write_unlock_irq(&tasklist_lock);
+
+ /*
+ * Safe! Only queue skbuffs with tasklist_lock held.
+ * Only access shared_info with tasklist_lock held.
+ * And free_task_struct() only releases if refcnt == 0.
+ */
+ while ( p->num_net_vifs )
+ {
+ destroy_net_vif(p);
+ }
+ if ( p->mm.perdomain_pt ) free_page((unsigned long)p->mm.perdomain_pt);
+
+ destroy_blkdev_info(p);
+
+ UNSHARE_PFN(virt_to_page(p->shared_info));
+ free_page((unsigned long)p->shared_info);
+
+ free_all_dom_mem(p);
+
+ free_task_struct(p);
+}
+
+
+/* final_setup_guestos is used for the final setup and launching of domains
+ * other than domain 0, i.e. the domains built by the userspace domain
+ * builder running in domain 0.
+ *
+ * Initial load map:
+ * start_address:
+ * OS image
+ * ....
+ * stack_start:
+ * start_info:
+ * <one page>
+ * page tables:
+ * <enough pages>
+ * end_address:
+ * shared_info:
+ * <one page>
+ */
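+
+/* Worked example (illustrative, assuming 4KB pages): a 16MB domain loaded
+ * at 0xC0000000 has nr_pages = 4096, so the code below computes
+ * pt_base = virt_load_addr + (4096 - 1) * PAGE_SIZE = 0xC0FFF000; the page
+ * tables thus sit at the very top of the domain's allocation, with the
+ * shared_info page mapped just beyond it. */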
+
+int final_setup_guestos(struct task_struct * p, dom_meminfo_t * meminfo)
+{
+ l2_pgentry_t * l2tab;
+ l1_pgentry_t * l1tab;
+ start_info_t * virt_startinfo_addr;
+ unsigned long virt_stack_addr;
+ unsigned long phys_l2tab;
+ net_ring_t *net_ring;
+ net_vif_t *net_vif;
+
+ /* Entries from 0xe0000000 upwards in the page table must contain the
+ * hypervisor's memory mappings -- set them up here.
+ */
+ phys_l2tab = meminfo->l2_pgt_addr;
+ l2tab = map_domain_mem(phys_l2tab);
+ memcpy(l2tab + DOMAIN_ENTRIES_PER_L2_PAGETABLE,
+ ((l2_pgentry_t *)idle_pg_table[p->processor]) +
+ DOMAIN_ENTRIES_PER_L2_PAGETABLE,
+ (ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE)
+ * sizeof(l2_pgentry_t));
+ l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ mk_l2_pgentry(__pa(p->mm.perdomain_pt) | PAGE_HYPERVISOR);
+ p->mm.pagetable = mk_pagetable(phys_l2tab);
+ unmap_domain_mem(l2tab);
+
+ /* map in the shared info structure */
+ phys_l2tab = pagetable_val(p->mm.pagetable);
+ l2tab = map_domain_mem(phys_l2tab);
+ l2tab += l2_table_offset(meminfo->virt_shinfo_addr);
+ l1tab = map_domain_mem(l2_pgentry_to_phys(*l2tab));
+ l1tab += l1_table_offset(meminfo->virt_shinfo_addr);
+ *l1tab = mk_l1_pgentry(__pa(p->shared_info) | L1_PROT);
+ unmap_domain_mem((void *)((unsigned long)l2tab & PAGE_MASK));
+ unmap_domain_mem((void *)((unsigned long)l1tab & PAGE_MASK));
+
+ /* set up the shared info structure */
+ update_dom_time(p->shared_info);
+ p->shared_info->cpu_freq = cpu_freq;
+ p->shared_info->domain_time = 0;
+
+ /* we pass start info struct to guest os as function parameter on stack */
+ virt_startinfo_addr = (start_info_t *)meminfo->virt_startinfo_addr;
+ virt_stack_addr = (unsigned long)virt_startinfo_addr;
+
+ /* We need to populate the start_info struct within the context of the
+ * new domain, so temporarily install its page tables.
+ */
+ __cli();
+ __asm__ __volatile__ (
+ "mov %%eax,%%cr3" : : "a" (pagetable_val(p->mm.pagetable)));
+
+ memset(virt_startinfo_addr, 0, sizeof(*virt_startinfo_addr));
+ virt_startinfo_addr->nr_pages = p->tot_pages;
+ virt_startinfo_addr->shared_info = (shared_info_t *)meminfo->virt_shinfo_addr;
+ virt_startinfo_addr->pt_base = meminfo->virt_load_addr +
+ ((p->tot_pages - 1) << PAGE_SHIFT);
+
+ /* Add virtual network interfaces and point to them in startinfo. */
+ while (meminfo->num_vifs-- > 0) {
+ net_vif = create_net_vif(p->domain);
+ net_ring = net_vif->net_ring;
+ if (!net_ring) panic("no network ring!\n");
+ }
+
+/* XXX SMH: horrible hack to convert hypervisor VAs in the shared-info page
+ * (SHIP) to guest VAs */
+#define SH2G(_x) (meminfo->virt_shinfo_addr | (((unsigned long)(_x)) & 0xFFF))
+
+ virt_startinfo_addr->net_rings = (net_ring_t *)SH2G(p->net_ring_base);
+ virt_startinfo_addr->num_net_rings = p->num_net_vifs;
+
+ /* Add block io interface */
+ virt_startinfo_addr->blk_ring = virt_to_phys(p->blk_ring_base);
+
+ /* Copy the command line */
+ strcpy(virt_startinfo_addr->cmd_line, meminfo->cmd_line);
+
+ /* Reinstate the caller's page tables. */
+ __asm__ __volatile__ (
+ "mov %%eax,%%cr3" : : "a" (pagetable_val(current->mm.pagetable)));
+ __sti();
+
+ new_thread(p,
+ (unsigned long)meminfo->virt_load_addr,
+ (unsigned long)virt_stack_addr,
+ (unsigned long)virt_startinfo_addr);
+
+ return 0;
+}
+
+/* Allocate the page currently addressed by *cur_addr from a domain's page
+ * list, stepping *cur_addr back to the previous page in the list and
+ * decrementing *index. Returns the machine address of the allocated page. */
+static unsigned long alloc_page_from_domain(unsigned long * cur_addr,
+ unsigned long * index)
+{
+ unsigned long ret = *cur_addr;
+ struct list_head *ent = frame_table[ret >> PAGE_SHIFT].list.prev;
+ *cur_addr = list_entry(ent, struct pfn_info, list) - frame_table;
+ *cur_addr <<= PAGE_SHIFT;
+ (*index)--;
+ return ret;
+}
+
+/* setup_guestos is used solely for building domain 0. Other domains are
+ * built by the userspace domain builder in domain 0, with final setup done
+ * by final_setup_guestos.
+ */
+int setup_guestos(struct task_struct *p, dom0_newdomain_t *params)
+{
+ struct list_head *list_ent;
+ char *src, *dst;
+ int i, dom = p->domain;
+ unsigned long phys_l1tab, phys_l2tab;
+ unsigned long cur_address, alloc_address;
+ unsigned long virt_load_address, virt_stack_address, virt_shinfo_address;
+ start_info_t *virt_startinfo_address;
+ unsigned long count;
+ unsigned long alloc_index;
+ l2_pgentry_t *l2tab, *l2start;
+ l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+ struct pfn_info *page = NULL;
+ net_ring_t *net_ring;
+ net_vif_t *net_vif;
+
+ /* Sanity! */
+ if ( p->domain != 0 ) BUG();
+
+ if ( strncmp(__va(mod[0].mod_start), "XenoGues", 8) )
+ {
+ printk("DOM%d: Invalid guest OS image\n", dom);
+ return -1;
+ }
+
+ virt_load_address = *(unsigned long *)__va(mod[0].mod_start + 8);
+ if ( (virt_load_address & (PAGE_SIZE-1)) )
+ {
+ printk("DOM%d: Guest OS load address not page-aligned (%08lx)\n",
+ dom, virt_load_address);
+ return -1;
+ }
+
+ if ( alloc_new_dom_mem(p, params->memory_kb) ) return -ENOMEM;
+ alloc_address = list_entry(p->pg_head.prev, struct pfn_info, list) -
+ frame_table;
+ alloc_address <<= PAGE_SHIFT;
+ alloc_index = p->tot_pages;
+
+ if ( (mod[nr_mods-1].mod_end-mod[0].mod_start) >
+ (params->memory_kb << 9) )
+ {
+ printk("DOM%d: Guest OS image is too large\n"
+ " (%luMB is greater than %uMB limit for a\n"
+ " %uMB address space)\n",
+ dom, (mod[nr_mods-1].mod_end-mod[0].mod_start)>>20,
+ (params->memory_kb)>>11,
+ (params->memory_kb)>>10);
+ free_all_dom_mem(p);
+ return -1;
+ }
+
+ printk("DOM%d: Guest OS virtual load address is %08lx\n", dom,
+ virt_load_address);
+
+ /*
+ * WARNING: The new domain must have its 'processor' field
+ * filled in by now !!
+ */
+ phys_l2tab = alloc_page_from_domain(&alloc_address, &alloc_index);
+ l2start = l2tab = map_domain_mem(phys_l2tab);
+ memcpy(l2tab, idle_pg_table[p->processor], PAGE_SIZE);
+ l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ mk_l2_pgentry(__pa(p->mm.perdomain_pt) | __PAGE_HYPERVISOR);
+ memset(l2tab, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));
+ p->mm.pagetable = mk_pagetable(phys_l2tab);
+
+ /*
+ * NB. The upper limit on this loop does one extra page. This is to make
+ * sure a pte exists when we want to map the shared_info struct.
+ */
+
+ l2tab += l2_table_offset(virt_load_address);
+ cur_address = list_entry(p->pg_head.next, struct pfn_info, list) -
+ frame_table;
+ cur_address <<= PAGE_SHIFT;
+ for ( count = 0; count < p->tot_pages + 1; count++ )
+ {
+ if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
+ {
+ if ( l1tab != NULL ) unmap_domain_mem(l1start);
+ phys_l1tab = alloc_page_from_domain(&alloc_address, &alloc_index);
+ *l2tab++ = mk_l2_pgentry(phys_l1tab|L2_PROT);
+ l1start = l1tab = map_domain_mem(phys_l1tab);
+ clear_page(l1tab);
+ l1tab += l1_table_offset(
+ virt_load_address + (count << PAGE_SHIFT));
+ }
+ *l1tab++ = mk_l1_pgentry(cur_address|L1_PROT);
+
+ if ( count < p->tot_pages )
+ {
+ page = frame_table + (cur_address >> PAGE_SHIFT);
+ page->flags = dom | PGT_writeable_page;
+ page->type_count = page->tot_count = 1;
+ /* Set up the MPT entry. */
+ machine_to_phys_mapping[cur_address >> PAGE_SHIFT] = count;
+ }
+
+ list_ent = frame_table[cur_address >> PAGE_SHIFT].list.next;
+ cur_address = list_entry(list_ent, struct pfn_info, list) -
+ frame_table;
+ cur_address <<= PAGE_SHIFT;
+ }
+ unmap_domain_mem(l1start);
+
+ /* Pages holding the page tables must be mapped read-only in the domain.
+ * The table pages were allocated from the tail of the page list, so first
+ * advance cur_address past the leading OS-image pages. */
+ cur_address = list_entry(p->pg_head.next, struct pfn_info, list) -
+ frame_table;
+ cur_address <<= PAGE_SHIFT;
+ for ( count = 0; count < alloc_index; count++ )
+ {
+ list_ent = frame_table[cur_address >> PAGE_SHIFT].list.next;
+ cur_address = list_entry(list_ent, struct pfn_info, list) -
+ frame_table;
+ cur_address <<= PAGE_SHIFT;
+ }
+
+ l2tab = l2start + l2_table_offset(virt_load_address +
+ (alloc_index << PAGE_SHIFT));
+ l1start = l1tab = map_domain_mem(l2_pgentry_to_phys(*l2tab));
+ l1tab += l1_table_offset(virt_load_address + (alloc_index << PAGE_SHIFT));
+ l2tab++;
+ for ( count = alloc_index; count < p->tot_pages; count++ )
+ {
+ /* Read-modify-write split into two statements to avoid an unsequenced
+ * access to *l1tab. */
+ *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
+ l1tab++;
+ if( !((unsigned long)l1tab & (PAGE_SIZE - 1)) )
+ {
+ unmap_domain_mem(l1start);
+ l1start = l1tab = map_domain_mem(l2_pgentry_to_phys(*l2tab));
+ l2tab++;
+ }
+ page = frame_table + (cur_address >> PAGE_SHIFT);
+ page->flags = dom | PGT_l1_page_table;
+ page->tot_count++;
+
+ list_ent = frame_table[cur_address >> PAGE_SHIFT].list.next;
+ cur_address = list_entry(list_ent, struct pfn_info, list) -
+ frame_table;
+ cur_address <<= PAGE_SHIFT;
+ }
+ page->type_count |= REFCNT_PIN_BIT;
+ page->tot_count |= REFCNT_PIN_BIT;
+ page->flags = dom | PGT_l2_page_table;
+ unmap_domain_mem(l1start);
+
+ /* Map in the shared info structure. */
+ virt_shinfo_address = virt_load_address + (p->tot_pages << PAGE_SHIFT);
+ l2tab = l2start + l2_table_offset(virt_shinfo_address);
+ l1start = l1tab = map_domain_mem(l2_pgentry_to_phys(*l2tab));
+ l1tab += l1_table_offset(virt_shinfo_address);
+ *l1tab = mk_l1_pgentry(__pa(p->shared_info)|L1_PROT);
+ unmap_domain_mem(l1start);
+
+ /* Set up shared info area. */
+ update_dom_time(p->shared_info);
+ p->shared_info->cpu_freq = cpu_freq;
+ p->shared_info->domain_time = 0;
+
+ virt_startinfo_address = (start_info_t *)
+ (virt_load_address + ((alloc_index - 1) << PAGE_SHIFT));
+ virt_stack_address = (unsigned long)virt_startinfo_address;
+
+ unmap_domain_mem(l2start);
+
+ /* Install the new page tables. */
+ __cli();
+ __write_cr3_counted(pagetable_val(p->mm.pagetable));
+
+ /* Copy the guest OS image, skipping the 8-byte "XenoGues" magic and the
+ * 4-byte load-address header. */
+ src = (char *)__va(mod[0].mod_start + 12);
+ dst = (char *)virt_load_address;
+ while ( src < (char *)__va(mod[nr_mods-1].mod_end) ) *dst++ = *src++;
+
+ /* Set up start info area. */
+ memset(virt_startinfo_address, 0, sizeof(*virt_startinfo_address));
+ virt_startinfo_address->nr_pages = p->tot_pages;
+ virt_startinfo_address->shared_info =
+ (shared_info_t *)virt_shinfo_address;
+ virt_startinfo_address->pt_base = virt_load_address +
+ ((p->tot_pages - 1) << PAGE_SHIFT);
+
+ /* Add virtual network interfaces and point to them in startinfo. */
+ while (params->num_vifs-- > 0) {
+ net_vif = create_net_vif(dom);
+ net_ring = net_vif->net_ring;
+ if (!net_ring) panic("no network ring!\n");
+ }
+
+/* XXX SMH: horrible hack to convert hypervisor VAs in the shared-info page
+ * (SHIP) to guest VAs */
+#define SHIP2GUEST(_x) (virt_shinfo_address | (((unsigned long)(_x)) & 0xFFF))
+
+ virt_startinfo_address->net_rings =
+ (net_ring_t *)SHIP2GUEST(p->net_ring_base);
+ virt_startinfo_address->num_net_rings = p->num_net_vifs;
+
+ /* Add block io interface */
+ virt_startinfo_address->blk_ring = virt_to_phys(p->blk_ring_base);
+
+ /* Tell the guest OS about any modules we were given. */
+ if ( nr_mods > 1 )
+ {
+ virt_startinfo_address->mod_start =
+ (mod[1].mod_start-mod[0].mod_start-12) + virt_load_address;
+ virt_startinfo_address->mod_len =
+ mod[nr_mods-1].mod_end - mod[1].mod_start;
+ }
+
+ dst = virt_startinfo_address->cmd_line;
+ if ( mod[0].string )
+ {
+ char *modline = (char *)__va(mod[0].string);
+ for ( i = 0; i < 255; i++ )
+ {
+ if ( modline[i] == '\0' ) break;
+ *dst++ = modline[i];
+ }
+ }
+ *dst = '\0';
+
+ /* Reinstate the caller's page tables. */
+ __write_cr3_counted(pagetable_val(current->mm.pagetable));
+ __sti();
+
+ new_thread(p,
+ (unsigned long)virt_load_address,
+ (unsigned long)virt_stack_address,
+ (unsigned long)virt_startinfo_address);
+
+ return 0;
+}
+
+
+void __init domain_init(void)
+{
+ printk("Initialising domains\n");
+}
+
+
diff --git a/xen/common/domain_page.c b/xen/common/domain_page.c
new file mode 100644
index 0000000000..927ba63ff1
--- /dev/null
+++ b/xen/common/domain_page.c
@@ -0,0 +1,67 @@
+/******************************************************************************
+ * domain_page.c
+ *
+ * Allow temporary mapping of domain pages. Based on ideas from the
+ * Linux PKMAP code -- the copyrights and credits are retained below.
+ */
+
+/*
+ * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
+ * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#include <xeno/config.h>
+#include <xeno/sched.h>
+#include <xeno/mm.h>
+#include <asm/domain_page.h>
+#include <asm/pgalloc.h>
+
+static unsigned int map_idx[NR_CPUS];
+
+/* Use a spare PTE bit to mark entries ready for recycling. */
+#define READY_FOR_TLB_FLUSH (1<<10)
+
+static void flush_all_ready_maps(void)
+{
+ unsigned long *cache = mapcache[smp_processor_id()];
+
+ /* A bit skanky -- depends on having an aligned PAGE_SIZE set of PTEs. */
+ do { if ( (*cache & READY_FOR_TLB_FLUSH) ) *cache = 0; }
+ while ( ((unsigned long)(++cache) & ~PAGE_MASK) != 0 );
+
+ local_flush_tlb();
+}
+
+
+void *map_domain_mem(unsigned long pa)
+{
+ unsigned long va;
+ int cpu = smp_processor_id();
+ unsigned int idx;
+ unsigned long *cache = mapcache[cpu];
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ for ( ; ; )
+ {
+ idx = map_idx[cpu] = (map_idx[cpu] + 1) & (MAPCACHE_ENTRIES - 1);
+ if ( idx == 0 ) flush_all_ready_maps();
+ if ( cache[idx] == 0 ) break;
+ }
+
+ cache[idx] = (pa & PAGE_MASK) | __PAGE_HYPERVISOR;
+
+ local_irq_restore(flags);
+
+ va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT) + (pa & ~PAGE_MASK);
+ return (void *)va;
+}
+
+void unmap_domain_mem(void *va)
+{
+ unsigned int idx;
+ idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
+ mapcache[smp_processor_id()][idx] |= READY_FOR_TLB_FLUSH;
+}
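+
+/*
+ * Usage sketch (illustrative only; zero_machine_page() is hypothetical and
+ * not part of this interface). A mapping returned by map_domain_mem() is
+ * short-lived and must be released with unmap_domain_mem():
+ *
+ *   static void zero_machine_page(unsigned long pa)
+ *   {
+ *       u8 *va = map_domain_mem(pa & PAGE_MASK);
+ *       memset(va, 0, PAGE_SIZE);
+ *       unmap_domain_mem(va);
+ *   }
+ */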
diff --git a/xen/common/event.c b/xen/common/event.c
new file mode 100644
index 0000000000..2774806443
--- /dev/null
+++ b/xen/common/event.c
@@ -0,0 +1,33 @@
+/******************************************************************************
+ * event.c
+ *
+ * A nice interface for passing per-domain asynchronous events.
+ * These events are handled in the hypervisor, prior to return
+ * to the guest OS.
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <xeno/config.h>
+#include <xeno/event.h>
+
+typedef void (*hyp_event_callback_fn_t)(void);
+
+extern void schedule(void);
+extern void update_shared_ring(void);
+
+/* Ordering must match definitions of _HYP_EVENT_* in xeno/sched.h */
+static hyp_event_callback_fn_t event_call_fn[] =
+{
+ schedule,
+ update_shared_ring,
+ kill_domain,
+};
+
+/* Handle outstanding events for the currently-executing domain. */
+void do_hyp_events(void)
+{
+ int nr;
+ while ( (nr = ffs(current->hyp_events)) != 0 )
+ (event_call_fn[nr-1])();
+}
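+
+/* Example (illustrative): ffs() returns the 1-based index of the lowest set
+ * bit, so setting bit 2 of current->hyp_events makes the loop above invoke
+ * event_call_fn[2], i.e. kill_domain(), before the hypervisor returns to
+ * the guest. */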
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
new file mode 100644
index 0000000000..09aae6fc70
--- /dev/null
+++ b/xen/common/kernel.c
@@ -0,0 +1,519 @@
+#include <stdarg.h>
+#include <xeno/lib.h>
+#include <xeno/errno.h>
+#include <xeno/multiboot.h>
+#include <xeno/spinlock.h>
+#include <xeno/sched.h>
+#include <xeno/mm.h>
+#include <xeno/delay.h>
+#include <xeno/skbuff.h>
+#include <xeno/interrupt.h>
+#include <asm/io.h>
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <xeno/dom0_ops.h>
+#include <asm/byteorder.h>
+#include <linux/if_ether.h>
+#include <asm/domain_page.h>
+
+/* VGA text definitions. */
+#define COLUMNS 80
+#define LINES 24
+#define ATTRIBUTE 7
+#define VIDEO __va(0xB8000)
+
+static int xpos, ypos;
+static volatile unsigned char *video;
+
+spinlock_t console_lock = SPIN_LOCK_UNLOCKED;
+
+struct e820entry {
+ unsigned long addr_lo, addr_hi; /* start of memory segment */
+ unsigned long size_lo, size_hi; /* size of memory segment */
+ unsigned long type; /* type of memory segment */
+};
+
+/* Used by domain.c:setup_guestos */
+int nr_mods;
+module_t *mod;
+
+void init_serial(void);
+void start_of_day(void);
+
+/* Command line options and variables. */
+unsigned long opt_dom0_ip = 0;
+unsigned int opt_dom0_mem = 16000; /* default kbytes for DOM0 */
+unsigned int opt_ne_base = 0; /* NE2k NICs cannot be probed */
+unsigned char opt_ifname[10] = "eth0";
+int opt_noht=0, opt_noacpi=0;
+enum { OPT_IP, OPT_STR, OPT_UINT, OPT_BOOL };
+static struct {
+ unsigned char *name;
+ int type;
+ void *var;
+} opts[] = {
+ { "dom0_ip", OPT_IP, &opt_dom0_ip },
+ { "dom0_mem", OPT_UINT, &opt_dom0_mem },
+ { "ne_base", OPT_UINT, &opt_ne_base },
+ { "ifname", OPT_STR, &opt_ifname },
+ { "noht", OPT_BOOL, &opt_noht },
+ { "noacpi", OPT_BOOL, &opt_noacpi },
+ { NULL, 0, NULL }
+};
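+
+/* Example (illustrative): booting with the command line
+ *   "xen dom0_ip=169.254.1.1 dom0_mem=32768 noacpi"
+ * leaves opt_dom0_ip = 0xa9fe0101 (via str_to_quad), opt_dom0_mem = 32768
+ * and opt_noacpi = 1; unrecognised options are silently ignored. */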
+
+void cmain (unsigned long magic, multiboot_info_t *mbi)
+{
+ struct task_struct *new_dom;
+ dom0_newdomain_t dom0_params;
+ unsigned long max_page;
+ unsigned char *cmdline;
+ int i;
+
+ init_serial();
+ cls();
+
+ if ( magic != MULTIBOOT_BOOTLOADER_MAGIC )
+ {
+ printf("Invalid magic number: 0x%x\n", (unsigned)magic);
+ return;
+ }
+
+ /*
+ * We require some kind of memory and module information.
+ * The rest we can fake!
+ */
+ if ( (mbi->flags & 9) != 9 )
+ {
+ printf("Bad flags passed by bootloader: 0x%x\n", (unsigned)mbi->flags);
+ return;
+ }
+
+ if ( mbi->mods_count == 0 )
+ {
+ printf("Require at least one module!\n");
+ return;
+ }
+
+ /* Are mmap_* valid? */
+#if 0
+ if ( (mbi->flags & (1<<6)) )
+ {
+ memory_map_t *mmap = (memory_map_t *)mbi->mmap_addr;
+ struct e820entry *e820 = E820_MAP;
+
+ while ( (unsigned long)mmap < (mbi->mmap_addr + mbi->mmap_length) )
+ {
+ e820->addr_lo = mmap->base_addr_low;
+ e820->addr_hi = mmap->base_addr_high;
+ e820->size_lo = mmap->length_low;
+ e820->size_hi = mmap->length_high;
+ e820->type = mmap->type;
+ e820++;
+ mmap = (memory_map_t *)
+ ((unsigned long)mmap + mmap->size + sizeof (mmap->size));
+ }
+ }
+#endif
+
+ nr_mods = mbi->mods_count;
+ mod = (module_t *)__va(mbi->mods_addr);
+
+ /* Parse the command line. */
+ cmdline = (unsigned char *)(mbi->cmdline ? __va(mbi->cmdline) : NULL);
+ if ( cmdline != NULL )
+ {
+ unsigned char *opt_end, *opt;
+ while ( *cmdline == ' ' ) cmdline++;
+ cmdline = strchr(cmdline, ' ');
+ while ( cmdline != NULL )
+ {
+ while ( *cmdline == ' ' ) cmdline++;
+ if ( *cmdline == '\0' ) break;
+ opt_end = strchr(cmdline, ' ');
+ if ( opt_end != NULL ) *opt_end++ = '\0';
+ opt = strchr(cmdline, '=');
+ if ( opt != NULL ) *opt++ = '\0';
+ for ( i = 0; opts[i].name != NULL; i++ )
+ {
+ if ( strcmp(opts[i].name, cmdline ) != 0 ) continue;
+ switch ( opts[i].type )
+ {
+ case OPT_IP:
+ if ( opt != NULL )
+ *(unsigned long *)opts[i].var = str_to_quad(opt);
+ break;
+ case OPT_STR:
+ if ( opt != NULL )
+ strcpy(opts[i].var, opt);
+ break;
+ case OPT_UINT:
+ if ( opt != NULL )
+ *(unsigned int *)opts[i].var =
+ simple_strtol(opt, (char **)&opt, 0);
+ break;
+ case OPT_BOOL:
+ *(int *)opts[i].var = 1;
+ break;
+ }
+ }
+ cmdline = opt_end;
+ }
+ }
+
+ memcpy(&idle0_task_union, &first_task_struct, sizeof(first_task_struct));
+
+ max_page = (mbi->mem_upper+1024) >> (PAGE_SHIFT - 10);
+ init_frametable(max_page);
+ printk("Initialised all memory on a %luMB machine\n",
+ max_page >> (20-PAGE_SHIFT));
+
+ init_page_allocator(mod[nr_mods-1].mod_end, MAX_MONITOR_ADDRESS);
+
+ /* These things will get done by do_newdomain() for all other tasks. */
+ current->shared_info = (void *)get_free_page(GFP_KERNEL);
+ memset(current->shared_info, 0, sizeof(shared_info_t));
+ set_fs(USER_DS);
+ current->num_net_vifs = 0;
+
+ start_of_day();
+
+ /* Create initial domain 0. */
+ dom0_params.num_vifs = 1;
+ dom0_params.memory_kb = opt_dom0_mem;
+
+ if ( opt_dom0_ip == 0 )
+ panic("Must specify an IP address for domain 0!\n");
+
+ add_default_net_rule(0, opt_dom0_ip); /* add VFR info for dom0 */
+
+ new_dom = do_newdomain(0, 0);
+ if ( new_dom == NULL ) panic("Error creating domain 0\n");
+ if ( setup_guestos(new_dom, &dom0_params) != 0 )
+ {
+ panic("Could not set up DOM0 guest OS\n");
+ }
+ update_dom_time(new_dom->shared_info);
+ wake_up(new_dom);
+
+ cpu_idle();
+}
+
+
+#define SERIAL_BASE 0x3f8
+#define RX_BUF 0
+#define TX_HOLD 0
+#define INT_ENABLE 1
+#define INT_IDENT 2
+#define DATA_FORMAT 3
+#define LINE_CTL 4
+#define LINE_STATUS 5
+#define LINE_IN 6
+#define DIVISOR_LO 0
+#define DIVISOR_HI 1
+
+void init_serial(void)
+{
+ /* 9600 baud, no parity, 1 stop bit, 8 data bits. */
+ outb(0x83, SERIAL_BASE+DATA_FORMAT); /* set DLAB to program the divisor */
+ outb(12, SERIAL_BASE+DIVISOR_LO); /* divisor 12: 115200/12 = 9600 baud */
+ outb(0, SERIAL_BASE+DIVISOR_HI);
+ outb(0x03, SERIAL_BASE+DATA_FORMAT); /* clear DLAB; 8n1 */
+
+ /* No interrupts. */
+ outb(0x00, SERIAL_BASE+INT_ENABLE);
+}
+
+
+void putchar_serial(unsigned char c)
+{
+ if ( c == '\n' ) putchar_serial('\r');
+ if ( (c != '\n') && (c != '\r') && ((c < 32) || (c > 126)) ) return;
+ while ( !(inb(SERIAL_BASE+LINE_STATUS)&(1<<5)) ) barrier();
+ outb(c, SERIAL_BASE+TX_HOLD);
+}
+
+
+/* Clear the screen and initialize VIDEO, XPOS and YPOS. */
+void cls (void)
+{
+ int i;
+
+ video = (unsigned char *) VIDEO;
+
+ for (i = 0; i < COLUMNS * LINES * 2; i++)
+ *(video + i) = 0;
+
+ xpos = 0;
+ ypos = 0;
+
+ outw(10+(1<<(5+8)), 0x3d4); /* cursor off */
+}
+
+
+/* Put the character C on the screen. */
+static void putchar (int c)
+{
+ static char zeroarr[2*COLUMNS] = { 0 };
+
+ putchar_serial(c);
+
+ if (c == '\n' || c == '\r')
+ {
+ newline:
+ xpos = 0;
+ ypos++;
+ if (ypos >= LINES)
+ {
+ ypos = LINES-1;
+ memcpy((char*)video,
+ (char*)video + 2*COLUMNS, (LINES-1)*2*COLUMNS);
+ memcpy((char*)video + (LINES-1)*2*COLUMNS,
+ zeroarr, 2*COLUMNS);
+ }
+ return;
+ }
+
+ *(video + (xpos + ypos * COLUMNS) * 2) = c & 0xFF;
+ *(video + (xpos + ypos * COLUMNS) * 2 + 1) = ATTRIBUTE;
+
+ xpos++;
+ if (xpos >= COLUMNS)
+ goto newline;
+}
+
+static inline void __putstr(const char *str)
+{
+ while ( *str ) putchar(*str++);
+}
+
+void printf (const char *fmt, ...)
+{
+ va_list args;
+ char buf[1024], *p;
+ unsigned long flags;
+
+ va_start(args, fmt);
+ (void)vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ p = buf;
+ spin_lock_irqsave(&console_lock, flags);
+ while ( *p ) putchar(*p++);
+ spin_unlock_irqrestore(&console_lock, flags);
+}
+
+void panic(const char *fmt, ...)
+{
+ va_list args;
+ char buf[1024], *p;
+ unsigned long flags;
+ extern void machine_restart(char *);
+
+ va_start(args, fmt);
+ (void)vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ /* Spit out multiline message in one go. */
+ spin_lock_irqsave(&console_lock, flags);
+ __putstr("\n****************************************\n");
+ p = buf;
+ while ( *p ) putchar(*p++);
+ __putstr("Aieee! CPU");
+ putchar((char)smp_processor_id() + '0');
+ __putstr(" is toast...\n");
+ __putstr("****************************************\n\n");
+ __putstr("Reboot in five seconds...\n");
+ spin_unlock_irqrestore(&console_lock, flags);
+
+ mdelay(5000);
+ machine_restart(0);
+}
+
+/* No-op syscall. */
+asmlinkage long sys_ni_syscall(void)
+{
+ return -ENOSYS;
+}
+
+
+unsigned short compute_cksum(unsigned short *buf, int count)
+{
+ /* Function written by ek247. Computes the IP and UDP checksums for the
+ * fake console packets created in console_export. */
+
+ unsigned long sum=0;
+
+ while (count--)
+ {
+ sum+=*buf++;
+ if (sum & 0xFFFF0000)
+ {
+ /* Carry occurred, so wrap around. */
+ sum &=0xFFFF;
+ sum++;
+ }
+ }
+ return ~(sum & 0xFFFF);
+}
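+
+/* Note: this is the standard ones-complement Internet checksum (RFC 1071),
+ * with the end-around carry folded after each addition rather than once at
+ * the end -- the two orders give the same result. For example, summing
+ * 0xFFFF and 0x0002 overflows to 0x10001, wraps to 0x0002, and the final
+ * complement yields 0xFFFD. */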
+
+
+
+/* XXX SMH: below is rather vile; pulled in to allow network console */
+
+extern int netif_rx(struct sk_buff *);
+extern struct net_device *the_dev;
+
+typedef struct my_udphdr {
+ __u16 source;
+ __u16 dest;
+ __u16 len;
+ __u16 check;
+} my_udphdr_t;
+
+
+typedef struct my_iphdr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 ihl:4,
+ version:4;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u8 version:4,
+ ihl:4;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __u8 tos;
+ __u16 tot_len;
+ __u16 id;
+ __u16 frag_off;
+ __u8 ttl;
+ __u8 protocol;
+ __u16 check;
+ __u32 saddr;
+ __u32 daddr;
+} my_iphdr_t;
+
+
+typedef struct my_ethhdr {
+ unsigned char h_dest[6];
+ unsigned char h_source[6];
+ unsigned short h_proto;
+} my_ethhdr_t;
+
+/*
+ * Function written by ek247. Exports console output from all domains upwards
+ * to domain0, by stuffing it into a fake network packet.
+ */
+int console_export(char *str, int len)
+{
+ struct sk_buff *skb;
+ struct my_iphdr *iph = NULL;
+ struct my_udphdr *udph = NULL;
+ struct my_ethhdr *ethh = NULL;
+ int hdr_size = sizeof(struct my_iphdr) + sizeof(struct my_udphdr);
+ u8 *skb_data;
+
+ skb = dev_alloc_skb(sizeof(struct my_ethhdr) +
+ hdr_size + len + 20);
+ if ( skb == NULL ) return 0;
+
+ skb->dev = the_dev;
+ skb_data = (u8 *)map_domain_mem((skb->pf - frame_table) << PAGE_SHIFT);
+ skb_reserve(skb, 2);
+
+ /* Get a pointer to each header. */
+ ethh = (struct my_ethhdr *)
+ (skb_data + (skb->data - skb->head));
+ iph = (struct my_iphdr *)(ethh + 1);
+ udph = (struct my_udphdr *)(iph + 1);
+
+ skb_reserve(skb, sizeof(struct my_ethhdr));
+ skb_put(skb, hdr_size + len);
+
+ /* Build IP header. */
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->frag_off= 0;
+ iph->id = 0xdead;
+ iph->ttl = 255;
+ iph->protocol= 17;
+ iph->daddr = htonl(opt_dom0_ip);
+ iph->saddr = htonl(0xa9fe0001);
+ iph->tot_len = htons(hdr_size + len);
+ iph->check = 0;
+ iph->check = compute_cksum((__u16 *)iph, sizeof(struct my_iphdr)/2);
+
+ /* Build UDP header. */
+ udph->source = htons(current->domain);
+ udph->dest = htons(666);
+ udph->len = htons(sizeof(struct my_udphdr) + len);
+ udph->check = 0;
+
+ /* Build the UDP payload. */
+ memcpy((char *)(udph + 1), str, len);
+
+ /* Fix Ethernet header. */
+ memset(ethh->h_source, 0, ETH_ALEN);
+ memset(ethh->h_dest, 0, ETH_ALEN);
+ ethh->h_proto = htons(ETH_P_IP);
+ skb->mac.ethernet= (struct ethhdr *)ethh;
+
+ /* Keep the net rule tables happy. */
+ skb->src_vif = VIF_PHYSICAL_INTERFACE;
+ skb->dst_vif = 0;
+
+ unmap_domain_mem(skb_data);
+
+ (void)netif_rx(skb);
+
+ return 1;
+}
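+
+/* Example (illustrative): console output from domain 3 thus arrives at
+ * domain 0 as a UDP datagram from 169.254.0.1 (source port 3, i.e. the
+ * domain id) to opt_dom0_ip port 666, so a simple listener on domain 0 can
+ * demultiplex the consoles of all domains by source port. */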
+
+
+long do_console_write(char *str, int count)
+{
+#define SIZEOF_BUF 256
+ unsigned char safe_str[SIZEOF_BUF];
+ unsigned char exported_str[SIZEOF_BUF];
+ unsigned long flags;
+ int i=0;
+ int j=0;
+ unsigned char prev = '\n';
+
+ /* Leave room for the terminating NUL written below. */
+ if ( count > (SIZEOF_BUF - 1) ) count = SIZEOF_BUF - 1;
+
+ if ( copy_from_user(safe_str, str, count) )
+ return -EFAULT;
+
+ spin_lock_irqsave(&console_lock, flags);
+
+ __putstr("DOM");
+ putchar(current->domain+'0');
+ __putstr(": ");
+
+ for ( i = 0; i < count; i++ )
+ {
+ exported_str[j++]=safe_str[i];
+
+ if ( !safe_str[i] ) break;
+ putchar(prev = safe_str[i]);
+ }
+
+ if ( prev != '\n' ) putchar('\n');
+
+ spin_unlock_irqrestore(&console_lock, flags);
+
+ exported_str[j]='\0';
+ console_export(exported_str, j-1);
+
+ return(0);
+}
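+
+/* Example (illustrative): a guest in domain 2 writing the string "hello\n"
+ * via this hypercall produces "DOM2: hello" on the local console, and the
+ * same text is exported to domain 0 in a fake UDP packet by
+ * console_export() above. */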
+
+void __out_of_line_bug(int line)
+{
+ printk("kernel BUG in header file at line %d\n", line);
+ BUG();
+ for ( ; ; ) continue;
+}
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
new file mode 100644
index 0000000000..dcd1daa564
--- /dev/null
+++ b/xen/common/keyhandler.c
@@ -0,0 +1,130 @@
+#include <xeno/keyhandler.h>
+#include <xeno/reboot.h>
+
+extern void perfc_printall (u_char key, void *dev_id, struct pt_regs *regs);
+
+#define KEY_MAX 256
+#define STR_MAX 64
+
+typedef struct _key_te {
+ key_handler *handler;
+ char desc[STR_MAX];
+} key_te_t;
+
+static key_te_t key_table[KEY_MAX];
+
+void add_key_handler(u_char key, key_handler *handler, char *desc)
+{
+ if(key_table[key].handler != NULL)
+ printk("Warning: overwriting handler for key 0x%x\n", key);
+
+ key_table[key].handler = handler;
+
+ /* Copy the description, always NUL-terminating the stored copy. */
+ strncpy(key_table[key].desc, desc, STR_MAX);
+ key_table[key].desc[STR_MAX-1] = '\0';
+
+ return;
+}
+
+key_handler *get_key_handler(u_char key)
+{
+ return key_table[key].handler;
+}
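+
+/*
+ * Registration sketch (illustrative; say_hi() is hypothetical). A handler
+ * receives the key, a device id and the interrupted register state:
+ *
+ *   static void say_hi(u_char key, void *dev_id, struct pt_regs *regs)
+ *   {
+ *       printk("'%c' pressed -> hi!\n", key);
+ *   }
+ *
+ *   add_key_handler('x', say_hi, "print a greeting");
+ */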
+
+
+void show_handlers(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ int i;
+
+ printk("'%c' pressed -> showing installed handlers\n", key);
+ for(i=0; i < KEY_MAX; i++)
+ if(key_table[i].handler)
+ printk(" key '%c' (ascii '%02x') => %s\n",
+ (i<33 || i>126)?(' '):(i),i,
+ key_table[i].desc);
+ return;
+}
+
+
+void dump_registers(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ extern void show_registers(struct pt_regs *regs);
+
+ printk("'%c' pressed -> dumping registers\n", key);
+ show_registers(regs);
+ return;
+}
+
+void halt_machine(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ printk("'%c' pressed -> rebooting machine\n", key);
+ machine_restart(NULL);
+ return;
+}
+
+
+
+/* XXX SMH: this is keir's fault */
+static char *task_states[] =
+{
+ "Running",
+ "Interruptible Sleep",
+ "Uninterruptible Sleep",
+ NULL, "Stopped",
+ NULL, NULL, NULL, "Dying",
+};
+
+void do_task_queues(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ u_long flags;
+ struct task_struct *p;
+ shared_info_t *s;
+
+ printk("'%c' pressed -> dumping task queues\n", key);
+ read_lock_irqsave(&tasklist_lock, flags);
+ p = &idle0_task;
+ do {
+ printk("Xen: DOM %d, CPU %d [has=%c], state = %s, "
+ "hyp_events = %08x\n",
+ p->domain, p->processor, p->has_cpu ? 'T':'F',
+ task_states[p->state], p->hyp_events);
+ s = p->shared_info;
+ if(!is_idle_task(p)) {
+ printk("Guest: events = %08lx, event_enable = %08lx\n",
+ s->events, s->events_enable);
+ printk("Notifying guest...\n");
+ set_bit(_EVENT_DEBUG, &s->events);
+ }
+ } while ( (p = p->next_task) != &idle0_task );
+ read_unlock_irqrestore(&tasklist_lock, flags);
+}
+
+
+void initialize_keytable(void)
+{
+ int i;
+
+ /* first initialize key handler table */
+ for(i = 0; i < KEY_MAX; i++)
+ key_table[i].handler = (key_handler *)NULL;
+
+ /* setup own handlers */
+ add_key_handler('d', dump_registers, "dump registers");
+ add_key_handler('h', show_handlers, "show this message");
+ add_key_handler('p', perfc_printall, "print performance counters");
+ add_key_handler('q', do_task_queues, "dump task queues + guest state");
+ add_key_handler('R', halt_machine, "reboot machine ungracefully");
+
+ return;
+}
diff --git a/xen/common/lib.c b/xen/common/lib.c
new file mode 100644
index 0000000000..ae75196ffd
--- /dev/null
+++ b/xen/common/lib.c
@@ -0,0 +1,526 @@
+
+#include <xeno/ctype.h>
+#include <xeno/lib.h>
+
+int memcmp(const void * cs,const void * ct,size_t count)
+{
+ const unsigned char *su1, *su2;
+ signed char res = 0;
+
+ for( su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
+ if ((res = *su1 - *su2) != 0)
+ break;
+ return res;
+}
+
+void * memcpy(void * dest,const void *src,size_t count)
+{
+ char *tmp = (char *) dest, *s = (char *) src;
+
+ while (count--)
+ *tmp++ = *s++;
+
+ return dest;
+}
+
+int strncmp(const char * cs,const char * ct,size_t count)
+{
+ register signed char __res = 0;
+
+ while (count) {
+ if ((__res = *cs - *ct++) != 0 || !*cs++)
+ break;
+ count--;
+ }
+
+ return __res;
+}
+
+int strcmp(const char * cs,const char * ct)
+{
+ register signed char __res;
+
+ while (1) {
+ if ((__res = *cs - *ct++) != 0 || !*cs++)
+ break;
+ }
+
+ return __res;
+}
+
+char * strcpy(char * dest,const char *src)
+{
+ char *tmp = dest;
+
+ while ((*dest++ = *src++) != '\0')
+ /* nothing */;
+ return tmp;
+}
+
+char * strncpy(char * dest,const char *src,size_t count)
+{
+ char *tmp = dest;
+
+ while (count-- && (*dest++ = *src++) != '\0')
+ /* nothing */;
+
+ return tmp;
+}
+
+void * memset(void * s,int c,size_t count)
+{
+ char *xs = (char *) s;
+
+ while (count--)
+ *xs++ = c;
+
+ return s;
+}
+
+size_t strnlen(const char * s, size_t count)
+{
+ const char *sc;
+
+ for (sc = s; count-- && *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+
+size_t strlen(const char * s)
+{
+ const char *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+
+char * strchr(const char * s, int c)
+{
+ for(; *s != (char) c; ++s)
+ if (*s == '\0')
+ return NULL;
+ return (char *) s;
+}
+
+char * strstr(const char * s1,const char * s2)
+{
+ int l1, l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ return (char *) s1;
+ l1 = strlen(s1);
+ while (l1 >= l2) {
+ l1--;
+ if (!memcmp(s1,s2,l2))
+ return (char *) s1;
+ s1++;
+ }
+ return NULL;
+}
+
+
+/* for xeno/ctype.h */
+unsigned char _ctype[] = {
+_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */
+_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */
+_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */
+_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */
+_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */
+_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */
+_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */
+_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */
+_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */
+_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */
+_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */
+_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */
+_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */
+_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */
+_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */
+_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */
+_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */
+_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */
+_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */
+_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */
+
+
+unsigned long str_to_quad(unsigned char *s)
+{
+ unsigned long quad = 0;
+ do {
+ quad <<= 8;
+ quad |= simple_strtol(s, (char **)&s, 10);
+ }
+ while ( *s++ == '.' );
+ return quad;
+}
+
+
+unsigned char *quad_to_str(unsigned long q, unsigned char *s)
+{
+ sprintf(s, "%ld.%ld.%ld.%ld",
+ (q>>24)&255, (q>>16)&255, (q>>8)&255, (q>>0)&255);
+ return s;
+}
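+
+/* Example (illustrative): str_to_quad("128.232.0.1") returns 0x80e80001,
+ * and quad_to_str(0x80e80001, buf) writes "128.232.0.1" back into buf. */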
+
+
+/* a couple of 64 bit operations ported from freebsd */
+
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
+ */
+
+#include <asm/types.h>
+
+
+/*
+ * Depending on the desired operation, we view a `long long' (aka quad_t) in
+ * one or more of the following formats.
+ */
+union uu {
+ s64 q; /* as a (signed) quad */
+ s64 uq; /* as an unsigned quad */
+ long sl[2]; /* as two signed longs */
+ unsigned long ul[2]; /* as two unsigned longs */
+};
+/* XXX RN: Yuck, hardcoded endianness :) */
+#define _QUAD_HIGHWORD 1
+#define _QUAD_LOWWORD 0
+/*
+ * Define high and low longwords.
+ */
+#define H _QUAD_HIGHWORD
+#define L _QUAD_LOWWORD
+
+/*
+ * Total number of bits in a quad_t and in the pieces that make it up.
+ * These are used for shifting, and also below for halfword extraction
+ * and assembly.
+ */
+#define CHAR_BIT 8 /* number of bits in a char */
+#define QUAD_BITS (sizeof(s64) * CHAR_BIT)
+#define LONG_BITS (sizeof(long) * CHAR_BIT)
+#define HALF_BITS (sizeof(long) * CHAR_BIT / 2)
+
+/*
+ * Extract high and low shortwords from longword, and move low shortword of
+ * longword to upper half of long, i.e., produce the upper longword of
+ * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.)
+ *
+ * These are used in the multiply code, to split a longword into upper
+ * and lower halves, and to reassemble a product as a quad_t, shifted left
+ * (sizeof(long)*CHAR_BIT/2).
+ */
+#define HHALF(x) ((x) >> HALF_BITS)
+#define LHALF(x) ((x) & ((1 << HALF_BITS) - 1))
+#define LHUP(x) ((x) << HALF_BITS)
+
+/*
+ * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),
+ * section 4.3.1, pp. 257--259.
+ */
+#define B (1 << HALF_BITS) /* digit base */
+
+/* Combine two `digits' to make a single two-digit number. */
+#define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b))
+
+/* select a type for digits in base B: use unsigned short if they fit */
+#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff
+typedef unsigned short digit;
+#else
+typedef u_long digit;
+#endif
+
+/*
+ * Shift p[0]..p[len] left `sh' bits, ignoring any bits that
+ * `fall out' the left (there never will be any such anyway).
+ * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS.
+ */
+static void
+shl(register digit *p, register int len, register int sh)
+{
+ register int i;
+
+ for (i = 0; i < len; i++)
+ p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));
+ p[i] = LHALF(p[i] << sh);
+}
+
+/*
+ * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
+ *
+ * We do this in base 2-sup-HALF_BITS, so that all intermediate products
+ * fit within u_long. As a consequence, the maximum length dividend and
+ * divisor are 4 `digits' in this base (they are shorter if they have
+ * leading zeros).
+ */
+u64
+__qdivrem(u64 uq, u64 vq, u64 *arq)
+{
+ union uu tmp;
+ digit *u, *v, *q;
+ register digit v1, v2;
+ u_long qhat, rhat, t;
+ int m, n, d, j, i;
+ digit uspace[5], vspace[5], qspace[5];
+
+ /*
+ * Take care of special cases: divide by zero, and u < v.
+ */
+ if (vq == 0) {
+ /* divide by zero. */
+ static volatile const unsigned int zero = 0;
+
+ tmp.ul[H] = tmp.ul[L] = 1 / zero;
+ if (arq)
+ *arq = uq;
+ return (tmp.q);
+ }
+ if (uq < vq) {
+ if (arq)
+ *arq = uq;
+ return (0);
+ }
+ u = &uspace[0];
+ v = &vspace[0];
+ q = &qspace[0];
+
+ /*
+ * Break dividend and divisor into digits in base B, then
+ * count leading zeros to determine m and n. When done, we
+ * will have:
+ * u = (u[1]u[2]...u[m+n]) sub B
+ * v = (v[1]v[2]...v[n]) sub B
+ * v[1] != 0
+ * 1 < n <= 4 (if n = 1, we use a different division algorithm)
+ * m >= 0 (otherwise u < v, which we already checked)
+ * m + n = 4
+ * and thus
+ * m = 4 - n <= 2
+ */
+ tmp.uq = uq;
+ u[0] = 0;
+ u[1] = HHALF(tmp.ul[H]);
+ u[2] = LHALF(tmp.ul[H]);
+ u[3] = HHALF(tmp.ul[L]);
+ u[4] = LHALF(tmp.ul[L]);
+ tmp.uq = vq;
+ v[1] = HHALF(tmp.ul[H]);
+ v[2] = LHALF(tmp.ul[H]);
+ v[3] = HHALF(tmp.ul[L]);
+ v[4] = LHALF(tmp.ul[L]);
+ for (n = 4; v[1] == 0; v++) {
+ if (--n == 1) {
+ u_long rbj; /* r*B+u[j] (not root boy jim) */
+ digit q1, q2, q3, q4;
+
+ /*
+ * Change of plan, per exercise 16.
+ * r = 0;
+ * for j = 1..4:
+ * q[j] = floor((r*B + u[j]) / v),
+ * r = (r*B + u[j]) % v;
+ * We unroll this completely here.
+ */
+ t = v[2]; /* nonzero, by definition */
+ q1 = u[1] / t;
+ rbj = COMBINE(u[1] % t, u[2]);
+ q2 = rbj / t;
+ rbj = COMBINE(rbj % t, u[3]);
+ q3 = rbj / t;
+ rbj = COMBINE(rbj % t, u[4]);
+ q4 = rbj / t;
+ if (arq)
+ *arq = rbj % t;
+ tmp.ul[H] = COMBINE(q1, q2);
+ tmp.ul[L] = COMBINE(q3, q4);
+ return (tmp.q);
+ }
+ }
+
+ /*
+ * By adjusting q once we determine m, we can guarantee that
+ * there is a complete four-digit quotient at &qspace[1] when
+ * we finally stop.
+ */
+ for (m = 4 - n; u[1] == 0; u++)
+ m--;
+ for (i = 4 - m; --i >= 0;)
+ q[i] = 0;
+ q += 4 - m;
+
+ /*
+ * Here we run Program D, translated from MIX to C and acquiring
+ * a few minor changes.
+ *
+ * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
+ */
+ d = 0;
+ for (t = v[1]; t < B / 2; t <<= 1)
+ d++;
+ if (d > 0) {
+ shl(&u[0], m + n, d); /* u <<= d */
+ shl(&v[1], n - 1, d); /* v <<= d */
+ }
+ /*
+ * D2: j = 0.
+ */
+ j = 0;
+ v1 = v[1]; /* for D3 -- note that v[1..n] are constant */
+ v2 = v[2]; /* for D3 */
+ do {
+ register digit uj0, uj1, uj2;
+
+ /*
+ * D3: Calculate qhat (\^q, in TeX notation).
+ * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
+ * let rhat = (u[j]*B + u[j+1]) mod v[1].
+ * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
+ * decrement qhat and increase rhat correspondingly.
+ * Note that if rhat >= B, v[2]*qhat < rhat*B.
+ */
+ uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
+ uj1 = u[j + 1]; /* for D3 only */
+ uj2 = u[j + 2]; /* for D3 only */
+ if (uj0 == v1) {
+ qhat = B;
+ rhat = uj1;
+ goto qhat_too_big;
+ } else {
+ u_long nn = COMBINE(uj0, uj1);
+ qhat = nn / v1;
+ rhat = nn % v1;
+ }
+ while (v2 * qhat > COMBINE(rhat, uj2)) {
+ qhat_too_big:
+ qhat--;
+ if ((rhat += v1) >= B)
+ break;
+ }
+ /*
+ * D4: Multiply and subtract.
+ * The variable `t' holds any borrows across the loop.
+ * We split this up so that we do not require v[0] = 0,
+ * and to eliminate a final special case.
+ */
+ for (t = 0, i = n; i > 0; i--) {
+ t = u[i + j] - v[i] * qhat - t;
+ u[i + j] = LHALF(t);
+ t = (B - HHALF(t)) & (B - 1);
+ }
+ t = u[j] - t;
+ u[j] = LHALF(t);
+ /*
+ * D5: test remainder.
+ * There is a borrow if and only if HHALF(t) is nonzero;
+ * in that (rare) case, qhat was too large (by exactly 1).
+ * Fix it by adding v[1..n] to u[j..j+n].
+ */
+ if (HHALF(t)) {
+ qhat--;
+ for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
+ t += u[i + j] + v[i];
+ u[i + j] = LHALF(t);
+ t = HHALF(t);
+ }
+ u[j] = LHALF(u[j] + t);
+ }
+ q[j] = qhat;
+ } while (++j <= m); /* D7: loop on j. */
+
+ /*
+ * If caller wants the remainder, we have to calculate it as
+ * u[m..m+n] >> d (this is at most n digits and thus fits in
+ * u[m+1..m+n], but we may need more source digits).
+ */
+ if (arq) {
+ if (d) {
+ for (i = m + n; i > m; --i)
+ u[i] = (u[i] >> d) |
+ LHALF(u[i - 1] << (HALF_BITS - d));
+ u[i] = 0;
+ }
+ tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
+ tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
+ *arq = tmp.q;
+ }
+
+ tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
+ tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
+ return (tmp.q);
+}
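+
+/* Sanity-check example (illustrative):
+ *
+ *   u64 r, q = __qdivrem(0x100000000ULL, 10, &r);
+ *
+ * leaves q == 429496729 and r == 6, since 2^32 == 429496729*10 + 6. */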
+
+
+/*
+ * Divide two signed quads.
+ * ??? if -1/2 should produce -1 on this machine, this code is wrong
+ */
+s64
+__divdi3(s64 a, s64 b)
+{
+ u64 ua, ub, uq;
+ int neg;
+
+ if (a < 0)
+ ua = -(u64)a, neg = 1;
+ else
+ ua = a, neg = 0;
+ if (b < 0)
+ ub = -(u64)b, neg ^= 1;
+ else
+ ub = b;
+ uq = __qdivrem(ua, ub, (u64 *)0);
+ return (neg ? -uq : uq);
+}
+
+/*
+ * Divide two unsigned quads.
+ */
+u64
+__udivdi3(u64 a, u64 b)
+{
+ return (__qdivrem(a, b, (u64 *)0));
+}
diff --git a/xen/common/memory.c b/xen/common/memory.c
new file mode 100644
index 0000000000..0c534ad3d1
--- /dev/null
+++ b/xen/common/memory.c
@@ -0,0 +1,820 @@
+/******************************************************************************
+ * memory.c
+ *
+ * Copyright (c) 2002 K A Fraser
+ *
+ * A description of the page table API:
+ *
+ * Domains trap to process_page_updates with a list of update requests.
+ * This is a list of (ptr, val) pairs, where the requested operation
+ * is *ptr = val.
+ *
+ * Reference counting of pages:
+ * ----------------------------
+ * Each page has two refcounts: tot_count and type_count.
+ *
+ * TOT_COUNT is the obvious reference count. It counts all uses of a
+ * physical page frame by a domain, including uses as a page directory,
+ * a page table, or simple mappings via a PTE. This count prevents a
+ * domain from releasing a frame back to the hypervisor's free pool when
+ * it is still referencing it!
+ *
+ * TYPE_COUNT is more subtle. A frame can be put to one of three
+ * mutually-exclusive uses: it might be used as a page directory, or a
+ * page table, or it may be mapped writeable by the domain (a frame may,
+ * of course, also be used in none of these three ways).
+ * So, type_count is a count of the number of times a frame is being
+ * referred to in its current incarnation. Therefore, a page can only
+ * change its type when its type count is zero.
+ *
+ * Pinning the page type:
+ * ----------------------
+ * The type of a page can be pinned/unpinned with the commands
+ * PGEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
+ * pinning is not reference counted, so it can't be nested).
+ * This is useful to prevent a page's type count falling to zero, at which
+ * point safety checks would need to be carried out next time the count
+ * is increased again.
+ *
+ * A further note on writeable page mappings:
+ * ------------------------------------------
+ * For simplicity, the count of writeable mappings for a page may not
+ * correspond to reality. The 'writeable count' is incremented for every
+ * PTE which maps the page with the _PAGE_RW flag set. However, for
+ * write access to be possible the page directory entry must also have
+ * its _PAGE_RW bit set. We do not check this as it complicates the
+ * reference counting considerably [consider the case of multiple
+ * directory entries referencing a single page table, some with the RW
+ * bit set, others not -- it starts getting a bit messy].
+ * In normal use, this simplification shouldn't be a problem.
+ * However, the logic can be added if required.
+ *
+ * One more note on read-only page mappings:
+ * -----------------------------------------
+ * We want domains to be able to map pages for read-only access. The
+ * main reason is that page tables and directories should be readable
+ * by a domain, but it would not be safe for them to be writeable.
+ * However, domains have free access to rings 1 & 2 of the Intel
+ * privilege model. In terms of page protection, these are considered
+ * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
+ * read-only restrictions are respected in supervisor mode -- if the
+ * bit is clear then any mapped page is writeable.
+ *
+ * We get round this by always setting the WP bit and disallowing
+ * updates to it. This is very unlikely to cause a problem for guest
+ * OS's, which will generally use the WP bit to simplify copy-on-write
+ * implementation (in that case, OS wants a fault when it writes to
+ * an application-supplied buffer).
+ */
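+
+/* Example (illustrative): a frame in use as a page table referenced from
+ * two PDEs has type_count == 2 (both uses agree on PGT_l1_page_table) and
+ * tot_count >= 2. Before the domain may map that frame writeable, both
+ * references must go away: only when type_count reaches zero can the
+ * frame's type change to PGT_writeable_page. */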
+
+
+/*
+ * THE FOLLOWING ARE ISSUES IF GUEST OPERATING SYSTEMS BECOME SMP-CAPABLE.
+ * [THAT IS, THEY'RE NOT A PROBLEM NOW, AND MAY NOT EVER BE.]
+ * -----------------------------------------------------------------------
+ *
+ * *********
+ * UPDATE 15/7/02: Interface has changed --updates now specify physical
+ * address of page-table entry, rather than specifying a virtual address,
+ * so hypervisor no longer "walks" the page tables. Therefore the
+ * solution below cannot work. Another possibility is to add a new entry
+ * to our "struct page" which says to which top-level page table each
+ * lower-level page table or writeable mapping belongs. If it belongs to more
+ * than one, we'd probably just flush on all processors running the domain.
+ * *********
+ *
+ * ** 1 **
+ * The problem involves creating new page tables which might be mapped
+ * writeable in the TLB of another processor. As an example, a domain might be
+ * running in two contexts (ie. on two processors) simultaneously, using the
+ * same top-level page table in both contexts. Now, if context 1 sends an
+ * update request [make page P read-only, add a reference to page P as a page
+ * table], that will succeed if there was only one writeable mapping of P.
+ * However, that mapping may persist in the TLB of context 2.
+ *
+ * Solution: when installing a new page table, we must flush foreign TLBs as
+ * necessary. Naive solution is to flush on any processor running our domain.
+ * Cleverer solution is to flush on any processor running same top-level page
+ * table, but this will sometimes fail (consider two different top-level page
+ * tables which have a shared lower-level page table).
+ *
+ * A better solution: when squashing a write reference, check how many times
+ * that lowest-level table entry is referenced by ORing refcounts of tables
+ * down the page-table hierarchy. If the result is != 1, we require flushing
+ * instances of current domain if a new table is installed (because the
+ * lowest-level entry may be referenced by many top-level page tables).
+ * However, common case will be that result == 1, so we only need to flush
+ * processors with the same top-level page table. Make choice at
+ * table-installation time based on a `flush_level' flag, which is
+ * FLUSH_NONE, FLUSH_PAGETABLE, FLUSH_DOMAIN. A flush reduces this
+ * to FLUSH_NONE, while squashed write mappings can only promote up
+ * to more aggressive flush types.
+ *
+ * ** 2 **
+ * Same problem occurs when removing a page table, at level 1 say, then
+ * making it writeable. Need a TLB flush between otherwise another processor
+ * might write an illegal mapping into the old table, while yet another
+ * processor can use the illegal mapping because of a stale level-2 TLB
+ * entry. So, removal of a table reference sets 'flush_level' appropriately,
+ * and a flush occurs on next addition of a fresh write mapping.
+ *
+ * BETTER SOLUTION FOR BOTH 1 AND 2:
+ * When type_refcnt goes to zero, leave old type in place (don't set to
+ * PGT_none). Then, only flush if making a page table of a page with
+ * (cnt=0,type=PGT_writeable), or when adding a write mapping for a page
+ * with (cnt=0, type=PGT_pagexxx). A TLB flush will cause all pages
+ * with refcnt==0 to be reset to PGT_none. Need an array for the purpose,
+ * added to when a type_refcnt goes to zero, and emptied on a TLB flush.
+ * Either have per-domain table, or force TLB flush at end of each
+ * call to 'process_page_updates'.
+ * Most OSes will always keep a writeable reference hanging around, and
+ * page table structure is fairly static, so this mechanism should be
+ * fairly cheap.
+ *
+ * MAYBE EVEN BETTER? [somewhat dubious: not for first cut of the code]:
+ * If we need to force an intermediate flush, those other processors
+ * spin until we complete, then do a single TLB flush. They can spin on
+ * the lock protecting 'process_page_updates', and continue when that
+ * is freed. Saves cost of setting up and servicing an IPI: later
+ * communication is synchronous. Processors trying to install the domain
+ * or domain&pagetable would also enter the spin.
+ *
+ * ** 3 **
+ * Indeed, this problem generalises to reusing page tables at different
+ * levels of the hierarchy (conceptually, the guest OS can use the
+ * hypervisor to introduce illegal table entries by proxy). Consider
+ * unlinking a level-1 page table and reintroducing at level 2 with no
+ * TLB flush. Hypervisor can add a reference to some other level-1 table
+ * with the RW bit set. This is fine in the level-2 context, but some
+ * other processor may still be using that table in level-1 context
+ * (due to a stale TLB entry). At level 1 it may look like the
+ * processor has write access to the other level-1 page table! Therefore
+ * can add illegal values there with impunity :-(
+ *
+ * Fortunately, the solution above generalises to this extended problem.
+ */
+
+/*
+ * UPDATE 12.11.02.: We no longer have struct page and mem_map. These
+ * have been replaced by struct pfn_info and frame_table respectively.
+ *
+ * system_free_list is a list_head linking all system owned free pages.
+ * it is initialized in init_frametable.
+ *
+ * Boris Dragovic.
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/lib.h>
+#include <xeno/mm.h>
+#include <xeno/sched.h>
+#include <xeno/errno.h>
+#include <asm/page.h>
+#include <asm/flushtlb.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/domain_page.h>
+
+#if 0
+#define MEM_LOG(_f, _a...) printk("DOM%d: (file=memory.c, line=%d) " _f "\n", current->domain, __LINE__, ## _a )
+#else
+#define MEM_LOG(_f, _a...) ((void)0)
+#endif
+
+/* Domain 0 is allowed to submit requests on behalf of others. */
+#define DOMAIN_OKAY(_f) \
+ ((((_f) & PG_domain_mask) == current->domain) || (current->domain == 0))
+
+/* 'get' checks parameter for validity before inc'ing refcnt. */
+static int get_l2_table(unsigned long page_nr);
+static int get_l1_table(unsigned long page_nr);
+static int get_page(unsigned long page_nr, int writeable);
+static int inc_page_refcnt(unsigned long page_nr, unsigned int type);
+/* 'put' does no checking because if refcnt not zero, entity must be valid. */
+static void put_l2_table(unsigned long page_nr);
+static void put_l1_table(unsigned long page_nr);
+static void put_page(unsigned long page_nr, int writeable);
+static int dec_page_refcnt(unsigned long page_nr, unsigned int type);
+
+static int mod_l2_entry(unsigned long, l2_pgentry_t);
+static int mod_l1_entry(unsigned long, l1_pgentry_t);
+
+/* The frame table, its total size in bytes, and the machine's page count. */
+frame_table_t * frame_table;
+unsigned long frame_table_size;
+unsigned long max_page;
+
+struct list_head free_list;
+spinlock_t free_list_lock = SPIN_LOCK_UNLOCKED;
+unsigned int free_pfns;
+
+static int tlb_flush[NR_CPUS];
+
+/*
+ * init_frametable:
+ * Initialise per-frame memory information. This goes directly after
+ * MAX_MONITOR_ADDRESS in physical memory.
+ */
+void __init init_frametable(unsigned long nr_pages)
+{
+ struct pfn_info *pf;
+ unsigned long page_index;
+ unsigned long flags;
+
+ memset(tlb_flush, 0, sizeof(tlb_flush));
+
+ max_page = nr_pages;
+ frame_table_size = nr_pages * sizeof(struct pfn_info);
+ frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
+ frame_table = (frame_table_t *)FRAMETABLE_VIRT_START;
+ memset(frame_table, 0, frame_table_size);
+
+ free_pfns = 0;
+
+ /* Put all domain-allocatable memory on a free list. */
+ spin_lock_irqsave(&free_list_lock, flags);
+ INIT_LIST_HEAD(&free_list);
+ for( page_index = (__pa(frame_table) + frame_table_size) >> PAGE_SHIFT;
+ page_index < nr_pages;
+ page_index++ )
+ {
+ pf = list_entry(&frame_table[page_index].list, struct pfn_info, list);
+ list_add_tail(&pf->list, &free_list);
+ free_pfns++;
+ }
+ spin_unlock_irqrestore(&free_list_lock, flags);
+}
+
+
+/* Return original refcnt, or -1 on error. */
+static int inc_page_refcnt(unsigned long page_nr, unsigned int type)
+{
+ struct pfn_info *page;
+ unsigned long flags;
+
+ if ( page_nr >= max_page )
+ {
+ MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
+ return -1;
+ }
+ page = frame_table + page_nr;
+ flags = page->flags;
+ if ( !DOMAIN_OKAY(flags) )
+ {
+ MEM_LOG("Bad page domain (%ld)", flags & PG_domain_mask);
+ return -1;
+ }
+ if ( (flags & PG_type_mask) != type )
+ {
+ if ( page_type_count(page) != 0 )
+ {
+ MEM_LOG("Page %08lx bad type/count (%08lx!=%08x) cnt=%ld",
+ page_nr << PAGE_SHIFT,
+ flags & PG_type_mask, type, page_type_count(page));
+ return -1;
+ }
+
+ page->flags |= type;
+ }
+
+ get_page_tot(page);
+ return get_page_type(page);
+}
+
+/* Return new refcnt, or -1 on error. */
+static int dec_page_refcnt(unsigned long page_nr, unsigned int type)
+{
+ struct pfn_info *page;
+ int ret;
+
+ if ( page_nr >= max_page )
+ {
+ MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
+ return -1;
+ }
+ page = frame_table + page_nr;
+ if ( !DOMAIN_OKAY(page->flags) ||
+ ((page->flags & PG_type_mask) != type) )
+ {
+ MEM_LOG("Bad page type/domain (dom=%ld) (type %ld != expected %d)",
+ page->flags & PG_domain_mask, page->flags & PG_type_mask,
+ type);
+ return -1;
+ }
+ ASSERT(page_type_count(page) != 0);
+ if ( (ret = put_page_type(page)) == 0 ) page->flags &= ~PG_type_mask;
+ put_page_tot(page);
+ return ret;
+}
+
+
+/* We allow a L2 table to map itself, to achieve a linear pagetable. */
+/* NB. There's no need for a put_twisted_l2_table() function!! */
+static int get_twisted_l2_table(unsigned long entry_pfn, l2_pgentry_t l2e)
+{
+ unsigned long l2v = l2_pgentry_val(l2e);
+
+ /* Clearly the mapping must be read-only :-) */
+ if ( (l2v & _PAGE_RW) )
+ {
+ MEM_LOG("Attempt to install twisted L2 entry with write permissions");
+ return -1;
+ }
+
+ /* This is a sufficient final check. */
+ if ( (l2v >> PAGE_SHIFT) != entry_pfn )
+ {
+ MEM_LOG("L2 tables may not map _other_ L2 tables!\n");
+ return -1;
+ }
+
+ /* We don't bump the reference counts. */
+ return 0;
+}
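+
+/* Example (illustrative): installing the read-only self-mapping
+ *
+ *   l2tab[i] = mk_l2_pgentry((l2_pfn << PAGE_SHIFT) | _PAGE_PRESENT);
+ *
+ * (where l2_pfn is the L2 table's own frame number) makes every page table
+ * of the address space appear as ordinary data pages within a 4MB window
+ * of virtual memory -- the classic linear-pagetable trick. */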
+
+
+static int get_l2_table(unsigned long page_nr)
+{
+ l2_pgentry_t *p_l2_entry, l2_entry;
+ int i, ret=0;
+
+ ret = inc_page_refcnt(page_nr, PGT_l2_page_table);
+ if ( ret != 0 ) return (ret < 0) ? ret : 0;
+
+ /* NEW level-2 page table! Deal with every PDE in the table. */
+ p_l2_entry = map_domain_mem(page_nr << PAGE_SHIFT);
+ for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+ {
+ l2_entry = *p_l2_entry++;
+ if ( !(l2_pgentry_val(l2_entry) & _PAGE_PRESENT) ) continue;
+ if ( (l2_pgentry_val(l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)) )
+ {
+ MEM_LOG("Bad L2 page type settings %04lx",
+ l2_pgentry_val(l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE));
+ ret = -1;
+ goto out;
+ }
+ /* Assume we're mapping an L1 table, falling back to twisted L2. */
+ ret = get_l1_table(l2_pgentry_to_pagenr(l2_entry));
+ if ( ret ) ret = get_twisted_l2_table(page_nr, l2_entry);
+ if ( ret ) goto out;
+ }
+
+ /* Now we simply slap in our high mapping. */
+ memcpy(p_l2_entry,
+ idle_pg_table[smp_processor_id()] + DOMAIN_ENTRIES_PER_L2_PAGETABLE,
+ HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
+ p_l2_entry[(PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT) -
+ DOMAIN_ENTRIES_PER_L2_PAGETABLE] =
+ mk_l2_pgentry(__pa(current->mm.perdomain_pt) | __PAGE_HYPERVISOR);
+
+ out:
+ unmap_domain_mem(p_l2_entry);
+ return ret;
+}
+
+static int get_l1_table(unsigned long page_nr)
+{
+ l1_pgentry_t *p_l1_entry, l1_entry;
+ int i, ret;
+
+ /* Update ref count for page pointed at by PDE. */
+ ret = inc_page_refcnt(page_nr, PGT_l1_page_table);
+ if ( ret != 0 ) return (ret < 0) ? ret : 0;
+
+ /* NEW level-1 page table! Deal with every PTE in the table. */
+ p_l1_entry = map_domain_mem(page_nr << PAGE_SHIFT);
+ for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
+ {
+ l1_entry = *p_l1_entry++;
+ if ( !(l1_pgentry_val(l1_entry) & _PAGE_PRESENT) ) continue;
+ if ( (l1_pgentry_val(l1_entry) &
+ (_PAGE_GLOBAL|_PAGE_PAT)) )
+ {
+ MEM_LOG("Bad L1 page type settings %04lx",
+ l1_pgentry_val(l1_entry) &
+ (_PAGE_GLOBAL|_PAGE_PAT));
+ ret = -1;
+ goto out;
+ }
+ ret = get_page(l1_pgentry_to_pagenr(l1_entry),
+ l1_pgentry_val(l1_entry) & _PAGE_RW);
+ if ( ret ) goto out;
+ }
+
+ out:
+ /* Make sure we unmap the right page! */
+ unmap_domain_mem(p_l1_entry-1);
+ return ret;
+}
+
+static int get_page(unsigned long page_nr, int writeable)
+{
+ struct pfn_info *page;
+ unsigned long flags;
+
+ /* Update ref count for page pointed at by PTE. */
+ if ( page_nr >= max_page )
+ {
+ MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
+ return(-1);
+ }
+ page = frame_table + page_nr;
+ flags = page->flags;
+ if ( !DOMAIN_OKAY(flags) )
+ {
+ MEM_LOG("Bad page domain (%ld)", flags & PG_domain_mask);
+ return(-1);
+ }
+
+ if ( writeable )
+ {
+ if ( (flags & PG_type_mask) != PGT_writeable_page )
+ {
+ if ( page_type_count(page) != 0 )
+ {
+ MEM_LOG("Bad page type/count (%08lx!=%08x) cnt=%ld",
+ flags & PG_type_mask, PGT_writeable_page,
+ page_type_count(page));
+ return(-1);
+ }
+ page->flags |= PGT_writeable_page;
+ }
+ get_page_type(page);
+ }
+
+ get_page_tot(page);
+
+ return(0);
+}
+
+static void put_l2_table(unsigned long page_nr)
+{
+ l2_pgentry_t *p_l2_entry, l2_entry;
+ int i;
+
+ if ( dec_page_refcnt(page_nr, PGT_l2_page_table) ) return;
+
+ /* We had last reference to level-2 page table. Free the PDEs. */
+ p_l2_entry = map_domain_mem(page_nr << PAGE_SHIFT);
+ for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+ {
+ l2_entry = *p_l2_entry++;
+ if ( (l2_pgentry_val(l2_entry) & _PAGE_PRESENT) )
+ put_l1_table(l2_pgentry_to_pagenr(l2_entry));
+ }
+
+ unmap_domain_mem(p_l2_entry);
+}
+
+static void put_l1_table(unsigned long page_nr)
+{
+ l1_pgentry_t *p_l1_entry, l1_entry;
+ int i;
+
+ if ( dec_page_refcnt(page_nr, PGT_l1_page_table) ) return;
+
+ /* We had last reference to level-1 page table. Free the PTEs. */
+ p_l1_entry = map_domain_mem(page_nr << PAGE_SHIFT);
+ for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
+ {
+ l1_entry = *p_l1_entry++;
+ if ( (l1_pgentry_val(l1_entry) & _PAGE_PRESENT) )
+ {
+ put_page(l1_pgentry_to_pagenr(l1_entry),
+ l1_pgentry_val(l1_entry) & _PAGE_RW);
+ }
+ }
+
+ /* Make sure we unmap the right page! */
+ unmap_domain_mem(p_l1_entry-1);
+}
+
+static void put_page(unsigned long page_nr, int writeable)
+{
+ struct pfn_info *page;
+ ASSERT(page_nr < max_page);
+ page = frame_table + page_nr;
+ ASSERT(DOMAIN_OKAY(page->flags));
+ ASSERT((!writeable) ||
+ ((page_type_count(page) != 0) &&
+ ((page->flags & PG_type_mask) == PGT_writeable_page)));
+ if ( writeable && (put_page_type(page) == 0) )
+ {
+ tlb_flush[smp_processor_id()] = 1;
+ page->flags &= ~PG_type_mask;
+ }
+ put_page_tot(page);
+}
+
+
+static int mod_l2_entry(unsigned long pa, l2_pgentry_t new_l2_entry)
+{
+ l2_pgentry_t *p_l2_entry, old_l2_entry;
+
+ p_l2_entry = map_domain_mem(pa);
+ old_l2_entry = *p_l2_entry;
+
+ if ( (((unsigned long)p_l2_entry & (PAGE_SIZE-1)) >> 2) >=
+ DOMAIN_ENTRIES_PER_L2_PAGETABLE )
+ {
+ MEM_LOG("Illegal L2 update attempt in hypervisor area %p",
+ p_l2_entry);
+ goto fail;
+ }
+
+ if ( (l2_pgentry_val(new_l2_entry) & _PAGE_PRESENT) )
+ {
+ if ( (l2_pgentry_val(new_l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)) )
+ {
+ MEM_LOG("Bad L2 entry val %04lx",
+ l2_pgentry_val(new_l2_entry) &
+ (_PAGE_GLOBAL|_PAGE_PSE));
+ goto fail;
+ }
+ /* Differ in mapping (bits 12-31) or presence (bit 0)? */
+ if ( ((l2_pgentry_val(old_l2_entry) ^
+ l2_pgentry_val(new_l2_entry)) & 0xfffff001) != 0 )
+ {
+ if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) )
+ {
+ put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
+ }
+
+ /* Assume we're mapping an L1 table, falling back to twisted L2. */
+ if ( get_l1_table(l2_pgentry_to_pagenr(new_l2_entry)) &&
+ get_twisted_l2_table(pa >> PAGE_SHIFT, new_l2_entry) )
+ goto fail;
+ }
+ }
+ else if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) )
+ {
+ put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
+ }
+
+ *p_l2_entry = new_l2_entry;
+ unmap_domain_mem(p_l2_entry);
+ return 0;
+
+ fail:
+ unmap_domain_mem(p_l2_entry);
+ return -1;
+}
+
+
+static int mod_l1_entry(unsigned long pa, l1_pgentry_t new_l1_entry)
+{
+ l1_pgentry_t *p_l1_entry, old_l1_entry;
+
+ p_l1_entry = map_domain_mem(pa);
+ old_l1_entry = *p_l1_entry;
+
+ if ( (l1_pgentry_val(new_l1_entry) & _PAGE_PRESENT) )
+ {
+ if ( (l1_pgentry_val(new_l1_entry) &
+ (_PAGE_GLOBAL|_PAGE_PAT)) )
+ {
+ MEM_LOG("Bad L1 entry val %04lx",
+ l1_pgentry_val(new_l1_entry) &
+ (_PAGE_GLOBAL|_PAGE_PAT));
+ goto fail;
+ }
+ /*
+ * Differ in mapping (bits 12-31), writeable (bit 1), or
+ * presence (bit 0)?
+ */
+ if ( ((l1_pgentry_val(old_l1_entry) ^
+ l1_pgentry_val(new_l1_entry)) & 0xfffff003) != 0 )
+ {
+ if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
+ {
+ put_page(l1_pgentry_to_pagenr(old_l1_entry),
+ l1_pgentry_val(old_l1_entry) & _PAGE_RW);
+ }
+
+ if ( get_page(l1_pgentry_to_pagenr(new_l1_entry),
+ l1_pgentry_val(new_l1_entry) & _PAGE_RW) ){
+ goto fail;
+ }
+ }
+ }
+ else if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
+ {
+ put_page(l1_pgentry_to_pagenr(old_l1_entry),
+ l1_pgentry_val(old_l1_entry) & _PAGE_RW);
+ }
+
+ *p_l1_entry = new_l1_entry;
+ unmap_domain_mem(p_l1_entry);
+ return 0;
+
+ fail:
+ unmap_domain_mem(p_l1_entry);
+ return -1;
+}
+
+
+static int do_extended_command(unsigned long ptr, unsigned long val)
+{
+ int err = 0;
+ unsigned long pfn = ptr >> PAGE_SHIFT;
+ struct pfn_info *page = frame_table + pfn;
+
+ switch ( (val & PGEXT_CMD_MASK) )
+ {
+ case PGEXT_PIN_L1_TABLE:
+ err = get_l1_table(pfn);
+ goto mark_as_pinned;
+ case PGEXT_PIN_L2_TABLE:
+ err = get_l2_table(pfn);
+ mark_as_pinned:
+ if ( err )
+ {
+ MEM_LOG("Error while pinning pfn %08lx", pfn);
+ break;
+ }
+ put_page_type(page);
+ put_page_tot(page);
+ if ( !(page->type_count & REFCNT_PIN_BIT) )
+ {
+ page->type_count |= REFCNT_PIN_BIT;
+ page->tot_count |= REFCNT_PIN_BIT;
+ }
+ else
+ {
+ MEM_LOG("Pfn %08lx already pinned", pfn);
+ err = 1;
+ }
+ break;
+
+ case PGEXT_UNPIN_TABLE:
+ if ( !DOMAIN_OKAY(page->flags) )
+ {
+ err = 1;
+ MEM_LOG("Page %08lx bad domain (dom=%ld)",
+ ptr, page->flags & PG_domain_mask);
+ }
+ else if ( (page->type_count & REFCNT_PIN_BIT) )
+ {
+ page->type_count &= ~REFCNT_PIN_BIT;
+ page->tot_count &= ~REFCNT_PIN_BIT;
+ get_page_type(page);
+ get_page_tot(page);
+ ((page->flags & PG_type_mask) == PGT_l1_page_table) ?
+ put_l1_table(pfn) : put_l2_table(pfn);
+ }
+ else
+ {
+ err = 1;
+ MEM_LOG("Pfn %08lx not pinned", pfn);
+ }
+ break;
+
+ case PGEXT_NEW_BASEPTR:
+ err = get_l2_table(pfn);
+ if ( !err )
+ {
+ put_l2_table(pagetable_val(current->mm.pagetable) >> PAGE_SHIFT);
+ current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
+ }
+ else
+ {
+ MEM_LOG("Error while installing new baseptr %08lx %d", ptr, err);
+ }
+ /* fall through */
+
+ case PGEXT_TLB_FLUSH:
+ tlb_flush[smp_processor_id()] = 1;
+ break;
+
+ case PGEXT_INVLPG:
+ __flush_tlb_one(val & ~PGEXT_CMD_MASK);
+ break;
+
+ default:
+ MEM_LOG("Invalid extended pt command 0x%08lx", val & PGEXT_CMD_MASK);
+ err = 1;
+ break;
+ }
+
+ return err;
+}
+
+int do_process_page_updates(page_update_request_t *ureqs, int count)
+{
+ page_update_request_t req;
+ unsigned long flags, pfn;
+ struct pfn_info *page;
+ int err = 0, i;
+
+ for ( i = 0; i < count; i++ )
+ {
+ if ( copy_from_user(&req, ureqs, sizeof(req)) )
+ {
+ kill_domain_with_errmsg("Cannot read page update request");
+ }
+
+ pfn = req.ptr >> PAGE_SHIFT;
+ if ( pfn >= max_page )
+ {
+ MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page);
+ kill_domain_with_errmsg("Page update request out of range");
+ }
+
+ err = 1;
+
+ /* Least significant bits of 'ptr' demux the operation type. */
+ switch ( req.ptr & (sizeof(l1_pgentry_t)-1) )
+ {
+ /*
+ * PGREQ_NORMAL: Normal update to any level of page table.
+ */
+ case PGREQ_NORMAL:
+ page = frame_table + pfn;
+ flags = page->flags;
+
+ if ( DOMAIN_OKAY(flags) )
+ {
+ switch ( (flags & PG_type_mask) )
+ {
+ case PGT_l1_page_table:
+ err = mod_l1_entry(req.ptr, mk_l1_pgentry(req.val));
+ break;
+ case PGT_l2_page_table:
+ err = mod_l2_entry(req.ptr, mk_l2_pgentry(req.val));
+ break;
+ default:
+ MEM_LOG("Update to non-pt page %08lx", req.ptr);
+ break;
+ }
+ }
+ else
+ {
+ MEM_LOG("Bad domain normal update (dom %d, pfn %ld)",
+ current->domain, pfn);
+ }
+ break;
+
+ case PGREQ_MPT_UPDATE:
+ page = frame_table + pfn;
+ if ( DOMAIN_OKAY(page->flags) )
+ {
+ machine_to_phys_mapping[pfn] = req.val;
+ err = 0;
+ }
+ else
+ {
+ MEM_LOG("Bad domain MPT update (dom %d, pfn %ld)",
+ current->domain, pfn);
+ }
+ break;
+
+ /*
+ * PGREQ_EXTENDED_COMMAND: Extended command is specified
+ * in the least-significant bits of the 'value' field.
+ */
+ case PGREQ_EXTENDED_COMMAND:
+ req.ptr &= ~(sizeof(l1_pgentry_t) - 1);
+ err = do_extended_command(req.ptr, req.val);
+ break;
+
+ case PGREQ_UNCHECKED_UPDATE:
+ req.ptr &= ~(sizeof(l1_pgentry_t) - 1);
+ if ( current->domain == 0 )
+ {
+ unsigned long *ptr = map_domain_mem(req.ptr);
+ *ptr = req.val;
+ unmap_domain_mem(ptr);
+ err = 0;
+ }
+ else
+ {
+ MEM_LOG("Bad unchecked update attempt");
+ }
+ break;
+
+ default:
+ MEM_LOG("Invalid page update command %08lx", req.ptr);
+ break;
+ }
+
+ if ( err )
+ {
+ kill_domain_with_errmsg("Illegal page update request");
+ }
+
+ ureqs++;
+ }
+
+ if ( tlb_flush[smp_processor_id()] )
+ {
+ tlb_flush[smp_processor_id()] = 0;
+ __write_cr3_counted(pagetable_val(current->mm.pagetable));
+ }
+
+ return(0);
+}
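+
+/*
+ * Illustrative guest-side sketch (the hypercall wrapper name below is
+ * hypothetical; only the request encoding is taken from the code above).
+ * Pinning the L2 table at machine address l2_ma looks like:
+ *
+ *     page_update_request_t req;
+ *     req.ptr = (l2_ma & PAGE_MASK) | PGREQ_EXTENDED_COMMAND;
+ *     req.val = PGEXT_PIN_L2_TABLE;
+ *     HYPERVISOR_pt_update(&req, 1);    (hypothetical wrapper)
+ */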
+
diff --git a/xen/common/network.c b/xen/common/network.c
new file mode 100644
index 0000000000..f761ca9ba2
--- /dev/null
+++ b/xen/common/network.c
@@ -0,0 +1,475 @@
+/* network.c
+ *
+ * Network virtualization for Xen. Lower-level network interactions are in
+ * net/dev.c and in the drivers. This file contains routines to interact
+ * with the virtual interfaces (vifs) and the virtual firewall/router through
+ * the use of rules.
+ *
+ * Copyright (c) 2002, A K Warfield and K A Fraser
+ */
+
+#include <hypervisor-ifs/network.h>
+#include <xeno/sched.h>
+#include <xeno/errno.h>
+#include <xeno/init.h>
+#include <xeno/slab.h>
+#include <xeno/spinlock.h>
+#include <xeno/if_ether.h>
+#include <linux/skbuff.h>
+#include <xeno/netdevice.h>
+#include <xeno/in.h>
+#include <asm/domain_page.h>
+#include <asm/io.h>
+
+/* vif globals
+ * sys_vif_list is a lookup table for vifs, used in packet forwarding.
+ * It will be replaced later by something a little more flexible.
+ */
+
+int sys_vif_count; /* global vif count */
+net_vif_t *sys_vif_list[MAX_SYSTEM_VIFS]; /* global vif array */
+net_rule_ent_t *net_rule_list; /* global list of rules */
+kmem_cache_t *net_vif_cache;
+kmem_cache_t *net_rule_cache;
+static rwlock_t net_rule_lock = RW_LOCK_UNLOCKED; /* rule mutex */
+static rwlock_t sys_vif_lock = RW_LOCK_UNLOCKED; /* vif mutex */
+
+void print_net_rule_list();
+
+
+/* ----[ VIF Functions ]----------------------------------------------------*/
+
+/* create_net_vif - Create a new vif and append it to the specified domain.
+ *
+ * The domain is examined to determine how many vifs are currently
+ * allocated, and the newly allocated vif is appended. The vif is also
+ * added to the global list.
+ */
+
+net_vif_t *create_net_vif(int domain)
+{
+ net_vif_t *new_vif;
+ net_ring_t *new_ring;
+ net_shadow_ring_t *shadow_ring;
+ struct task_struct *dom_task;
+
+ if ( !(dom_task = find_domain_by_id(domain)) )
+ return NULL;
+
+ if ( (new_vif = kmem_cache_alloc(net_vif_cache, GFP_KERNEL)) == NULL )
+ return NULL;
+
+ new_ring = dom_task->net_ring_base + dom_task->num_net_vifs;
+ memset(new_ring, 0, sizeof(net_ring_t));
+
+ shadow_ring = kmalloc(sizeof(net_shadow_ring_t), GFP_KERNEL);
+ if ( shadow_ring == NULL ) goto fail;
+
+ shadow_ring->rx_ring = kmalloc(RX_RING_SIZE
+ * sizeof(rx_shadow_entry_t), GFP_KERNEL);
+ shadow_ring->tx_ring = kmalloc(TX_RING_SIZE
+ * sizeof(tx_shadow_entry_t), GFP_KERNEL);
+ if ( (shadow_ring->rx_ring == NULL) || (shadow_ring->tx_ring == NULL) )
+ goto fail;
+
+ shadow_ring->rx_prod = shadow_ring->rx_cons = shadow_ring->rx_idx = 0;
+ shadow_ring->tx_prod = shadow_ring->tx_cons = shadow_ring->tx_idx = 0;
+
+ /* Fill in the new vif struct. */
+
+ new_vif->net_ring = new_ring;
+ new_vif->shadow_ring = shadow_ring;
+
+ new_vif->domain = dom_task;
+
+ new_vif->list.next = NULL;
+
+ write_lock(&sys_vif_lock);
+ new_vif->id = sys_vif_count;
+ sys_vif_list[sys_vif_count++] = new_vif;
+ write_unlock(&sys_vif_lock);
+
+ dom_task->net_vif_list[dom_task->num_net_vifs] = new_vif;
+ dom_task->num_net_vifs++;
+
+ return new_vif;
+
+fail:
+ kmem_cache_free(net_vif_cache, new_vif);
+ if ( shadow_ring != NULL )
+ {
+ if ( shadow_ring->rx_ring ) kfree(shadow_ring->rx_ring);
+ if ( shadow_ring->tx_ring ) kfree(shadow_ring->tx_ring);
+ kfree(shadow_ring);
+ }
+ return NULL;
+}
+
+/* destroy_net_vif - Destroy the last vif in the given domain.
+ *
+ * There doesn't seem to be any reason (yet) to be able to axe an arbitrary
+ * vif, by vif id.
+ */
+
+void destroy_net_vif(struct task_struct *p)
+{
+ int i;
+
+ if ( p->num_net_vifs <= 0 ) return; /* nothing to do */
+
+ i = --p->num_net_vifs;
+
+ write_lock(&sys_vif_lock);
+ sys_vif_list[p->net_vif_list[i]->id] = NULL; /* system vif list is not gc'ed */
+ write_unlock(&sys_vif_lock);
+
+ kfree(p->net_vif_list[i]->shadow_ring->tx_ring);
+ kfree(p->net_vif_list[i]->shadow_ring->rx_ring);
+ kfree(p->net_vif_list[i]->shadow_ring);
+ kmem_cache_free(net_vif_cache, p->net_vif_list[i]);
+}
+
+/* vif_query - Called from the proc file system to get a list of vifs
+ * assigned to a particular domain.
+ */
+
+void vif_query(vif_query_t *vq)
+{
+ struct task_struct *dom_task;
+ char buf[128];
+ int i;
+
+ if ( !(dom_task = find_domain_by_id(vq->domain)) ) return;
+
+ *buf = '\0';
+
+ for ( i = 0; i < dom_task->num_net_vifs; i++ )
+ sprintf(buf + strlen(buf), "%d\n", dom_task->net_vif_list[i]->id);
+
+ copy_to_user(vq->buf, buf, strlen(buf) + 1);
+}
+
+
+/* print_vif_list - Print the contents of the global vif table.
+ */
+
+void print_vif_list()
+{
+ int i;
+ net_vif_t *v;
+
+ printk("Currently, there are %d VIFs.\n", sys_vif_count);
+ for ( i = 0; i < sys_vif_count; i++ )
+ {
+ if ( (v = sys_vif_list[i]) == NULL ) continue; /* vif was destroyed */
+ printk("] VIF Entry %d(%d):\n", i, v->id);
+ printk(" > net_ring*: %p\n", v->net_ring);
+ printk(" > domain : %u\n", v->domain->domain);
+ }
+}
+
+/* ----[ Net Rule Functions ]-----------------------------------------------*/
+
+/* add_net_rule - Add a new network filter rule.
+ */
+
+int add_net_rule(net_rule_t *rule)
+{
+ net_rule_ent_t *new_ent;
+
+ if ( (new_ent = kmem_cache_alloc(net_rule_cache, GFP_KERNEL)) == NULL )
+ {
+ return -ENOMEM;
+ }
+
+ memcpy(&new_ent->r, rule, sizeof(net_rule_t));
+
+ write_lock(&net_rule_lock);
+ new_ent->next = net_rule_list;
+ net_rule_list = new_ent;
+ write_unlock(&net_rule_lock);
+
+ return 0;
+}
+
+/* delete_net_rule - Delete an existing network rule.
+ */
+
+int delete_net_rule(net_rule_t *rule)
+{
+ net_rule_ent_t *ent = net_rule_list, *prev = NULL;
+ while ( (ent) && ((memcmp(rule, &ent->r, sizeof(net_rule_t))) != 0) )
+ {
+ prev = ent;
+ ent = ent->next;
+ }
+
+ if (ent != NULL)
+ {
+ write_lock(&net_rule_lock);
+ if (prev != NULL)
+ {
+ prev->next = ent->next;
+ }
+ else
+ {
+ net_rule_list = ent->next;
+ }
+ kmem_cache_free(net_rule_cache, ent);
+ write_unlock(&net_rule_lock);
+ }
+ return 0;
+}
+
+/* add_default_net_rule - Set up the default network path (i.e. for dom0).
+ *
+ * This is a utility function to route all traffic with the specified
+ * IP address to the specified vif. It's used to set up domain zero.
+ */
+
+void add_default_net_rule(int vif_id, u32 ipaddr)
+{
+ net_rule_t new_rule;
+
+ /* Outbound rule. */
+ memset(&new_rule, 0, sizeof(net_rule_t));
+ new_rule.src_addr = ipaddr;
+ new_rule.src_addr_mask = 0xffffffff;
+ new_rule.src_interface = vif_id;
+ new_rule.dst_interface = VIF_PHYSICAL_INTERFACE;
+ new_rule.action = NETWORK_ACTION_ACCEPT;
+ new_rule.proto = NETWORK_PROTO_ANY;
+ add_net_rule(&new_rule);
+
+ /* Inbound rule. */
+ memset(&new_rule, 0, sizeof(net_rule_t));
+ new_rule.dst_addr = ipaddr;
+ new_rule.dst_addr_mask = 0xffffffff;
+ new_rule.src_interface = VIF_PHYSICAL_INTERFACE;
+ new_rule.dst_interface = vif_id;
+ new_rule.action = NETWORK_ACTION_ACCEPT;
+ new_rule.proto = NETWORK_PROTO_ANY;
+ add_net_rule(&new_rule);
+}
+
+/* print_net_rule - Print a single net rule.
+ */
+
+void print_net_rule(net_rule_t *r)
+{
+ printk("===] NET RULE:\n");
+ printk("=] src_addr : %lu\n", (unsigned long) r->src_addr);
+ printk("=] src_addr_mask : %lu\n", (unsigned long) r->src_addr_mask);
+ printk("=] dst_addr : %lu\n", (unsigned long) r->dst_addr);
+ printk("=] dst_addr_mask : %lu\n", (unsigned long) r->dst_addr_mask);
+ printk("=] src_port : %u\n", r->src_port);
+ printk("=] src_port_mask : %u\n", r->src_port_mask);
+ printk("=] dst_port : %u\n", r->dst_port);
+ printk("=] dst_port_mask : %u\n", r->dst_port_mask);
+ printk("=] dst_proto : %u\n", r->proto);
+ printk("=] src_interface : %d\n", r->src_interface);
+ printk("=] dst_interface : %d\n", r->dst_interface);
+ printk("=] action : %u\n", r->action);
+}
+
+/* print_net_rule_list - Print the global rule table.
+ */
+
+void print_net_rule_list()
+{
+ net_rule_ent_t *ent;
+ int count = 0;
+
+ read_lock(&net_rule_lock);
+
+ ent = net_rule_list;
+
+ while (ent)
+ {
+ print_net_rule(&ent->r);
+ ent = ent->next;
+ count++;
+ }
+ printk("\nTotal of %d rules.\n", count);
+
+ read_unlock(&net_rule_lock);
+}
+
+/* net_find_rule - Find the destination vif according to the current rules.
+ *
+ * Apply the rules to the given packet fields and return the vif id that
+ * the packet is bound for. If there is no match, VIF_DROP is returned.
+ */
+
+int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr,
+ u16 src_port, u16 dst_port, int src_vif)
+{
+ net_rule_ent_t *ent;
+ int dest = VIF_DROP;
+
+ read_lock(&net_rule_lock);
+
+ ent = net_rule_list;
+
+ while (ent)
+ {
+ if ( ((ent->r.src_interface == src_vif)
+ || (ent->r.src_interface == VIF_ANY_INTERFACE)) &&
+
+ (!((ent->r.src_addr ^ src_addr) & ent->r.src_addr_mask )) &&
+ (!((ent->r.dst_addr ^ dst_addr) & ent->r.dst_addr_mask )) &&
+ (!((ent->r.src_port ^ src_port) & ent->r.src_port_mask )) &&
+ (!((ent->r.dst_port ^ dst_port) & ent->r.dst_port_mask )) &&
+
+ ((ent->r.proto == NETWORK_PROTO_ANY) ||
+ ((ent->r.proto == NETWORK_PROTO_IP) &&
+ (nproto == (u8)ETH_P_IP)) ||
+ ((ent->r.proto == NETWORK_PROTO_ARP) &&
+ (nproto == (u8)ETH_P_ARP)) ||
+ ((ent->r.proto == NETWORK_PROTO_TCP) &&
+ (tproto == IPPROTO_TCP)) ||
+ ((ent->r.proto == NETWORK_PROTO_UDP) &&
+ (tproto == IPPROTO_UDP)))
+ )
+ {
+ break;
+ }
+ ent = ent->next;
+ }
+
+ if ( ent ) dest = ent->r.dst_interface;
+ read_unlock(&net_rule_lock);
+ return dest;
+}
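+
+/*
+ * Worked matching example (illustrative): a rule with src_addr = 10.0.0.1
+ * and src_addr_mask = 255.255.255.0 matches any source in 10.0.0.0/24,
+ * since ((rule.src_addr ^ src_addr) & rule.src_addr_mask) == 0 ignores
+ * the unmasked low octet; a mask of zero turns a field into a wildcard.
+ */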
+
+/* net_get_target_vif - Find the vif that the given sk_buff is bound for.
+ *
+ * This is intended to be the main interface to the VFR rules, where
+ * net_find_rule (above) is a private aspect of the current matching
+ * implementation. All in-hypervisor routing should use this function only
+ * to ensure that this can be rewritten later.
+ *
+ * Currently, network rules are stored in a global linked list. New rules are
+ * added to the front of this list, and (at present) the first matching rule
+ * determines the vif that a packet is sent to. This is obviously not ideal;
+ * it might be more advisable to have chains, or at least most-specific
+ * matching, and moreover routing latency increases linearly (for old rules)
+ * as new rules are added.
+ *
+ * net_get_target_vif examines the sk_buff and pulls out the relevant fields
+ * based on the packet type. It then calls net_find_rule to scan the rule
+ * list.
+ */
+
+#define net_get_target_vif(skb) __net_get_target_vif((skb)->data, (skb)->len, (skb)->src_vif)
+
+int __net_get_target_vif(u8 *data, unsigned int len, int src_vif)
+{
+ int target = VIF_DROP;
+ u8 *h_raw, *nh_raw;
+
+ if ( len < ETH_HLEN ) goto drop;
+
+ nh_raw = data + ETH_HLEN;
+ switch ( ntohs(*(unsigned short *)(data + 12)) )
+ {
+ case ETH_P_ARP:
+ if ( len < (ETH_HLEN + 28) ) goto drop;
+ target = net_find_rule((u8)ETH_P_ARP, 0, ntohl(*(u32 *)(nh_raw + 14)),
+ ntohl(*(u32 *)(nh_raw + 24)), 0, 0,
+ src_vif);
+ break;
+
+ case ETH_P_IP:
+ if ( len < (ETH_HLEN + 20) ) goto drop;
+ h_raw = nh_raw + ((*(unsigned char *)(nh_raw)) & 0x0f) * 4;
+
+ /* XXX For now, we ignore ports. */
+#if 0
+ target = net_find_rule((u8)ETH_P_IP, *(u8 *)(nh_raw + 9),
+ ntohl(*(u32 *)(nh_raw + 12)),
+ ntohl(*(u32 *)(nh_raw + 16)),
+ ntohs(*(u16 *)(h_raw)),
+ ntohs(*(u16 *)(h_raw + 2)),
+ src_vif);
+#else
+ target = net_find_rule((u8)ETH_P_IP, *(u8 *)(nh_raw + 9),
+ ntohl(*(u32 *)(nh_raw + 12)),
+ ntohl(*(u32 *)(nh_raw + 16)),
+ 0,
+ 0,
+ src_vif);
+#endif
+ }
+ return target;
+
+ drop:
+ return VIF_DROP;
+}
+
+/* ----[ Syscall Interface ]------------------------------------------------*/
+
+/*
+ * This is the hook function to handle guest-invoked traps requesting
+ * changes to the network system.
+ */
+
+long do_network_op(network_op_t *u_network_op)
+{
+ long ret=0;
+ network_op_t op;
+
+ if ( current->domain != 0 )
+ return -EPERM;
+
+ if ( copy_from_user(&op, u_network_op, sizeof(op)) )
+ return -EFAULT;
+ switch ( op.cmd )
+ {
+
+ case NETWORK_OP_ADDRULE:
+ {
+ add_net_rule(&op.u.net_rule);
+ }
+ break;
+
+ case NETWORK_OP_DELETERULE:
+ {
+ delete_net_rule(&op.u.net_rule);
+ }
+ break;
+
+ case NETWORK_OP_GETRULELIST:
+ {
+ /* This should eventually ship a rule list up to the VM to be
+ * printed in its procfs. For now, we just print the rules. */
+
+ print_net_rule_list();
+ }
+ break;
+
+ case NETWORK_OP_VIFQUERY:
+ {
+ vif_query(&op.u.vif_query);
+ }
+ break;
+
+ default:
+ ret = -ENOSYS;
+ }
+
+ return ret;
+}
+
+void __init net_init (void)
+{
+ sys_vif_count = 0;
+ memset(sys_vif_list, 0, sizeof(sys_vif_list));
+ net_rule_list = NULL;
+ net_vif_cache = kmem_cache_create("net_vif_cache", sizeof(net_vif_t),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+ net_rule_cache = kmem_cache_create("net_rule_cache", sizeof(net_rule_ent_t),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+}
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
new file mode 100644
index 0000000000..1bfeed440f
--- /dev/null
+++ b/xen/common/page_alloc.c
@@ -0,0 +1,288 @@
+/******************************************************************************
+ * page_alloc.c
+ *
+ * Simple buddy allocator for the Xenoserver hypervisor.
+ *
+ * Copyright (c) 2002 K A Fraser
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <asm/page.h>
+#include <xeno/spinlock.h>
+#include <xeno/slab.h>
+
+static spinlock_t alloc_lock = SPIN_LOCK_UNLOCKED;
+
+
+/*********************
+ * ALLOCATION BITMAP
+ * One bit per page of memory. Bit set => page is allocated.
+ */
+
+static unsigned long *alloc_bitmap;
+#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
+
+#define allocated_in_map(_pn) \
+(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1<<((_pn)&(PAGES_PER_MAPWORD-1))))
+
+
+/*
+ * Hint regarding bitwise arithmetic in map_{alloc,free}:
+ * -(1<<n) sets all bits >= n.
+ * (1<<n)-1 sets all bits < n.
+ * Variable names in map_{alloc,free}:
+ * *_idx == Index into `alloc_bitmap' array.
+ * *_off == Bit offset within an element of the `alloc_bitmap' array.
+ */
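+
+/*
+ * Worked example (illustrative, assuming 32-bit words, i.e.
+ * PAGES_PER_MAPWORD == 32): map_alloc(34, 5) marks pages 34..38, which
+ * are bits 2..6 of alloc_bitmap[1]:
+ *     curr_idx == end_idx == 1, start_off = 2, end_off = 7
+ *     ((1<<7)-1) & -(1<<2)  ==  0x0000007f & 0xfffffffc  ==  0x0000007c
+ */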
+
+static void map_alloc(unsigned long first_page, unsigned long nr_pages)
+{
+ unsigned long start_off, end_off, curr_idx, end_idx;
+
+ curr_idx = first_page / PAGES_PER_MAPWORD;
+ start_off = first_page & (PAGES_PER_MAPWORD-1);
+ end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
+ end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
+
+ if ( curr_idx == end_idx )
+ {
+ alloc_bitmap[curr_idx] |= ((1<<end_off)-1) & -(1<<start_off);
+ }
+ else
+ {
+ alloc_bitmap[curr_idx] |= -(1<<start_off);
+ while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0L;
+ alloc_bitmap[curr_idx] |= (1<<end_off)-1;
+ }
+}
+
+
+static void map_free(unsigned long first_page, unsigned long nr_pages)
+{
+ unsigned long start_off, end_off, curr_idx, end_idx;
+
+ curr_idx = first_page / PAGES_PER_MAPWORD;
+ start_off = first_page & (PAGES_PER_MAPWORD-1);
+ end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
+ end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
+
+ if ( curr_idx == end_idx )
+ {
+ alloc_bitmap[curr_idx] &= -(1<<end_off) | ((1<<start_off)-1);
+ }
+ else
+ {
+ alloc_bitmap[curr_idx] &= (1<<start_off)-1;
+ while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
+ alloc_bitmap[curr_idx] &= -(1<<end_off);
+ }
+}
+
+
+
+/*************************
+ * BINARY BUDDY ALLOCATOR
+ */
+
+typedef struct chunk_head_st chunk_head_t;
+typedef struct chunk_tail_st chunk_tail_t;
+
+struct chunk_head_st {
+ chunk_head_t *next;
+ chunk_head_t **pprev;
+ int level;
+};
+
+struct chunk_tail_st {
+ int level;
+};
+
+/* Linked lists of free chunks of different powers-of-two in size. */
+#define FREELIST_SIZE ((sizeof(void*)<<3)-PAGE_SHIFT)
+static chunk_head_t *free_head[FREELIST_SIZE];
+static chunk_head_t free_tail[FREELIST_SIZE];
+#define FREELIST_EMPTY(_l) ((_l)->next == NULL)
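+
+/*
+ * Sizing note (illustrative, assuming a 32-bit build with 4kB pages):
+ * FREELIST_SIZE == 32 - 12 == 20, i.e. one free list per chunk size
+ * from 2^12 bytes (a single page) up to 2^31 bytes.
+ */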
+
+#define round_pgdown(_p) ((_p)&PAGE_MASK)
+#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+
+
+/* Initialise allocator, placing addresses [@min,@max] in free pool. */
+void __init init_page_allocator(unsigned long min, unsigned long max)
+{
+ int i;
+ unsigned long range, bitmap_size;
+ chunk_head_t *ch;
+ chunk_tail_t *ct;
+
+ for ( i = 0; i < FREELIST_SIZE; i++ )
+ {
+ free_head[i] = &free_tail[i];
+ free_tail[i].pprev = &free_head[i];
+ free_tail[i].next = NULL;
+ }
+
+ min = round_pgup (min);
+ max = round_pgdown(max);
+
+ /* Allocate space for the allocation bitmap. */
+ bitmap_size = (max+1) >> (PAGE_SHIFT+3);
+ bitmap_size = round_pgup(bitmap_size);
+ alloc_bitmap = (unsigned long *)__va(min);
+ min += bitmap_size;
+ range = max - min;
+
+ /* All allocated by default. */
+ memset(alloc_bitmap, ~0, bitmap_size);
+ /* Free up the memory we've been given to play with. */
+ map_free(min>>PAGE_SHIFT, range>>PAGE_SHIFT);
+
+ /* The buddy lists are addressed in high memory. */
+ min += PAGE_OFFSET;
+ max += PAGE_OFFSET;
+
+ while ( range != 0 )
+ {
+ /*
+ * Next chunk is limited by alignment of min, but also
+ * must not be bigger than remaining range.
+ */
+ for ( i = PAGE_SHIFT; (1<<(i+1)) <= range; i++ )
+ if ( min & (1<<i) ) break;
+
+ ch = (chunk_head_t *)min;
+ min += (1<<i);
+ range -= (1<<i);
+ ct = (chunk_tail_t *)min-1;
+ i -= PAGE_SHIFT;
+ ch->level = i;
+ ch->next = free_head[i];
+ ch->pprev = &free_head[i];
+ ch->next->pprev = &ch->next;
+ free_head[i] = ch;
+ ct->level = i;
+ }
+}
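+
+/*
+ * Worked example of the carving loop above (illustrative): a 52kB free
+ * range starting at min = 0x3000 is carved into three chunks,
+ *      4kB at 0x3000   (limited by the alignment of min)
+ *     16kB at 0x4000
+ *     32kB at 0x8000
+ * each being the largest power of two that is naturally aligned at min
+ * and still fits in the remaining range.
+ */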
+
+
+/* Allocate 2^@order contiguous pages. */
+unsigned long __get_free_pages(int mask, int order)
+{
+ int i, attempts = 0;
+ chunk_head_t *alloc_ch, *spare_ch;
+ chunk_tail_t *spare_ct;
+ unsigned long flags;
+
+retry:
+ spin_lock_irqsave(&alloc_lock, flags);
+
+ /* Find smallest order which can satisfy the request. */
+ for ( i = order; i < FREELIST_SIZE; i++ ) {
+ if ( !FREELIST_EMPTY(free_head[i]) )
+ break;
+ }
+
+ if ( i == FREELIST_SIZE ) goto no_memory;
+
+ /* Unlink a chunk. */
+ alloc_ch = free_head[i];
+ free_head[i] = alloc_ch->next;
+ alloc_ch->next->pprev = alloc_ch->pprev;
+
+ /* We may have to break the chunk a number of times. */
+ while ( i != order )
+ {
+ /* Split into two equal parts. */
+ i--;
+ spare_ch = (chunk_head_t *)((char *)alloc_ch + (1<<(i+PAGE_SHIFT)));
+ spare_ct = (chunk_tail_t *)((char *)spare_ch + (1<<(i+PAGE_SHIFT)))-1;
+
+ /* Create new header for spare chunk. */
+ spare_ch->level = i;
+ spare_ch->next = free_head[i];
+ spare_ch->pprev = &free_head[i];
+ spare_ct->level = i;
+
+ /* Link in the spare chunk. */
+ spare_ch->next->pprev = &spare_ch->next;
+ free_head[i] = spare_ch;
+ }
+
+ map_alloc(__pa(alloc_ch)>>PAGE_SHIFT, 1<<order);
+
+ spin_unlock_irqrestore(&alloc_lock, flags);
+
+ return((unsigned long)alloc_ch);
+
+ no_memory:
+ if ( attempts++ < 8 )
+ {
+ spin_unlock_irqrestore(&alloc_lock, flags);
+ kmem_cache_reap(0);
+ goto retry;
+ }
+
+ printk("Cannot handle page request order %d!\n", order);
+ dump_slabinfo();
+
+ return 0;
+}
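+
+/*
+ * Usage sketch (illustrative; note the mask argument is currently unused
+ * by this allocator):
+ *
+ *     unsigned long p = __get_free_pages(0, 2);   2^2 contiguous pages
+ *     if ( p != 0 )
+ *         __free_pages(p, 2);
+ */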
+
+
+/* Free 2^@order pages at location @p. */
+void __free_pages(unsigned long p, int order)
+{
+ unsigned long size = 1 << (order + PAGE_SHIFT);
+ chunk_head_t *ch;
+ chunk_tail_t *ct;
+ unsigned long flags;
+ unsigned long pagenr = __pa(p) >> PAGE_SHIFT;
+
+ spin_lock_irqsave(&alloc_lock, flags);
+
+ map_free(pagenr, 1<<order);
+
+ /* Merge chunks as far as possible. */
+ for ( ; ; )
+ {
+ if ( (p & size) )
+ {
+ /* Merge with predecessor block? */
+ if ( allocated_in_map(pagenr-1) ) break;
+ ct = (chunk_tail_t *)p - 1;
+ if ( ct->level != order ) break;
+ ch = (chunk_head_t *)(p - size);
+ p -= size;
+ }
+ else
+ {
+ /* Merge with successor block? */
+ if ( allocated_in_map(pagenr+(1<<order)) ) break;
+ ch = (chunk_head_t *)(p + size);
+ if ( ch->level != order ) break;
+ }
+
+ /* Okay, unlink the neighbour. */
+ *ch->pprev = ch->next;
+ ch->next->pprev = ch->pprev;
+
+ order++;
+ size <<= 1;
+ }
+
+ /* Okay, add the final chunk to the appropriate free list. */
+ ch = (chunk_head_t *)p;
+ ct = (chunk_tail_t *)(p+size)-1;
+ ct->level = order;
+ ch->level = order;
+ ch->pprev = &free_head[order];
+ ch->next = free_head[order];
+ ch->next->pprev = &ch->next;
+ free_head[order] = ch;
+
+ spin_unlock_irqrestore(&alloc_lock, flags);
+}
diff --git a/xen/common/perfc.c b/xen/common/perfc.c
new file mode 100644
index 0000000000..55554eba70
--- /dev/null
+++ b/xen/common/perfc.c
@@ -0,0 +1,81 @@
+/*
+ * xen performance counters
+ */
+
+#include <xeno/perfc.h>
+#include <xeno/keyhandler.h>
+
+#define PERFCOUNTER( var, name ) "[0]"name"\0",
+#define PERFCOUNTER_ARRAY( var, name, size ) "["#size"]"name"\0",
+
+char* perfc_name[] = {
+#include <xeno/perfc_defn.h>
+};
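+
+/*
+ * Illustrative note: each entry in perfc_defn.h expands to a string whose
+ * prefix encodes the element count, e.g. a hypothetical
+ *     PERFCOUNTER_ARRAY( net_rx, "net rx", 4 )
+ * becomes "[4]net rx". The sscanf("[%d]%n") calls below recover both the
+ * count and the offset of the name proper.
+ */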
+
+struct perfcounter_t perfcounters;
+
+void __perfc_print (unsigned long counter[], int offset)
+{
+ int loop;
+ int total_size = 0;
+ int element_size = 0;
+ int num = 0;
+
+ for (loop = 0; loop < sizeof(perfc_name) / sizeof(char *); loop++)
+ {
+ /* '%n' leaves the offset of the name proper in 'num'. */
+ sscanf (perfc_name[loop], "[%d]%n", &element_size, &num);
+ total_size += element_size == 0 ? 1 : element_size;
+ if (total_size > offset) break;
+ }
+ if (loop == sizeof(perfc_name) / sizeof(char *))
+ {
+ printf ("error: couldn't find variable\n");
+ return;
+ }
+ if (element_size == 0) /* single counter */
+ {
+ printf ("%10ld 0x%08lx %s\n", counter[0], counter[0],
+ perfc_name[loop] + num);
+ }
+ else /* show entire array */
+ {
+ int idx;
+ /* Use a separate index: 'loop' still selects the name. */
+ for (idx = 0; idx < element_size; idx++)
+ {
+ printf ("%10ld 0x%08lx %s:%d\n",
+ counter[idx], counter[idx],
+ perfc_name[loop] + num, idx);
+ }
+ }
+ return;
+}
+
+void perfc_printall (u_char key, void *dev_id, struct pt_regs *regs)
+{
+ int loop, idx;
+ int element_size;
+ int num;
+ unsigned long *counters = (unsigned long *)&perfcounters;
+
+ printf ("xen performance counters\n");
+ for (loop = 0; loop < sizeof(perfc_name) / sizeof(char *); loop++)
+ {
+ sscanf (perfc_name[loop], "[%d]%n", &element_size, &num);
+
+ for (idx = 0; idx < (element_size ? element_size : 1); idx++)
+ {
+ if (element_size)
+ {
+ printf ("%10ld 0x%08lx %s:%d\n",
+ *counters, *counters, perfc_name[loop] + num, idx);
+ }
+ else
+ {
+ printf ("%10ld 0x%08lx %s\n",
+ *counters, *counters, perfc_name[loop] + num);
+ }
+ counters++;
+ }
+ }
+
+ return;
+}
diff --git a/xen/common/resource.c b/xen/common/resource.c
new file mode 100644
index 0000000000..406fb256a7
--- /dev/null
+++ b/xen/common/resource.c
@@ -0,0 +1,332 @@
+/*
+ * linux/kernel/resource.c
+ *
+ * Copyright (C) 1999 Linus Torvalds
+ * Copyright (C) 1999 Martin Mares <mj@ucw.cz>
+ *
+ * Arbitrary resource management.
+ */
+
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <asm/io.h>
+
+struct resource ioport_resource = { "PCI IO", 0x0000, IO_SPACE_LIMIT, IORESOURCE_IO };
+struct resource iomem_resource = { "PCI mem", 0x00000000, 0xffffffff, IORESOURCE_MEM };
+
+static rwlock_t resource_lock = RW_LOCK_UNLOCKED;
+
+/*
+ * This generates reports for /proc/ioports and /proc/iomem
+ */
+static char * do_resource_list(struct resource *entry, const char *fmt, int offset, char *buf, char *end)
+{
+ if (offset < 0)
+ offset = 0;
+
+ while (entry) {
+ const char *name = entry->name;
+ unsigned long from, to;
+
+ if ((int) (end-buf) < 80)
+ return buf;
+
+ from = entry->start;
+ to = entry->end;
+ if (!name)
+ name = "<BAD>";
+
+ buf += sprintf(buf, fmt + offset, from, to, name);
+ if (entry->child)
+ buf = do_resource_list(entry->child, fmt, offset-2, buf, end);
+ entry = entry->sibling;
+ }
+
+ return buf;
+}
+
+int get_resource_list(struct resource *root, char *buf, int size)
+{
+ char *fmt;
+ int retval;
+
+ fmt = " %08lx-%08lx : %s\n";
+ if (root->end < 0x10000)
+ fmt = " %04lx-%04lx : %s\n";
+ read_lock(&resource_lock);
+ retval = do_resource_list(root->child, fmt, 8, buf, buf + size) - buf;
+ read_unlock(&resource_lock);
+ return retval;
+}
+
+/* Return the conflict entry if you can't request it */
+static struct resource * __request_resource(struct resource *root, struct resource *new)
+{
+ unsigned long start = new->start;
+ unsigned long end = new->end;
+ struct resource *tmp, **p;
+
+ if (end < start)
+ return root;
+ if (start < root->start)
+ return root;
+ if (end > root->end)
+ return root;
+ p = &root->child;
+ for (;;) {
+ tmp = *p;
+ if (!tmp || tmp->start > end) {
+ new->sibling = tmp;
+ *p = new;
+ new->parent = root;
+ return NULL;
+ }
+ p = &tmp->sibling;
+ if (tmp->end < start)
+ continue;
+ return tmp;
+ }
+}
+
+static int __release_resource(struct resource *old)
+{
+ struct resource *tmp, **p;
+
+ p = &old->parent->child;
+ for (;;) {
+ tmp = *p;
+ if (!tmp)
+ break;
+ if (tmp == old) {
+ *p = tmp->sibling;
+ old->parent = NULL;
+ return 0;
+ }
+ p = &tmp->sibling;
+ }
+ return -EINVAL;
+}
+
+int request_resource(struct resource *root, struct resource *new)
+{
+ struct resource *conflict;
+
+ write_lock(&resource_lock);
+ conflict = __request_resource(root, new);
+ write_unlock(&resource_lock);
+ return conflict ? -EBUSY : 0;
+}
+
+int release_resource(struct resource *old)
+{
+ int retval;
+
+ write_lock(&resource_lock);
+ retval = __release_resource(old);
+ write_unlock(&resource_lock);
+ return retval;
+}
+
+int check_resource(struct resource *root, unsigned long start, unsigned long len)
+{
+ struct resource *conflict, tmp;
+
+ tmp.start = start;
+ tmp.end = start + len - 1;
+ write_lock(&resource_lock);
+ conflict = __request_resource(root, &tmp);
+ if (!conflict)
+ __release_resource(&tmp);
+ write_unlock(&resource_lock);
+ return conflict ? -EBUSY : 0;
+}
+
+/*
+ * Find empty slot in the resource tree given range and alignment.
+ */
+static int find_resource(struct resource *root, struct resource *new,
+ unsigned long size,
+ unsigned long min, unsigned long max,
+ unsigned long align,
+ void (*alignf)(void *, struct resource *,
+ unsigned long, unsigned long),
+ void *alignf_data)
+{
+ struct resource *this = root->child;
+
+ new->start = root->start;
+ for(;;) {
+ if (this)
+ new->end = this->start;
+ else
+ new->end = root->end;
+ if (new->start < min)
+ new->start = min;
+ if (new->end > max)
+ new->end = max;
+ new->start = (new->start + align - 1) & ~(align - 1);
+ if (alignf)
+ alignf(alignf_data, new, size, align);
+ if (new->start < new->end && new->end - new->start + 1 >= size) {
+ new->end = new->start + size - 1;
+ return 0;
+ }
+ if (!this)
+ break;
+ new->start = this->end + 1;
+ this = this->sibling;
+ }
+ return -EBUSY;
+}
+
+/*
+ * Allocate empty slot in the resource tree given range and alignment.
+ */
+int allocate_resource(struct resource *root, struct resource *new,
+ unsigned long size,
+ unsigned long min, unsigned long max,
+ unsigned long align,
+ void (*alignf)(void *, struct resource *,
+ unsigned long, unsigned long),
+ void *alignf_data)
+{
+ int err;
+
+ write_lock(&resource_lock);
+ err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
+ if (err >= 0 && __request_resource(root, new))
+ err = -EBUSY;
+ write_unlock(&resource_lock);
+ return err;
+}
+
+/*
+ * This is compatibility stuff for IO resources.
+ *
+ * Note how this, unlike the above, knows about
+ * the IO flag meanings (busy etc).
+ *
+ * Request-region creates a new busy region.
+ *
+ * Check-region returns non-zero if the area is already busy
+ *
+ * Release-region releases a matching busy region.
+ */
+struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name)
+{
+ struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL);
+
+ if (res) {
+ memset(res, 0, sizeof(*res));
+ res->name = name;
+ res->start = start;
+ res->end = start + n - 1;
+ res->flags = IORESOURCE_BUSY;
+
+ write_lock(&resource_lock);
+
+ for (;;) {
+ struct resource *conflict;
+
+ conflict = __request_resource(parent, res);
+ if (!conflict)
+ break;
+ if (conflict != parent) {
+ parent = conflict;
+ if (!(conflict->flags & IORESOURCE_BUSY))
+ continue;
+ }
+
+ /* Uhhuh, that didn't work out.. */
+ kfree(res);
+ res = NULL;
+ break;
+ }
+ write_unlock(&resource_lock);
+ }
+ return res;
+}
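+
+/*
+ * Usage sketch (illustrative): claim the eight I/O ports of the first
+ * serial UART, then release them again.
+ *
+ *     struct resource *r = __request_region(&ioport_resource, 0x3f8, 8,
+ *                                           "serial");
+ *     if ( r != NULL )
+ *         __release_region(&ioport_resource, 0x3f8, 8);
+ */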
+
+int __check_region(struct resource *parent, unsigned long start, unsigned long n)
+{
+ struct resource * res;
+
+ res = __request_region(parent, start, n, "check-region");
+ if (!res)
+ return -EBUSY;
+
+ release_resource(res);
+ kfree(res);
+ return 0;
+}
+
+void __release_region(struct resource *parent, unsigned long start, unsigned long n)
+{
+ struct resource **p;
+ unsigned long end;
+
+ p = &parent->child;
+ end = start + n - 1;
+
+ for (;;) {
+ struct resource *res = *p;
+
+ if (!res)
+ break;
+ if (res->start <= start && res->end >= end) {
+ if (!(res->flags & IORESOURCE_BUSY)) {
+ p = &res->child;
+ continue;
+ }
+ if (res->start != start || res->end != end)
+ break;
+ *p = res->sibling;
+ kfree(res);
+ return;
+ }
+ p = &res->sibling;
+ }
+ printk("Trying to free nonexistent resource <%08lx-%08lx>\n", start, end);
+}
+
+
+#if 0
+/*
+ * Called from init/main.c to reserve IO ports.
+ */
+#define MAXRESERVE 4
+static int __init reserve_setup(char *str)
+{
+ static int reserved = 0;
+ static struct resource reserve[MAXRESERVE];
+
+ for (;;) {
+ int io_start, io_num;
+ int x = reserved;
+
+ if (get_option (&str, &io_start) != 2)
+ break;
+ if (get_option (&str, &io_num) == 0)
+ break;
+ if (x < MAXRESERVE) {
+ struct resource *res = reserve + x;
+ res->name = "reserved";
+ res->start = io_start;
+ res->end = io_start + io_num - 1;
+ res->flags = IORESOURCE_BUSY;
+ res->child = NULL;
+ if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0)
+ reserved = x+1;
+ }
+ }
+ return 1;
+}
+
+__setup("reserve=", reserve_setup);
+#endif
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
new file mode 100644
index 0000000000..787b43d900
--- /dev/null
+++ b/xen/common/schedule.c
@@ -0,0 +1,371 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: schedule.c
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: CPU scheduling
+ * partially moved from domain.c
+ *
+ ****************************************************************************
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/lib.h>
+#include <xeno/sched.h>
+#include <xeno/delay.h>
+#include <xeno/event.h>
+#include <xeno/time.h>
+#include <xeno/ac_timer.h>
+#include <xeno/interrupt.h>
+
+#undef SCHEDULER_TRACE
+#ifdef SCHEDULER_TRACE
+#define TRC(_x) _x
+#else
+#define TRC(_x)
+#endif
+
+/*
+ * per CPU data for the scheduler.
+ */
+typedef struct schedule_data_st
+{
+ spinlock_t lock;
+ struct list_head runqueue;
+ struct task_struct *prev, *curr;
+} __cacheline_aligned schedule_data_t;
+schedule_data_t schedule_data[NR_CPUS];
+
+static __cacheline_aligned struct ac_timer s_timer[NR_CPUS];
+
+/*
+ * Some convenience functions
+ */
+
+static inline void __add_to_runqueue(struct task_struct * p)
+{
+ list_add(&p->run_list, &schedule_data[p->processor].runqueue);
+}
+
+static inline void __move_last_runqueue(struct task_struct * p)
+{
+ list_del(&p->run_list);
+ list_add_tail(&p->run_list, &schedule_data[p->processor].runqueue);
+}
+
+static inline void __move_first_runqueue(struct task_struct * p)
+{
+ list_del(&p->run_list);
+ list_add(&p->run_list, &schedule_data[p->processor].runqueue);
+}
+
+static inline void __del_from_runqueue(struct task_struct * p)
+{
+ list_del(&p->run_list);
+ p->run_list.next = NULL;
+}
+
+static inline int __task_on_runqueue(struct task_struct *p)
+{
+ return (p->run_list.next != NULL);
+}
+
+
+/*
+ * Add a new domain to the scheduler
+ */
+void sched_add_domain(struct task_struct *p)
+{
+ p->state = TASK_UNINTERRUPTIBLE;
+}
+
+/*
+ * Remove a domain from the scheduler
+ */
+void sched_rem_domain(struct task_struct *p)
+{
+ p->state = TASK_DYING;
+}
+
+
+/*
+ * wake up a domain which had been sleeping
+ */
+int wake_up(struct task_struct *p)
+{
+ unsigned long flags;
+ int ret = 0;
+ spin_lock_irqsave(&schedule_data[p->processor].lock, flags);
+ if ( __task_on_runqueue(p) ) goto out;
+ p->state = TASK_RUNNING;
+ __add_to_runqueue(p);
+ ret = 1;
+
+ out:
+ spin_unlock_irqrestore(&schedule_data[p->processor].lock, flags);
+ return ret;
+}
+
+static void process_timeout(unsigned long __data)
+{
+ struct task_struct * p = (struct task_struct *) __data;
+ wake_up(p);
+}
+
+long schedule_timeout(long timeout)
+{
+ struct timer_list timer;
+ unsigned long expire;
+
+ switch (timeout)
+ {
+ case MAX_SCHEDULE_TIMEOUT:
+ /*
+ * This special case is purely for the caller's convenience.
+ * Nothing more. We could take MAX_SCHEDULE_TIMEOUT from one of the
+ * negative values, but I'd like to return a valid offset (>=0) to allow
+ * the caller to do everything it wants with the retval.
+ */
+ schedule();
+ goto out;
+ default:
+ /*
+ * Another bit of paranoia. Note that the retval will be 0 since no
+ * piece of kernel is supposed to check for a negative retval of
+ * schedule_timeout() (since it should never happen anyway). You just
+ * have the printk() that will tell you if something has gone wrong,
+ * and where.
+ */
+ if (timeout < 0)
+ {
+ printk(KERN_ERR "schedule_timeout: wrong timeout "
+ "value %lx from %p\n", timeout,
+ __builtin_return_address(0));
+ current->state = TASK_RUNNING;
+ goto out;
+ }
+ }
+
+ expire = timeout + jiffies;
+
+ init_timer(&timer);
+ timer.expires = expire;
+ timer.data = (unsigned long) current;
+ timer.function = process_timeout;
+
+ add_timer(&timer);
+ schedule();
+ del_timer_sync(&timer);
+
+ timeout = expire - jiffies;
+
+ out:
+ return timeout < 0 ? 0 : timeout;
+}
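+
+/*
+ * Usage sketch (illustrative, relying on the usual Linux convention that
+ * the caller sets its state before sleeping): sleep for ~100 ticks.
+ *
+ *     current->state = TASK_INTERRUPTIBLE;
+ *     remaining = schedule_timeout(100);
+ */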
+
+/* RN: XXX turn this into do_halt() */
+/*
+ * yield the current process
+ */
+long do_sched_op(void)
+{
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ return 0;
+}
+
+
+void reschedule(struct task_struct *p)
+{
+ int cpu = p->processor;
+ struct task_struct *curr;
+ unsigned long flags;
+
+ if (p->has_cpu)
+ return;
+
+ spin_lock_irqsave(&schedule_data[cpu].lock, flags);
+ curr = schedule_data[cpu].curr;
+ if (is_idle_task(curr)) {
+ set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
+ spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
+#ifdef CONFIG_SMP
+ if (cpu != smp_processor_id())
+ smp_send_event_check_cpu(cpu);
+#endif
+ } else {
+ spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
+ }
+}
+
+
+/*
+ * Pick the next domain to run
+ */
+
+asmlinkage void schedule(void)
+{
+ struct task_struct *prev, *next, *p;
+ struct list_head *tmp;
+ int this_cpu;
+
+ need_resched_back:
+ prev = current;
+ this_cpu = prev->processor;
+
+ spin_lock_irq(&schedule_data[this_cpu].lock);
+
+ ASSERT(!in_interrupt());
+ ASSERT(__task_on_runqueue(prev));
+
+ __move_last_runqueue(prev);
+
+ switch ( prev->state )
+ {
+ case TASK_INTERRUPTIBLE:
+ if ( signal_pending(prev) )
+ {
+ prev->state = TASK_RUNNING;
+ break;
+ }
+ default:
+ __del_from_runqueue(prev);
+ case TASK_RUNNING:;
+ }
+ clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events);
+
+ next = NULL;
+ list_for_each(tmp, &schedule_data[this_cpu].runqueue) {
+ p = list_entry(tmp, struct task_struct, run_list);
+ next = p;
+ if ( !is_idle_task(next) ) break;
+ }
+
+ prev->has_cpu = 0;
+ next->has_cpu = 1;
+
+ schedule_data[this_cpu].prev = prev;
+ schedule_data[this_cpu].curr = next;
+
+ spin_unlock_irq(&schedule_data[this_cpu].lock);
+
+ if ( unlikely(prev == next) )
+ {
+ /* We won't go through the normal tail, so do this by hand */
+ prev->policy &= ~SCHED_YIELD;
+ goto same_process;
+ }
+
+ prepare_to_switch();
+ switch_to(prev, next);
+ prev = schedule_data[this_cpu].prev;
+
+ prev->policy &= ~SCHED_YIELD;
+ if ( prev->state == TASK_DYING ) release_task(prev);
+
+ same_process:
+ update_dom_time(current->shared_info);
+
+ if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) )
+ goto need_resched_back;
+ return;
+}
+
+/*
+ * The scheduling timer.
+ */
+static __cacheline_aligned int count[NR_CPUS];
+static void sched_timer(unsigned long foo)
+{
+ int cpu = smp_processor_id();
+ struct task_struct *curr = schedule_data[cpu].curr;
+ s_time_t now;
+ int res;
+
+ /* Reschedule every 5 ticks. */
+ if (count[cpu] >= 5) {
+ set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
+ count[cpu] = 0;
+ }
+ count[cpu]++;
+
+ /*
+ * Deliver virtual timer interrupts to domains if we are CPU 0. XXX RN:
+ * We don't have a per-CPU list of domains yet, otherwise we would use
+ * that. Plus, this should be removed anyway once domains "know" about
+ * virtual time and timeouts. But it's better here than where it was
+ * before.
+ */
+ if (cpu == 0) {
+ struct task_struct *p;
+ unsigned long cpu_mask = 0;
+
+ /* send virtual timer interrupt */
+ read_lock(&tasklist_lock);
+ p = &idle0_task;
+ do {
+ if ( is_idle_task(p) ) continue;
+ cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
+ }
+ while ( (p = p->next_task) != &idle0_task );
+ read_unlock(&tasklist_lock);
+ guest_event_notify(cpu_mask);
+ }
+
+ again:
+ now = NOW();
+ s_timer[cpu].expires = now + MILLISECS(10);
+ res=add_ac_timer(&s_timer[cpu]);
+
+ TRC(printk("SCHED[%02d] timer(): now=0x%08X%08X timo=0x%08X%08X\n",
+ cpu, (u32)(now>>32), (u32)now,
+ (u32)(s_timer[cpu].expires>>32), (u32)s_timer[cpu].expires));
+ if ( res == 1 )
+ goto again;
+}
+
+
+/*
+ * Initialise the data structures
+ */
+void __init scheduler_init(void)
+{
+ int i;
+
+ printk("Initialising schedulers\n");
+
+ for ( i = 0; i < NR_CPUS; i++ )
+ {
+ INIT_LIST_HEAD(&schedule_data[i].runqueue);
+ spin_lock_init(&schedule_data[i].lock);
+ schedule_data[i].prev = &idle0_task;
+ schedule_data[i].curr = &idle0_task;
+
+ /* a timer for each CPU */
+ init_ac_timer(&s_timer[i]);
+ s_timer[i].function = &sched_timer;
+ }
+}
+
+/*
+ * Start a scheduler for each CPU
+ * This has to be done *after* the timers, e.g., APICs, have been initialised
+ */
+void schedulers_start(void)
+{
+ printk("Start schedulers\n");
+ __cli();
+ sched_timer(0);
+ smp_call_function((void *)sched_timer, NULL, 1, 1);
+ __sti();
+}
diff --git a/xen/common/slab.c b/xen/common/slab.c
new file mode 100644
index 0000000000..3452e89aa7
--- /dev/null
+++ b/xen/common/slab.c
@@ -0,0 +1,1945 @@
+/*
+ * linux/mm/slab.c
+ * Written by Mark Hemment, 1996/97.
+ * (markhe@nextd.demon.co.uk)
+ *
+ * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
+ *
+ * Major cleanup, different bufctl logic, per-cpu arrays
+ * (c) 2000 Manfred Spraul
+ *
+ * An implementation of the Slab Allocator as described in outline in;
+ * UNIX Internals: The New Frontiers by Uresh Vahalia
+ * Pub: Prentice Hall ISBN 0-13-101908-2
+ * or with a little more detail in;
+ * The Slab Allocator: An Object-Caching Kernel Memory Allocator
+ * Jeff Bonwick (Sun Microsystems).
+ * Presented at: USENIX Summer 1994 Technical Conference
+ *
+ *
+ * The memory is organized in caches, one cache for each object type.
+ * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
+ * Each cache consists of many slabs (they are small (usually one
+ * page long) and always contiguous), and each slab contains multiple
+ * initialized objects.
+ *
+ * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
+ * normal). If you need a special memory type, then you must create a new
+ * cache for that memory type.
+ *
+ * In order to reduce fragmentation, the slabs are sorted in 3 groups:
+ * full slabs with 0 free objects
+ * partial slabs
+ * empty slabs with no allocated objects
+ *
+ * If partial slabs exist, then new allocations come from these slabs,
+ * otherwise from empty slabs or new slabs are allocated.
+ *
+ * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
+ * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
+ *
+ * On SMP systems, each cache has a short per-cpu head array, most allocs
+ * and frees go into that array, and if that array overflows, then 1/2
+ * of the entries in the array are given back into the global cache.
+ * This reduces the number of spinlock operations.
+ *
+ * The c_cpuarray may not be read with enabled local interrupts.
+ *
+ * SMP synchronization:
+ * constructors and destructors are called without any locking.
+ * Several members in kmem_cache_t and slab_t never change, they
+ * are accessed without any locking.
+ * The per-cpu arrays are never accessed from the wrong cpu, no locking.
+ * The non-constant members are protected with a per-cache irq spinlock.
+ *
+ * Further notes from the original documentation:
+ *
+ * 11 April '97. Started multi-threading - markhe
+ * The global cache-chain is protected by the semaphore 'cache_chain_sem'.
+ * The sem is only needed when accessing/extending the cache-chain, which
+ * can never happen inside an interrupt (kmem_cache_create(),
+ * kmem_cache_shrink() and kmem_cache_reap()).
+ *
+ * To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
+ * may be sleeping and therefore not holding the semaphore/lock), the
+ * growing field is used. This also prevents reaping from a cache.
+ *
+ * At present, each engine can be growing a cache. This should be blocked.
+ *
+ */
+
+/*
+ * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
+ * SLAB_RED_ZONE & SLAB_POISON.
+ * 0 for faster, smaller code (especially in the critical paths).
+ *
+ * STATS - 1 to collect stats for /proc/slabinfo.
+ * 0 for faster, smaller code (especially in the critical paths).
+ *
+ * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/slab.h>
+#include <xeno/list.h>
+#include <xeno/spinlock.h>
+#include <xeno/errno.h>
+#include <xeno/smp.h>
+#include <xeno/sched.h>
+
+
+#ifdef CONFIG_DEBUG_SLAB
+#define DEBUG 1
+#define STATS 1
+#define FORCED_DEBUG 1
+#else
+#define DEBUG 0
+#define STATS 0
+#define FORCED_DEBUG 0
+#endif
+
+/*
+ * Parameters for kmem_cache_reap
+ */
+#define REAP_SCANLEN 10
+#define REAP_PERFECT 10
+
+/* Shouldn't this be in a header file somewhere? */
+#define BYTES_PER_WORD sizeof(void *)
+
+/* Legal flag mask for kmem_cache_create(). */
+#if DEBUG
+# define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
+ SLAB_POISON | SLAB_HWCACHE_ALIGN | \
+ SLAB_NO_REAP | SLAB_CACHE_DMA)
+#else
+# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | SLAB_CACHE_DMA)
+#endif
+
+/*
+ * kmem_bufctl_t:
+ *
+ * Bufctl's are used for linking objs within a slab
+ * linked offsets.
+ *
+ * This implementation relies on "struct page" for locating the cache &
+ * slab an object belongs to.
+ * This allows the bufctl structure to be small (one int), but limits
+ * the number of objects a slab (not a cache) can contain when off-slab
+ * bufctls are used. The limit is the size of the largest general cache
+ * that does not use off-slab slabs.
+ * For 32-bit archs with 4 kB pages, this is 56.
+ * This is not serious, as it is only for large objects, when it is unwise
+ * to have too many per slab.
+ * Note: This limit can be raised by introducing a general cache whose size
+ * is less than 512 (PAGE_SIZE<<3), but greater than 256.
+ */
+
+#define BUFCTL_END 0xffffFFFF
+#define SLAB_LIMIT 0xffffFFFE
+typedef unsigned int kmem_bufctl_t;
+
+/* Max number of objs-per-slab for caches which use off-slab slabs.
+ * Needed to avoid a possible looping condition in kmem_cache_grow().
+ */
+static unsigned long offslab_limit;
+
+/*
+ * slab_t
+ *
+ * Manages the objs in a slab. Placed either at the beginning of mem allocated
+ * for a slab, or allocated from a general cache.
+ * Slabs are chained into three lists: fully used, partial, fully free slabs.
+ */
+typedef struct slab_s {
+ struct list_head list;
+ unsigned long colouroff;
+ void *s_mem; /* including colour offset */
+ unsigned int inuse; /* num of objs active in slab */
+ kmem_bufctl_t free;
+} slab_t;
+
+#define slab_bufctl(slabp) \
+ ((kmem_bufctl_t *)(((slab_t*)slabp)+1))
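+
+/*
+ * Illustrative layout note for the on-slab case: the slab_t sits at the
+ * start of the slab's memory, immediately followed by one kmem_bufctl_t
+ * per object, with the objects themselves at s_mem. slab_bufctl(slabp)[i]
+ * holds the index of the next free object, forming a free list that
+ * starts at slabp->free and is terminated by BUFCTL_END.
+ */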
+
+/*
+ * cpucache_t
+ *
+ * Per cpu structures
+ * The limit is stored in the per-cpu structure to reduce the data cache
+ * footprint.
+ */
+typedef struct cpucache_s {
+ unsigned int avail;
+ unsigned int limit;
+} cpucache_t;
+
+#define cc_entry(cpucache) \
+ ((void **)(((cpucache_t*)(cpucache))+1))
+#define cc_data(cachep) \
+ ((cachep)->cpudata[smp_processor_id()])
+/*
+ * kmem_cache_t
+ *
+ * manages a cache.
+ */
+
+#define CACHE_NAMELEN 20 /* max name length for a slab cache */
+
+struct kmem_cache_s {
+/* 1) each alloc & free */
+ /* full, partial first, then free */
+ struct list_head slabs_full;
+ struct list_head slabs_partial;
+ struct list_head slabs_free;
+ unsigned int objsize;
+ unsigned int flags; /* constant flags */
+ unsigned int num; /* # of objs per slab */
+ spinlock_t spinlock;
+#ifdef CONFIG_SMP
+ unsigned int batchcount;
+#endif
+
+/* 2) slab additions /removals */
+ /* order of pgs per slab (2^n) */
+ unsigned int gfporder;
+
+ /* force GFP flags, e.g. GFP_DMA */
+ unsigned int gfpflags;
+
+ size_t colour; /* cache colouring range */
+ unsigned int colour_off; /* colour offset */
+ unsigned int colour_next; /* cache colouring */
+ kmem_cache_t *slabp_cache;
+ unsigned int growing;
+ unsigned int dflags; /* dynamic flags */
+
+ /* constructor func */
+ void (*ctor)(void *, kmem_cache_t *, unsigned long);
+
+ /* de-constructor func */
+ void (*dtor)(void *, kmem_cache_t *, unsigned long);
+
+ unsigned long failures;
+
+/* 3) cache creation/removal */
+ char name[CACHE_NAMELEN];
+ struct list_head next;
+#ifdef CONFIG_SMP
+/* 4) per-cpu data */
+ cpucache_t *cpudata[NR_CPUS];
+#endif
+#if STATS
+ unsigned long num_active;
+ unsigned long num_allocations;
+ unsigned long high_mark;
+ unsigned long grown;
+ unsigned long reaped;
+ unsigned long errors;
+#ifdef CONFIG_SMP
+ atomic_t allochit;
+ atomic_t allocmiss;
+ atomic_t freehit;
+ atomic_t freemiss;
+#endif
+#endif
+};
+
+/* internal c_flags */
+#define CFLGS_OFF_SLAB 0x010000UL /* slab management in own cache */
+#define CFLGS_OPTIMIZE 0x020000UL /* optimized slab lookup */
+
+/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
+#define DFLGS_GROWN 0x000001UL /* don't reap a recently grown */
+
+#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
+#define OPTIMIZE(x) ((x)->flags & CFLGS_OPTIMIZE)
+#define GROWN(x)	((x)->dflags & DFLGS_GROWN)
+
+#if STATS
+#define STATS_INC_ACTIVE(x) ((x)->num_active++)
+#define STATS_DEC_ACTIVE(x) ((x)->num_active--)
+#define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
+#define STATS_INC_GROWN(x) ((x)->grown++)
+#define STATS_INC_REAPED(x) ((x)->reaped++)
+#define STATS_SET_HIGH(x) do { if ((x)->num_active > (x)->high_mark) \
+ (x)->high_mark = (x)->num_active; \
+ } while (0)
+#define STATS_INC_ERR(x) ((x)->errors++)
+#else
+#define STATS_INC_ACTIVE(x) do { } while (0)
+#define STATS_DEC_ACTIVE(x) do { } while (0)
+#define STATS_INC_ALLOCED(x) do { } while (0)
+#define STATS_INC_GROWN(x) do { } while (0)
+#define STATS_INC_REAPED(x) do { } while (0)
+#define STATS_SET_HIGH(x) do { } while (0)
+#define STATS_INC_ERR(x) do { } while (0)
+#endif
+
+#if STATS && defined(CONFIG_SMP)
+#define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
+#define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
+#define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
+#define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss)
+#else
+#define STATS_INC_ALLOCHIT(x) do { } while (0)
+#define STATS_INC_ALLOCMISS(x) do { } while (0)
+#define STATS_INC_FREEHIT(x) do { } while (0)
+#define STATS_INC_FREEMISS(x) do { } while (0)
+#endif
+
+#if DEBUG
+/* Magic nums for obj red zoning.
+ * Placed in the first word before and the first word after an obj.
+ */
+#define RED_MAGIC1 0x5A2CF071UL /* when obj is active */
+#define RED_MAGIC2 0x170FC2A5UL /* when obj is inactive */
+
+/* ...and for poisoning */
+#define POISON_BYTE 0x5a /* byte value for poisoning */
+#define POISON_END 0xa5 /* end-byte of poisoning */
+
+#endif
+
+/* maximum size of an obj (in 2^order pages) */
+#define MAX_OBJ_ORDER 5 /* 32 pages */
+
+/*
+ * Do not go above this order unless not even a single object would
+ * otherwise fit into the slab.
+ */
+#define BREAK_GFP_ORDER_HI 2
+#define BREAK_GFP_ORDER_LO 1
+static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+
+/*
+ * Absolute limit for the gfp order
+ */
+#define MAX_GFP_ORDER 5 /* 32 pages */
+
+
+/* Macros for storing/retrieving the cachep and/or slab from the
+ * global 'mem_map'. These are used to find the slab an obj belongs to.
+ * With kfree(), these are used to find the cache an obj belongs to.
+ */
+#define SET_PAGE_CACHE(pg,x) ((pg)->list.next = (struct list_head *)(x))
+#define GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->list.next)
+#define SET_PAGE_SLAB(pg,x) ((pg)->list.prev = (struct list_head *)(x))
+#define GET_PAGE_SLAB(pg) ((slab_t *)(pg)->list.prev)
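+
+/*
+ * Thus, given any object pointer, the owning cache and slab are a
+ * single mem_map lookup away; this is exactly what kfree() and
+ * kmem_cache_free_one() do:
+ *
+ *	cachep = GET_PAGE_CACHE(virt_to_page(objp));
+ *	slabp  = GET_PAGE_SLAB(virt_to_page(objp));
+ */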
+
+/* Size description struct for general caches. */
+typedef struct cache_sizes {
+ size_t cs_size;
+ kmem_cache_t *cs_cachep;
+ kmem_cache_t *cs_dmacachep;
+} cache_sizes_t;
+
+static cache_sizes_t cache_sizes[] = {
+#if PAGE_SIZE == 4096
+ { 32, NULL, NULL},
+#endif
+ { 64, NULL, NULL},
+ { 128, NULL, NULL},
+ { 256, NULL, NULL},
+ { 512, NULL, NULL},
+ { 1024, NULL, NULL},
+ { 2048, NULL, NULL},
+ { 4096, NULL, NULL},
+ { 8192, NULL, NULL},
+ { 16384, NULL, NULL},
+ { 32768, NULL, NULL},
+ { 65536, NULL, NULL},
+ {131072, NULL, NULL},
+ { 0, NULL, NULL}
+};
+
+/* internal cache of cache description objs */
+static kmem_cache_t cache_cache = {
+ slabs_full: LIST_HEAD_INIT(cache_cache.slabs_full),
+ slabs_partial: LIST_HEAD_INIT(cache_cache.slabs_partial),
+ slabs_free: LIST_HEAD_INIT(cache_cache.slabs_free),
+ objsize: sizeof(kmem_cache_t),
+ flags: SLAB_NO_REAP,
+ spinlock: SPIN_LOCK_UNLOCKED,
+ colour_off: L1_CACHE_BYTES,
+ name: "kmem_cache",
+};
+
+/* Guard access to the cache-chain. */
+/* KAF: No semaphores, as we'll never wait around for I/O. */
+static spinlock_t cache_chain_sem;
+#define init_MUTEX(_m) spin_lock_init(_m)
+#define down(_m) spin_lock_irqsave(_m,spin_flags)
+#define up(_m) spin_unlock_irqrestore(_m,spin_flags)
+
+/* Clock-hand placemarker for the cache reaper. */
+static kmem_cache_t *clock_searchp = &cache_cache;
+
+#define cache_chain (cache_cache.next)
+
+#ifdef CONFIG_SMP
+/*
+ * chicken and egg problem: delay the per-cpu array allocation
+ * until the general caches are up.
+ */
+static int g_cpucache_up;
+
+static void enable_cpucache (kmem_cache_t *cachep);
+static void enable_all_cpucaches (void);
+#endif
+
+/* Calculate the number of objs, wastage, and bytes left over for a
+ * given slab size. */
+static void kmem_cache_estimate (unsigned long gfporder, size_t size,
+ int flags, size_t *left_over, unsigned int *num)
+{
+ int i;
+ size_t wastage = PAGE_SIZE<<gfporder;
+ size_t extra = 0;
+ size_t base = 0;
+
+ if (!(flags & CFLGS_OFF_SLAB)) {
+ base = sizeof(slab_t);
+ extra = sizeof(kmem_bufctl_t);
+ }
+ i = 0;
+ while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
+ i++;
+ if (i > 0)
+ i--;
+
+ if (i > SLAB_LIMIT)
+ i = SLAB_LIMIT;
+
+ *num = i;
+ wastage -= i*size;
+ wastage -= L1_CACHE_ALIGN(base+i*extra);
+ *left_over = wastage;
+}
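+
+/*
+ * A worked example (illustrative only; assumes 4 kB pages, 32-byte
+ * cache lines and a 24-byte slab_t): for a 64-byte on-slab cache at
+ * order 0, the loop settles on i = 59, since
+ * 59*64 + L1_CACHE_ALIGN(24 + 59*4) = 3776 + 288 = 4064 <= 4096,
+ * while i = 60 would need 4128 bytes. That leaves 32 bytes of wastage
+ * available for colouring.
+ */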
+
+/* Initialisation - setup the `cache' cache. */
+void __init kmem_cache_init(void)
+{
+ size_t left_over;
+
+ init_MUTEX(&cache_chain_sem);
+ INIT_LIST_HEAD(&cache_chain);
+
+ kmem_cache_estimate(0, cache_cache.objsize, 0,
+ &left_over, &cache_cache.num);
+ if (!cache_cache.num)
+ BUG();
+
+ cache_cache.colour = left_over/cache_cache.colour_off;
+ cache_cache.colour_next = 0;
+}
+
+
+/* Initialisation - setup remaining internal and general caches.
+ * Called after the gfp() functions have been enabled, and before smp_init().
+ */
+void __init kmem_cache_sizes_init(unsigned long num_physpages)
+{
+ cache_sizes_t *sizes = cache_sizes;
+ char name[20];
+ /*
+ * Fragmentation resistance on low memory - only use bigger
+ * page orders on machines with more than 32MB of memory.
+ */
+ if (num_physpages > (32 << 20) >> PAGE_SHIFT)
+ slab_break_gfp_order = BREAK_GFP_ORDER_HI;
+ do {
+ /* For performance, all the general caches are L1 aligned.
+ * This should be particularly beneficial on SMP boxes, as it
+ * eliminates "false sharing".
+ * Note for systems short on memory removing the alignment will
+ * allow tighter packing of the smaller caches. */
+ sprintf(name,"size-%Zd",sizes->cs_size);
+ if (!(sizes->cs_cachep =
+ kmem_cache_create(name, sizes->cs_size,
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
+ BUG();
+ }
+
+ /* Inc off-slab bufctl limit until the ceiling is hit. */
+ if (!(OFF_SLAB(sizes->cs_cachep))) {
+ offslab_limit = sizes->cs_size-sizeof(slab_t);
+ offslab_limit /= 2;
+ }
+ sprintf(name, "size-%Zd(DMA)",sizes->cs_size);
+ sizes->cs_dmacachep = kmem_cache_create(name, sizes->cs_size, 0,
+ SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!sizes->cs_dmacachep)
+ BUG();
+ sizes++;
+ } while (sizes->cs_size);
+}
+
+int __init kmem_cpucache_init(void)
+{
+#ifdef CONFIG_SMP
+ g_cpucache_up = 1;
+ enable_all_cpucaches();
+#endif
+ return 0;
+}
+
+/*__initcall(kmem_cpucache_init);*/
+
+/* Interface to system's page allocator. No need to hold the cache-lock.
+ */
+static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
+{
+ void *addr;
+
+ /*
+ * If we requested dmaable memory, we will get it. Even if we
+ * did not request dmaable memory, we might get it, but that
+ * would be relatively rare and ignorable.
+ */
+ flags |= cachep->gfpflags;
+ addr = (void*) __get_free_pages(flags, cachep->gfporder);
+	/* Assume that now we have the pages, no one else can legally
+	 * mess with the 'struct page's.
+ * However vm_scan() might try to test the structure to see if
+ * it is a named-page or buffer-page. The members it tests are
+ * of no interest here.....
+ */
+ return addr;
+}
+
+/* Interface to system's page release. */
+static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
+{
+ unsigned long i = (1<<cachep->gfporder);
+ struct pfn_info *page = virt_to_page(addr);
+
+ /* free_pages() does not clear the type bit - we do that.
+ * The pages have been unlinked from their cache-slab,
+ * but their 'struct page's might be accessed in
+ * vm_scan(). Shouldn't be a worry.
+ */
+ while (i--) {
+ PageClearSlab(page);
+ page++;
+ }
+
+ free_pages((unsigned long)addr, cachep->gfporder);
+}
+
+#if DEBUG
+static inline void kmem_poison_obj (kmem_cache_t *cachep, void *addr)
+{
+ int size = cachep->objsize;
+ if (cachep->flags & SLAB_RED_ZONE) {
+ addr += BYTES_PER_WORD;
+ size -= 2*BYTES_PER_WORD;
+ }
+ memset(addr, POISON_BYTE, size);
+ *(unsigned char *)(addr+size-1) = POISON_END;
+}
+
+static inline int kmem_check_poison_obj (kmem_cache_t *cachep, void *addr)
+{
+ int size = cachep->objsize;
+ void *end;
+ if (cachep->flags & SLAB_RED_ZONE) {
+ addr += BYTES_PER_WORD;
+ size -= 2*BYTES_PER_WORD;
+ }
+ end = memchr(addr, POISON_END, size);
+ if (end != (addr+size-1))
+ return 1;
+ return 0;
+}
+#endif
+
+/* Destroy all the objs in a slab, and release the mem back to the system.
+ * Before calling, the slab must have been unlinked from the cache.
+ * The cache-lock is not held/needed.
+ */
+static void kmem_slab_destroy (kmem_cache_t *cachep, slab_t *slabp)
+{
+ if (cachep->dtor
+#if DEBUG
+ || cachep->flags & (SLAB_POISON | SLAB_RED_ZONE)
+#endif
+ ) {
+ int i;
+ for (i = 0; i < cachep->num; i++) {
+ void* objp = slabp->s_mem+cachep->objsize*i;
+#if DEBUG
+ if (cachep->flags & SLAB_RED_ZONE) {
+ if (*((unsigned long*)(objp)) != RED_MAGIC1)
+ BUG();
+ if (*((unsigned long*)(objp + cachep->objsize
+ -BYTES_PER_WORD)) != RED_MAGIC1)
+ BUG();
+ objp += BYTES_PER_WORD;
+ }
+#endif
+ if (cachep->dtor)
+ (cachep->dtor)(objp, cachep, 0);
+#if DEBUG
+ if (cachep->flags & SLAB_RED_ZONE) {
+ objp -= BYTES_PER_WORD;
+ }
+ if ((cachep->flags & SLAB_POISON) &&
+ kmem_check_poison_obj(cachep, objp))
+ BUG();
+#endif
+ }
+ }
+
+ kmem_freepages(cachep, slabp->s_mem-slabp->colouroff);
+ if (OFF_SLAB(cachep))
+ kmem_cache_free(cachep->slabp_cache, slabp);
+}
+
+/**
+ * kmem_cache_create - Create a cache.
+ * @name: A string which is used in /proc/slabinfo to identify this cache.
+ * @size: The size of objects to be created in this cache.
+ * @offset: The offset to use within the page.
+ * @flags: SLAB flags
+ * @ctor: A constructor for the objects.
+ * @dtor: A destructor for the objects.
+ *
+ * Returns a ptr to the cache on success, NULL on failure.
+ * Cannot be called within an interrupt, but can be interrupted.
+ * The @ctor is run when new pages are allocated by the cache
+ * and the @dtor is run before the pages are handed back.
+ * The flags are
+ *
+ * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
+ * to catch references to uninitialised memory.
+ *
+ * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
+ * for buffer overruns.
+ *
+ * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
+ * memory pressure.
+ *
+ * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
+ * cacheline. This can be beneficial if you're counting cycles as closely
+ * as davem.
+ */
+kmem_cache_t *
+kmem_cache_create (const char *name, size_t size, size_t offset,
+ unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
+ void (*dtor)(void*, kmem_cache_t *, unsigned long))
+{
+ const char *func_nm = KERN_ERR "kmem_create: ";
+ size_t left_over, align, slab_size;
+ kmem_cache_t *cachep = NULL;
+ unsigned long spin_flags;
+
+ /*
+ * Sanity checks... these are all serious usage bugs.
+ */
+ if ((!name) ||
+ ((strlen(name) >= CACHE_NAMELEN - 1)) ||
+ (size < BYTES_PER_WORD) ||
+ (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
+ (dtor && !ctor) ||
+ (offset < 0 || offset > size))
+ BUG();
+
+#if DEBUG
+ if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
+		/* No constructor, but initial state check requested */
+ printk("%sNo con, but init state check requested - %s\n", func_nm, name);
+ flags &= ~SLAB_DEBUG_INITIAL;
+ }
+
+ if ((flags & SLAB_POISON) && ctor) {
+ /* request for poisoning, but we can't do that with a constructor */
+ printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
+ flags &= ~SLAB_POISON;
+ }
+#if FORCED_DEBUG
+ if (size < (PAGE_SIZE>>3))
+ /*
+ * do not red zone large object, causes severe
+ * fragmentation.
+ */
+ flags |= SLAB_RED_ZONE;
+ if (!ctor)
+ flags |= SLAB_POISON;
+#endif
+#endif
+
+ /*
+	 * Always check flags; a caller might be expecting debug
+ * support which isn't available.
+ */
+ if (flags & ~CREATE_MASK)
+ BUG();
+
+ /* Get cache's description obj. */
+ cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
+ if (!cachep)
+ goto opps;
+ memset(cachep, 0, sizeof(kmem_cache_t));
+
+ /* Check that size is in terms of words. This is needed to avoid
+ * unaligned accesses for some archs when redzoning is used, and makes
+ * sure any on-slab bufctl's are also correctly aligned.
+ */
+ if (size & (BYTES_PER_WORD-1)) {
+ size += (BYTES_PER_WORD-1);
+ size &= ~(BYTES_PER_WORD-1);
+ printk("%sForcing size word alignment - %s\n", func_nm, name);
+ }
+
+#if DEBUG
+ if (flags & SLAB_RED_ZONE) {
+ /*
+ * There is no point trying to honour cache alignment
+ * when redzoning.
+ */
+ flags &= ~SLAB_HWCACHE_ALIGN;
+ size += 2*BYTES_PER_WORD; /* words for redzone */
+ }
+#endif
+ align = BYTES_PER_WORD;
+ if (flags & SLAB_HWCACHE_ALIGN)
+ align = L1_CACHE_BYTES;
+
+ /* Determine if the slab management is 'on' or 'off' slab. */
+ if (size >= (PAGE_SIZE>>3))
+ /*
+ * Size is large, assume best to place the slab management obj
+ * off-slab (should allow better packing of objs).
+ */
+ flags |= CFLGS_OFF_SLAB;
+
+ if (flags & SLAB_HWCACHE_ALIGN) {
+ /* Need to adjust size so that objs are cache aligned. */
+ /* Small obj size, can get at least two per cache line. */
+		/* FIXME: only powers of 2 are supported; the old scheme was better */
+ while (size < align/2)
+ align /= 2;
+ size = (size+align-1)&(~(align-1));
+ }
+
+	/* Calculate size (in pages) of slabs, and the number of objs per slab.
+ * This could be made much more intelligent. For now, try to avoid
+ * using high page-orders for slabs. When the gfp() funcs are more
+ * friendly towards high-order requests, this should be changed.
+ */
+ do {
+ unsigned int break_flag = 0;
+cal_wastage:
+ kmem_cache_estimate(cachep->gfporder, size, flags,
+ &left_over, &cachep->num);
+ if (break_flag)
+ break;
+ if (cachep->gfporder >= MAX_GFP_ORDER)
+ break;
+ if (!cachep->num)
+ goto next;
+ if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
+ /* Oops, this num of objs will cause problems. */
+ cachep->gfporder--;
+ break_flag++;
+ goto cal_wastage;
+ }
+
+ /*
+		 * A large number of objs is good, but very large slabs are
+		 * currently bad for the gfp()s.
+ */
+ if (cachep->gfporder >= slab_break_gfp_order)
+ break;
+
+ if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
+ break; /* Acceptable internal fragmentation. */
+next:
+ cachep->gfporder++;
+ } while (1);
+
+ if (!cachep->num) {
+ printk("kmem_cache_create: couldn't create cache %s.\n", name);
+ kmem_cache_free(&cache_cache, cachep);
+ cachep = NULL;
+ goto opps;
+ }
+ slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t));
+
+ /*
+ * If the slab has been placed off-slab, and we have enough space then
+ * move it on-slab. This is at the expense of any extra colouring.
+ */
+ if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
+ flags &= ~CFLGS_OFF_SLAB;
+ left_over -= slab_size;
+ }
+
+ /* Offset must be a multiple of the alignment. */
+ offset += (align-1);
+ offset &= ~(align-1);
+ if (!offset)
+ offset = L1_CACHE_BYTES;
+ cachep->colour_off = offset;
+ cachep->colour = left_over/offset;
+
+ /* init remaining fields */
+ if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
+ flags |= CFLGS_OPTIMIZE;
+
+ cachep->flags = flags;
+ cachep->gfpflags = 0;
+ if (flags & SLAB_CACHE_DMA)
+ cachep->gfpflags |= GFP_DMA;
+ spin_lock_init(&cachep->spinlock);
+ cachep->objsize = size;
+ INIT_LIST_HEAD(&cachep->slabs_full);
+ INIT_LIST_HEAD(&cachep->slabs_partial);
+ INIT_LIST_HEAD(&cachep->slabs_free);
+
+ if (flags & CFLGS_OFF_SLAB)
+ cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
+ cachep->ctor = ctor;
+ cachep->dtor = dtor;
+ /* Copy name over so we don't have problems with unloaded modules */
+ strcpy(cachep->name, name);
+
+#ifdef CONFIG_SMP
+ if (g_cpucache_up)
+ enable_cpucache(cachep);
+#endif
+ /* Need the semaphore to access the chain. */
+ down(&cache_chain_sem);
+ {
+ struct list_head *p;
+
+ list_for_each(p, &cache_chain) {
+ kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
+
+ /* The name field is constant - no lock needed. */
+ if (!strcmp(pc->name, name))
+ BUG();
+ }
+ }
+
+ /* There is no reason to lock our new cache before we
+ * link it in - no one knows about it yet...
+ */
+ list_add(&cachep->next, &cache_chain);
+ up(&cache_chain_sem);
+opps:
+ return cachep;
+}
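+
+/*
+ * Typical usage (a sketch only; the "foo" names are illustrative, not
+ * part of this file):
+ *
+ *	static kmem_cache_t *foo_cachep;
+ *
+ *	foo_cachep = kmem_cache_create("foo", sizeof(struct foo), 0,
+ *				       SLAB_HWCACHE_ALIGN, NULL, NULL);
+ *	if (!foo_cachep)
+ *		BUG();
+ *
+ *	f = kmem_cache_alloc(foo_cachep, SLAB_KERNEL);
+ *	...
+ *	kmem_cache_free(foo_cachep, f);
+ */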
+
+
+#if DEBUG
+/*
+ * This checks whether the kmem_cache_t pointer is chained in the
+ * cache_cache list. -arca
+ */
+static int is_chained_kmem_cache(kmem_cache_t * cachep)
+{
+ struct list_head *p;
+ int ret = 0;
+ unsigned long spin_flags;
+
+ /* Find the cache in the chain of caches. */
+ down(&cache_chain_sem);
+ list_for_each(p, &cache_chain) {
+ if (p == &cachep->next) {
+ ret = 1;
+ break;
+ }
+ }
+ up(&cache_chain_sem);
+
+ return ret;
+}
+#else
+#define is_chained_kmem_cache(x) 1
+#endif
+
+#ifdef CONFIG_SMP
+/*
+ * Waits for all CPUs to execute func().
+ */
+static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
+{
+ local_irq_disable();
+ func(arg);
+ local_irq_enable();
+
+ if (smp_call_function(func, arg, 1, 1))
+ BUG();
+}
+typedef struct ccupdate_struct_s
+{
+ kmem_cache_t *cachep;
+ cpucache_t *new[NR_CPUS];
+} ccupdate_struct_t;
+
+static void do_ccupdate_local(void *info)
+{
+ ccupdate_struct_t *new = (ccupdate_struct_t *)info;
+ cpucache_t *old = cc_data(new->cachep);
+
+ cc_data(new->cachep) = new->new[smp_processor_id()];
+ new->new[smp_processor_id()] = old;
+}
+
+static void free_block (kmem_cache_t* cachep, void** objpp, int len);
+
+static void drain_cpu_caches(kmem_cache_t *cachep)
+{
+ ccupdate_struct_t new;
+ int i;
+ unsigned long spin_flags;
+
+ memset(&new.new,0,sizeof(new.new));
+
+ new.cachep = cachep;
+
+ down(&cache_chain_sem);
+ smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
+
+ for (i = 0; i < smp_num_cpus; i++) {
+ cpucache_t* ccold = new.new[cpu_logical_map(i)];
+ if (!ccold || (ccold->avail == 0))
+ continue;
+ local_irq_disable();
+ free_block(cachep, cc_entry(ccold), ccold->avail);
+ local_irq_enable();
+ ccold->avail = 0;
+ }
+ smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
+ up(&cache_chain_sem);
+}
+
+#else
+#define drain_cpu_caches(cachep) do { } while (0)
+#endif
+
+static int __kmem_cache_shrink(kmem_cache_t *cachep)
+{
+ slab_t *slabp;
+ int ret;
+
+ drain_cpu_caches(cachep);
+
+ spin_lock_irq(&cachep->spinlock);
+
+ /* If the cache is growing, stop shrinking. */
+ while (!cachep->growing) {
+ struct list_head *p;
+
+ p = cachep->slabs_free.prev;
+ if (p == &cachep->slabs_free)
+ break;
+
+ slabp = list_entry(cachep->slabs_free.prev, slab_t, list);
+#if DEBUG
+ if (slabp->inuse)
+ BUG();
+#endif
+ list_del(&slabp->list);
+
+ spin_unlock_irq(&cachep->spinlock);
+ kmem_slab_destroy(cachep, slabp);
+ spin_lock_irq(&cachep->spinlock);
+ }
+ ret = !list_empty(&cachep->slabs_full) || !list_empty(&cachep->slabs_partial);
+ spin_unlock_irq(&cachep->spinlock);
+ return ret;
+}
+
+/**
+ * kmem_cache_shrink - Shrink a cache.
+ * @cachep: The cache to shrink.
+ *
+ * Releases as many slabs as possible for a cache.
+ * To help debugging, a zero exit status indicates all slabs were released.
+ */
+int kmem_cache_shrink(kmem_cache_t *cachep)
+{
+ if (!cachep || !is_chained_kmem_cache(cachep))
+ BUG();
+
+ return __kmem_cache_shrink(cachep);
+}
+
+/**
+ * kmem_cache_destroy - delete a cache
+ * @cachep: the cache to destroy
+ *
+ * Remove a kmem_cache_t object from the slab cache.
+ * Returns 0 on success.
+ *
+ * It is expected this function will be called by a module when it is
+ * unloaded. This will remove the cache completely, and avoid a duplicate
+ * cache being allocated each time a module is loaded and unloaded, if the
+ * module doesn't have persistent in-kernel storage across loads and unloads.
+ *
+ * The caller must guarantee that no one will allocate memory from the cache
+ * during the kmem_cache_destroy().
+ */
+int kmem_cache_destroy (kmem_cache_t * cachep)
+{
+ unsigned long spin_flags;
+
+ if (!cachep || cachep->growing)
+ BUG();
+
+ /* Find the cache in the chain of caches. */
+ down(&cache_chain_sem);
+ /* the chain is never empty, cache_cache is never destroyed */
+ if (clock_searchp == cachep)
+ clock_searchp = list_entry(cachep->next.next,
+ kmem_cache_t, next);
+ list_del(&cachep->next);
+ up(&cache_chain_sem);
+
+ if (__kmem_cache_shrink(cachep)) {
+ printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
+ cachep);
+ down(&cache_chain_sem);
+ list_add(&cachep->next,&cache_chain);
+ up(&cache_chain_sem);
+ return 1;
+ }
+#ifdef CONFIG_SMP
+ {
+ int i;
+ for (i = 0; i < NR_CPUS; i++)
+ kfree(cachep->cpudata[i]);
+ }
+#endif
+ kmem_cache_free(&cache_cache, cachep);
+
+ return 0;
+}
+
+/* Get the memory for a slab management obj. */
+static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep,
+ void *objp, int colour_off, int local_flags)
+{
+ slab_t *slabp;
+
+ if (OFF_SLAB(cachep)) {
+ /* Slab management obj is off-slab. */
+ slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
+ if (!slabp)
+ return NULL;
+ } else {
+ /* FIXME: change to
+ slabp = objp
+ * if you enable OPTIMIZE
+ */
+ slabp = objp+colour_off;
+ colour_off += L1_CACHE_ALIGN(cachep->num *
+ sizeof(kmem_bufctl_t) + sizeof(slab_t));
+ }
+ slabp->inuse = 0;
+ slabp->colouroff = colour_off;
+ slabp->s_mem = objp+colour_off;
+
+ return slabp;
+}
+
+static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
+ slab_t * slabp, unsigned long ctor_flags)
+{
+ int i;
+
+ for (i = 0; i < cachep->num; i++) {
+ void* objp = slabp->s_mem+cachep->objsize*i;
+#if DEBUG
+ if (cachep->flags & SLAB_RED_ZONE) {
+ *((unsigned long*)(objp)) = RED_MAGIC1;
+ *((unsigned long*)(objp + cachep->objsize -
+ BYTES_PER_WORD)) = RED_MAGIC1;
+ objp += BYTES_PER_WORD;
+ }
+#endif
+
+ /*
+ * Constructors are not allowed to allocate memory from
+ * the same cache which they are a constructor for.
+ * Otherwise, deadlock. They must also be threaded.
+ */
+ if (cachep->ctor)
+ cachep->ctor(objp, cachep, ctor_flags);
+#if DEBUG
+ if (cachep->flags & SLAB_RED_ZONE)
+ objp -= BYTES_PER_WORD;
+ if (cachep->flags & SLAB_POISON)
+ /* need to poison the objs */
+ kmem_poison_obj(cachep, objp);
+ if (cachep->flags & SLAB_RED_ZONE) {
+ if (*((unsigned long*)(objp)) != RED_MAGIC1)
+ BUG();
+ if (*((unsigned long*)(objp + cachep->objsize -
+ BYTES_PER_WORD)) != RED_MAGIC1)
+ BUG();
+ }
+#endif
+ slab_bufctl(slabp)[i] = i+1;
+ }
+ slab_bufctl(slabp)[i-1] = BUFCTL_END;
+ slabp->free = 0;
+}
+
+/*
+ * Grow (by 1) the number of slabs within a cache. This is called by
+ * kmem_cache_alloc() when there are no active objs left in a cache.
+ */
+static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
+{
+ slab_t *slabp;
+ struct pfn_info *page; unsigned int i;
+ void *objp;
+ size_t offset;
+ unsigned int local_flags;
+ unsigned long ctor_flags;
+ unsigned long save_flags;
+
+ /* Be lazy and only check for valid flags here,
+ * keeping it out of the critical path in kmem_cache_alloc().
+ */
+ if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW))
+ BUG();
+ if (flags & SLAB_NO_GROW)
+ return 0;
+
+#if 0
+ if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
+ BUG();
+#endif
+
+ ctor_flags = SLAB_CTOR_CONSTRUCTOR;
+ local_flags = (flags & SLAB_LEVEL_MASK);
+ if (local_flags == SLAB_ATOMIC)
+ /*
+ * Not allowed to sleep. Need to tell a constructor about
+ * this - it might need to know...
+ */
+ ctor_flags |= SLAB_CTOR_ATOMIC;
+
+ /* About to mess with non-constant members - lock. */
+ spin_lock_irqsave(&cachep->spinlock, save_flags);
+
+	/* Get colour for the slab, and calculate the next value. */
+ offset = cachep->colour_next;
+ cachep->colour_next++;
+ if (cachep->colour_next >= cachep->colour)
+ cachep->colour_next = 0;
+ offset *= cachep->colour_off;
+ cachep->dflags |= DFLGS_GROWN;
+
+ cachep->growing++;
+ spin_unlock_irqrestore(&cachep->spinlock, save_flags);
+
+ /* A series of memory allocations for a new slab.
+	 * Neither the cache-chain semaphore nor the cache-lock is
+	 * held, but the incremented c_growing prevents this
+	 * cache from being reaped or shrunk.
+	 * Note: The cache could be selected for reaping in
+	 * kmem_cache_reap(), but when the final test is made the
+	 * growing value will be seen.
+ */
+
+ /* Get mem for the objs. */
+ if (!(objp = kmem_getpages(cachep, flags)))
+ goto failed;
+
+ /* Get slab management. */
+ if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags)))
+ goto opps1;
+
+ /* Nasty!!!!!! I hope this is OK. */
+ i = 1 << cachep->gfporder;
+ page = virt_to_page(objp);
+ do {
+ SET_PAGE_CACHE(page, cachep);
+ SET_PAGE_SLAB(page, slabp);
+ PageSetSlab(page);
+ page++;
+ } while (--i);
+
+ kmem_cache_init_objs(cachep, slabp, ctor_flags);
+
+ spin_lock_irqsave(&cachep->spinlock, save_flags);
+ cachep->growing--;
+
+ /* Make slab active. */
+ list_add_tail(&slabp->list, &cachep->slabs_free);
+ STATS_INC_GROWN(cachep);
+ cachep->failures = 0;
+
+ spin_unlock_irqrestore(&cachep->spinlock, save_flags);
+ return 1;
+opps1:
+ kmem_freepages(cachep, objp);
+failed:
+ spin_lock_irqsave(&cachep->spinlock, save_flags);
+ cachep->growing--;
+ spin_unlock_irqrestore(&cachep->spinlock, save_flags);
+ return 0;
+}
+
+/*
+ * Perform extra freeing checks:
+ * - detect double free
+ * - detect bad pointers.
+ * Called with the cache-lock held.
+ */
+
+#if DEBUG
+static int kmem_extra_free_checks (kmem_cache_t * cachep,
+ slab_t *slabp, void * objp)
+{
+ int i;
+ unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
+
+ if (objnr >= cachep->num)
+ BUG();
+ if (objp != slabp->s_mem + objnr*cachep->objsize)
+ BUG();
+
+ /* Check slab's freelist to see if this obj is there. */
+ for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
+ if (i == objnr)
+ BUG();
+ }
+ return 0;
+}
+#endif
+
+static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags)
+{
+ if (flags & SLAB_DMA) {
+ if (!(cachep->gfpflags & GFP_DMA))
+ BUG();
+ } else {
+ if (cachep->gfpflags & GFP_DMA)
+ BUG();
+ }
+}
+
+static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
+ slab_t *slabp)
+{
+ void *objp;
+
+ STATS_INC_ALLOCED(cachep);
+ STATS_INC_ACTIVE(cachep);
+ STATS_SET_HIGH(cachep);
+
+ /* get obj pointer */
+ slabp->inuse++;
+ objp = slabp->s_mem + slabp->free*cachep->objsize;
+ slabp->free=slab_bufctl(slabp)[slabp->free];
+
+ if (unlikely(slabp->free == BUFCTL_END)) {
+ list_del(&slabp->list);
+ list_add(&slabp->list, &cachep->slabs_full);
+ }
+#if DEBUG
+ if (cachep->flags & SLAB_POISON)
+ if (kmem_check_poison_obj(cachep, objp))
+ BUG();
+ if (cachep->flags & SLAB_RED_ZONE) {
+ /* Set alloc red-zone, and check old one. */
+ if (xchg((unsigned long *)objp, RED_MAGIC2) !=
+ RED_MAGIC1)
+ BUG();
+ if (xchg((unsigned long *)(objp+cachep->objsize -
+ BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1)
+ BUG();
+ objp += BYTES_PER_WORD;
+ }
+#endif
+ return objp;
+}
+
+/*
+ * Returns a ptr to an obj in the given cache.
+ * caller must guarantee synchronization
+ * #define for the goto optimization 8-)
+ */
+#define kmem_cache_alloc_one(cachep) \
+({ \
+ struct list_head * slabs_partial, * entry; \
+ slab_t *slabp; \
+ \
+ slabs_partial = &(cachep)->slabs_partial; \
+ entry = slabs_partial->next; \
+ if (unlikely(entry == slabs_partial)) { \
+ struct list_head * slabs_free; \
+ slabs_free = &(cachep)->slabs_free; \
+ entry = slabs_free->next; \
+ if (unlikely(entry == slabs_free)) \
+ goto alloc_new_slab; \
+ list_del(entry); \
+ list_add(entry, slabs_partial); \
+ } \
+ \
+ slabp = list_entry(entry, slab_t, list); \
+ kmem_cache_alloc_one_tail(cachep, slabp); \
+})
+
+#ifdef CONFIG_SMP
+void* kmem_cache_alloc_batch(kmem_cache_t* cachep, int flags)
+{
+ int batchcount = cachep->batchcount;
+ cpucache_t* cc = cc_data(cachep);
+
+ spin_lock(&cachep->spinlock);
+ while (batchcount--) {
+ struct list_head * slabs_partial, * entry;
+ slab_t *slabp;
+		/* Get the slab from which the allocation will come. */
+ slabs_partial = &(cachep)->slabs_partial;
+ entry = slabs_partial->next;
+ if (unlikely(entry == slabs_partial)) {
+ struct list_head * slabs_free;
+ slabs_free = &(cachep)->slabs_free;
+ entry = slabs_free->next;
+ if (unlikely(entry == slabs_free))
+ break;
+ list_del(entry);
+ list_add(entry, slabs_partial);
+ }
+
+ slabp = list_entry(entry, slab_t, list);
+ cc_entry(cc)[cc->avail++] =
+ kmem_cache_alloc_one_tail(cachep, slabp);
+ }
+ spin_unlock(&cachep->spinlock);
+
+ if (cc->avail)
+ return cc_entry(cc)[--cc->avail];
+ return NULL;
+}
+#endif
+
+static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
+{
+ unsigned long save_flags;
+ void* objp;
+
+ kmem_cache_alloc_head(cachep, flags);
+try_again:
+ local_irq_save(save_flags);
+#ifdef CONFIG_SMP
+ {
+ cpucache_t *cc = cc_data(cachep);
+
+ if (cc) {
+ if (cc->avail) {
+ STATS_INC_ALLOCHIT(cachep);
+ objp = cc_entry(cc)[--cc->avail];
+ } else {
+ STATS_INC_ALLOCMISS(cachep);
+ objp = kmem_cache_alloc_batch(cachep,flags);
+ if (!objp)
+ goto alloc_new_slab_nolock;
+ }
+ } else {
+ spin_lock(&cachep->spinlock);
+ objp = kmem_cache_alloc_one(cachep);
+ spin_unlock(&cachep->spinlock);
+ }
+ }
+#else
+ objp = kmem_cache_alloc_one(cachep);
+#endif
+ local_irq_restore(save_flags);
+ return objp;
+alloc_new_slab:
+#ifdef CONFIG_SMP
+ spin_unlock(&cachep->spinlock);
+alloc_new_slab_nolock:
+#endif
+ local_irq_restore(save_flags);
+ if (kmem_cache_grow(cachep, flags))
+ /* Someone may have stolen our objs. Doesn't matter, we'll
+ * just come back here again.
+ */
+ goto try_again;
+ return NULL;
+}
+
+/*
+ * Release an obj back to its cache. If the obj has a constructed
+ * state, it should be in this state _before_ it is released.
+ * - caller is responsible for the synchronization
+ */
+
+#if DEBUG
+# define CHECK_NR(pg) \
+ do { \
+ if (!VALID_PAGE(pg)) { \
+ printk(KERN_ERR "kfree: out of range ptr %lxh.\n", \
+ (unsigned long)objp); \
+ BUG(); \
+ } \
+ } while (0)
+# define CHECK_PAGE(page) \
+ do { \
+ CHECK_NR(page); \
+ if (!PageSlab(page)) { \
+ printk(KERN_ERR "kfree: bad ptr %lxh.\n", \
+ (unsigned long)objp); \
+ BUG(); \
+ } \
+ } while (0)
+
+#else
+# define CHECK_PAGE(pg) do { } while (0)
+#endif
+
+static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
+{
+ slab_t* slabp;
+
+ CHECK_PAGE(virt_to_page(objp));
+ /* reduces memory footprint
+ *
+ if (OPTIMIZE(cachep))
+ slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
+ else
+ */
+ slabp = GET_PAGE_SLAB(virt_to_page(objp));
+
+#if DEBUG
+ if (cachep->flags & SLAB_DEBUG_INITIAL)
+ /* Need to call the slab's constructor so the
+		 * caller can verify its state (debugging).
+ * Called without the cache-lock held.
+ */
+ cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
+
+ if (cachep->flags & SLAB_RED_ZONE) {
+ objp -= BYTES_PER_WORD;
+ if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
+ /* Either write before start, or a double free. */
+ BUG();
+ if (xchg((unsigned long *)(objp+cachep->objsize -
+ BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
+ /* Either write past end, or a double free. */
+ BUG();
+ }
+ if (cachep->flags & SLAB_POISON)
+ kmem_poison_obj(cachep, objp);
+ if (kmem_extra_free_checks(cachep, slabp, objp))
+ return;
+#endif
+ {
+ unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
+
+ slab_bufctl(slabp)[objnr] = slabp->free;
+ slabp->free = objnr;
+ }
+ STATS_DEC_ACTIVE(cachep);
+
+ /* fixup slab chains */
+ {
+ int inuse = slabp->inuse;
+ if (unlikely(!--slabp->inuse)) {
+ /* Was partial or full, now empty. */
+ list_del(&slabp->list);
+ list_add(&slabp->list, &cachep->slabs_free);
+ } else if (unlikely(inuse == cachep->num)) {
+ /* Was full. */
+ list_del(&slabp->list);
+ list_add(&slabp->list, &cachep->slabs_partial);
+ }
+ }
+}
+
+#ifdef CONFIG_SMP
+static inline void __free_block (kmem_cache_t* cachep,
+ void** objpp, int len)
+{
+ for ( ; len > 0; len--, objpp++)
+ kmem_cache_free_one(cachep, *objpp);
+}
+
+static void free_block (kmem_cache_t* cachep, void** objpp, int len)
+{
+ spin_lock(&cachep->spinlock);
+ __free_block(cachep, objpp, len);
+ spin_unlock(&cachep->spinlock);
+}
+#endif
+
+/*
+ * __kmem_cache_free
+ * called with disabled ints
+ */
+static inline void __kmem_cache_free (kmem_cache_t *cachep, void* objp)
+{
+#ifdef CONFIG_SMP
+ cpucache_t *cc = cc_data(cachep);
+
+ CHECK_PAGE(virt_to_page(objp));
+ if (cc) {
+ int batchcount;
+ if (cc->avail < cc->limit) {
+ STATS_INC_FREEHIT(cachep);
+ cc_entry(cc)[cc->avail++] = objp;
+ return;
+ }
+ STATS_INC_FREEMISS(cachep);
+ batchcount = cachep->batchcount;
+ cc->avail -= batchcount;
+ free_block(cachep,
+ &cc_entry(cc)[cc->avail],batchcount);
+ cc_entry(cc)[cc->avail++] = objp;
+ return;
+ } else {
+ free_block(cachep, &objp, 1);
+ }
+#else
+ kmem_cache_free_one(cachep, objp);
+#endif
+}
+
+/**
+ * kmem_cache_alloc - Allocate an object
+ * @cachep: The cache to allocate from.
+ * @flags: See kmalloc().
+ *
+ * Allocate an object from this cache. The flags are only relevant
+ * if the cache has no available objects.
+ */
+void * kmem_cache_alloc (kmem_cache_t *cachep, int flags)
+{
+ return __kmem_cache_alloc(cachep, flags);
+}
+
+/**
+ * kmalloc - allocate memory
+ * @size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * kmalloc is the normal method of allocating memory
+ * in the kernel.
+ *
+ * The @flags argument may be one of:
+ *
+ * %GFP_USER - Allocate memory on behalf of user. May sleep.
+ *
+ * %GFP_KERNEL - Allocate normal kernel ram. May sleep.
+ *
+ * %GFP_ATOMIC - Allocation will not sleep. Use inside interrupt handlers.
+ *
+ * Additionally, the %GFP_DMA flag may be set to indicate the memory
+ * must be suitable for DMA. This can mean different things on different
+ * platforms. For example, on i386, it means that the memory must come
+ * from the first 16MB.
+ */
+void * kmalloc (size_t size, int flags)
+{
+ cache_sizes_t *csizep = cache_sizes;
+
+ for (; csizep->cs_size; csizep++) {
+ if (size > csizep->cs_size)
+ continue;
+ return __kmem_cache_alloc(flags & GFP_DMA ?
+ csizep->cs_dmacachep : csizep->cs_cachep, flags);
+ }
+ return NULL;
+}
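+
+/*
+ * Example (illustrative): a request is rounded up to the first
+ * general-cache size that fits, so a 100-byte request is served from
+ * the size-128 cache, and kfree() later finds that cache again via
+ * the object's struct page:
+ *
+ *	char *buf = kmalloc(100, GFP_KERNEL);
+ *	if (!buf)
+ *		return -ENOMEM;
+ *	...
+ *	kfree(buf);
+ */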
+
+/**
+ * kmem_cache_free - Deallocate an object
+ * @cachep: The cache the allocation was from.
+ * @objp: The previously allocated object.
+ *
+ * Free an object which was previously allocated from this
+ * cache.
+ */
+void kmem_cache_free (kmem_cache_t *cachep, void *objp)
+{
+ unsigned long flags;
+#if DEBUG
+ CHECK_PAGE(virt_to_page(objp));
+ if (cachep != GET_PAGE_CACHE(virt_to_page(objp)))
+ BUG();
+#endif
+
+ local_irq_save(flags);
+ __kmem_cache_free(cachep, objp);
+ local_irq_restore(flags);
+}
+
+/**
+ * kfree - free previously allocated memory
+ * @objp: pointer returned by kmalloc.
+ *
+ * Don't free memory not originally allocated by kmalloc()
+ * or you will run into trouble.
+ */
+void kfree (const void *objp)
+{
+ kmem_cache_t *c;
+ unsigned long flags;
+
+ if (!objp)
+ return;
+ local_irq_save(flags);
+ CHECK_PAGE(virt_to_page(objp));
+ c = GET_PAGE_CACHE(virt_to_page(objp));
+ __kmem_cache_free(c, (void*)objp);
+ local_irq_restore(flags);
+}
+
+kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
+{
+ cache_sizes_t *csizep = cache_sizes;
+
+ /* This function could be moved to the header file, and
+ * made inline so consumers can quickly determine what
+ * cache pointer they require.
+ */
+ for ( ; csizep->cs_size; csizep++) {
+ if (size > csizep->cs_size)
+ continue;
+ break;
+ }
+ return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
+}
+
+#ifdef CONFIG_SMP
+
+/* called with cache_chain_sem acquired. */
+static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
+{
+ ccupdate_struct_t new;
+ int i;
+
+ /*
+ * These are admin-provided, so we are more graceful.
+ */
+ if (limit < 0)
+ return -EINVAL;
+ if (batchcount < 0)
+ return -EINVAL;
+ if (batchcount > limit)
+ return -EINVAL;
+ if (limit != 0 && !batchcount)
+ return -EINVAL;
+
+ memset(&new.new,0,sizeof(new.new));
+ if (limit) {
+ for (i = 0; i< smp_num_cpus; i++) {
+ cpucache_t* ccnew;
+
+ ccnew = kmalloc(sizeof(void*)*limit+
+ sizeof(cpucache_t), GFP_KERNEL);
+ if (!ccnew)
+ goto oom;
+ ccnew->limit = limit;
+ ccnew->avail = 0;
+ new.new[cpu_logical_map(i)] = ccnew;
+ }
+ }
+ new.cachep = cachep;
+ spin_lock_irq(&cachep->spinlock);
+ cachep->batchcount = batchcount;
+ spin_unlock_irq(&cachep->spinlock);
+
+ smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
+
+ for (i = 0; i < smp_num_cpus; i++) {
+ cpucache_t* ccold = new.new[cpu_logical_map(i)];
+ if (!ccold)
+ continue;
+ local_irq_disable();
+ free_block(cachep, cc_entry(ccold), ccold->avail);
+ local_irq_enable();
+ kfree(ccold);
+ }
+ return 0;
+oom:
+ for (i--; i >= 0; i--)
+ kfree(new.new[cpu_logical_map(i)]);
+ return -ENOMEM;
+}
+
+static void enable_cpucache (kmem_cache_t *cachep)
+{
+ int err;
+ int limit;
+
+ /* FIXME: optimize */
+ if (cachep->objsize > PAGE_SIZE)
+ return;
+ if (cachep->objsize > 1024)
+ limit = 60;
+ else if (cachep->objsize > 256)
+ limit = 124;
+ else
+ limit = 252;
+
+ err = kmem_tune_cpucache(cachep, limit, limit/2);
+ if (err)
+ printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
+ cachep->name, -err);
+}
+
+static void enable_all_cpucaches (void)
+{
+ struct list_head* p;
+ unsigned long spin_flags;
+
+ down(&cache_chain_sem);
+
+ p = &cache_cache.next;
+ do {
+ kmem_cache_t* cachep = list_entry(p, kmem_cache_t, next);
+
+ enable_cpucache(cachep);
+ p = cachep->next.next;
+ } while (p != &cache_cache.next);
+
+ up(&cache_chain_sem);
+}
+#endif
+
+/**
+ * kmem_cache_reap - Reclaim memory from caches.
+ * @gfp_mask: the type of memory required.
+ *
+ * Called from do_try_to_free_pages() and __alloc_pages()
+ */
+int kmem_cache_reap (int gfp_mask)
+{
+ slab_t *slabp;
+ kmem_cache_t *searchp;
+ kmem_cache_t *best_cachep;
+ unsigned int best_pages;
+ unsigned int best_len;
+ unsigned int scan;
+ int ret = 0;
+ unsigned long spin_flags;
+
+ down(&cache_chain_sem);
+
+ scan = REAP_SCANLEN;
+ best_len = 0;
+ best_pages = 0;
+ best_cachep = NULL;
+ searchp = clock_searchp;
+ do {
+ unsigned int pages;
+ struct list_head* p;
+ unsigned int full_free;
+
+ /* It's safe to test this without holding the cache-lock. */
+ if (searchp->flags & SLAB_NO_REAP)
+ goto next;
+ spin_lock_irq(&searchp->spinlock);
+ if (searchp->growing)
+ goto next_unlock;
+ if (searchp->dflags & DFLGS_GROWN) {
+ searchp->dflags &= ~DFLGS_GROWN;
+ goto next_unlock;
+ }
+#ifdef CONFIG_SMP
+ {
+ cpucache_t *cc = cc_data(searchp);
+ if (cc && cc->avail) {
+ __free_block(searchp, cc_entry(cc), cc->avail);
+ cc->avail = 0;
+ }
+ }
+#endif
+
+ full_free = 0;
+ p = searchp->slabs_free.next;
+ while (p != &searchp->slabs_free) {
+ slabp = list_entry(p, slab_t, list);
+#if DEBUG
+ if (slabp->inuse)
+ BUG();
+#endif
+ full_free++;
+ p = p->next;
+ }
+
+ /*
+ * Try to avoid slabs with constructors and/or
+ * more than one page per slab (as it can be difficult
+ * to get high orders from gfp()).
+ */
+ pages = full_free * (1<<searchp->gfporder);
+ if (searchp->ctor)
+ pages = (pages*4+1)/5;
+ if (searchp->gfporder)
+ pages = (pages*4+1)/5;
+ if (pages > best_pages) {
+ best_cachep = searchp;
+ best_len = full_free;
+ best_pages = pages;
+ if (pages >= REAP_PERFECT) {
+ clock_searchp = list_entry(searchp->next.next,
+ kmem_cache_t,next);
+ goto perfect;
+ }
+ }
+next_unlock:
+ spin_unlock_irq(&searchp->spinlock);
+next:
+ searchp = list_entry(searchp->next.next,kmem_cache_t,next);
+ } while (--scan && searchp != clock_searchp);
+
+ clock_searchp = searchp;
+
+ if (!best_cachep)
+ /* couldn't find anything to reap */
+ goto out;
+
+ spin_lock_irq(&best_cachep->spinlock);
+perfect:
+ /* free only 50% of the free slabs */
+ best_len = (best_len + 1)/2;
+ for (scan = 0; scan < best_len; scan++) {
+ struct list_head *p;
+
+ if (best_cachep->growing)
+ break;
+ p = best_cachep->slabs_free.prev;
+ if (p == &best_cachep->slabs_free)
+ break;
+ slabp = list_entry(p,slab_t,list);
+#if DEBUG
+ if (slabp->inuse)
+ BUG();
+#endif
+ list_del(&slabp->list);
+ STATS_INC_REAPED(best_cachep);
+
+ /* Safe to drop the lock. The slab is no longer linked to the
+ * cache.
+ */
+ spin_unlock_irq(&best_cachep->spinlock);
+ kmem_slab_destroy(best_cachep, slabp);
+ spin_lock_irq(&best_cachep->spinlock);
+ }
+ spin_unlock_irq(&best_cachep->spinlock);
+ ret = scan * (1 << best_cachep->gfporder);
+out:
+ up(&cache_chain_sem);
+ return ret;
+}
+
+void dump_slabinfo()
+{
+ struct list_head *p;
+ unsigned long spin_flags;
+
+ /* Output format version, so at least we can change it without _too_
+ * many complaints.
+ */
+ printk( "slabinfo - version: 1.1"
+#if STATS
+ " (statistics)"
+#endif
+#ifdef CONFIG_SMP
+ " (SMP)"
+#endif
+ "\n");
+ down(&cache_chain_sem);
+ p = &cache_cache.next;
+ do {
+ kmem_cache_t *cachep;
+ struct list_head *q;
+ slab_t *slabp;
+ unsigned long active_objs;
+ unsigned long num_objs;
+ unsigned long active_slabs = 0;
+ unsigned long num_slabs;
+ cachep = list_entry(p, kmem_cache_t, next);
+
+ spin_lock_irq(&cachep->spinlock);
+ active_objs = 0;
+ num_slabs = 0;
+ list_for_each(q,&cachep->slabs_full) {
+ slabp = list_entry(q, slab_t, list);
+ if (slabp->inuse != cachep->num)
+ BUG();
+ active_objs += cachep->num;
+ active_slabs++;
+ }
+ list_for_each(q,&cachep->slabs_partial) {
+ slabp = list_entry(q, slab_t, list);
+ if (slabp->inuse == cachep->num || !slabp->inuse)
+ BUG();
+ active_objs += slabp->inuse;
+ active_slabs++;
+ }
+ list_for_each(q,&cachep->slabs_free) {
+ slabp = list_entry(q, slab_t, list);
+ if (slabp->inuse)
+ BUG();
+ num_slabs++;
+ }
+ num_slabs+=active_slabs;
+ num_objs = num_slabs*cachep->num;
+
+ printk("%-17s %6lu %6lu %6u %4lu %4lu %4u",
+ cachep->name, active_objs, num_objs, cachep->objsize,
+ active_slabs, num_slabs, (1<<cachep->gfporder));
+
+#if STATS
+ {
+ unsigned long errors = cachep->errors;
+ unsigned long high = cachep->high_mark;
+ unsigned long grown = cachep->grown;
+ unsigned long reaped = cachep->reaped;
+ unsigned long allocs = cachep->num_allocations;
+
+ printk(" : %6lu %7lu %5lu %4lu %4lu",
+ high, allocs, grown, reaped, errors);
+ }
+#endif
+#ifdef CONFIG_SMP
+ {
+ unsigned int batchcount = cachep->batchcount;
+ unsigned int limit;
+
+ if (cc_data(cachep))
+ limit = cc_data(cachep)->limit;
+ else
+ limit = 0;
+ printk(" : %4u %4u",
+ limit, batchcount);
+ }
+#endif
+#if STATS && defined(CONFIG_SMP)
+ {
+ unsigned long allochit = atomic_read(&cachep->allochit);
+ unsigned long allocmiss = atomic_read(&cachep->allocmiss);
+ unsigned long freehit = atomic_read(&cachep->freehit);
+ unsigned long freemiss = atomic_read(&cachep->freemiss);
+ printk(" : %6lu %6lu %6lu %6lu",
+ allochit, allocmiss, freehit, freemiss);
+ }
+#endif
+ printk("\n");
+ spin_unlock_irq(&cachep->spinlock);
+
+ p = cachep->next.next;
+ } while (p != &cache_cache.next);
+
+ up(&cache_chain_sem);
+
+ return;
+}
+
+
+
diff --git a/xen/common/softirq.c b/xen/common/softirq.c
new file mode 100644
index 0000000000..b98c47f3ce
--- /dev/null
+++ b/xen/common/softirq.c
@@ -0,0 +1,332 @@
+/*
+ * linux/kernel/softirq.c
+ *
+ * Copyright (C) 1992 Linus Torvalds
+ *
+ * Fixed a disable_bh()/enable_bh() race (was causing a console lockup)
+ * due to non-atomic handling of bh_mask_count. Copyright (C) 1998 Andrea Arcangeli
+ *
+ * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+//#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+//#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/tqueue.h>
+
+/*
+ - No shared variables, all the data are CPU local.
+ - If a softirq needs serialization, let it serialize itself
+ by its own spinlocks.
+   - Even if a softirq is serialized, only the local cpu is marked for
+     execution. Hence, we get something of a weak cpu binding. It is
+     still not clear whether this results in better locality or not.
+   - These softirqs are not masked by global cli() and start_bh_atomic()
+     (for obvious reasons). Hence, old parts of code still using global
+     locks MUST NOT use softirqs, but must insert interfacing routines
+     that acquire the global locks. E.g. look at the BHs implementation.
+
+ Examples:
+ - NET RX softirq. It is multithreaded and does not require
+ any global serialization.
+ - NET TX softirq. It kicks software netdevice queues, hence
+ it is logically serialized per device, but this serialization
+ is invisible to common code.
+   - Tasklets: serialized with respect to themselves.
+ - Bottom halves: globally serialized, grr...
+ */
+
+irq_cpustat_t irq_stat[NR_CPUS];
+
+static struct softirq_action softirq_vec[32] __cacheline_aligned;
+
+
+asmlinkage void do_softirq()
+{
+ int cpu = smp_processor_id();
+ __u32 pending;
+ long flags;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+
+ pending = softirq_pending(cpu);
+
+ while (pending) {
+ struct softirq_action *h;
+
+ local_bh_disable();
+restart:
+ /* Reset the pending bitmask before enabling irqs */
+ softirq_pending(cpu) = 0;
+
+ local_irq_enable();
+
+ h = softirq_vec;
+
+ do {
+ if (pending & 1)
+ h->action(h);
+ h++;
+ pending >>= 1;
+ } while (pending);
+
+ local_irq_disable();
+
+ pending = softirq_pending(cpu);
+ if (pending) goto restart;
+ __local_bh_enable();
+ }
+
+ local_irq_restore(flags);
+}
+
+/*
+ * This function must run with irq disabled!
+ */
+inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr)
+{
+ __cpu_raise_softirq(cpu, nr);
+
+#ifdef CONFIG_SMP
+ if ( cpu != smp_processor_id() )
+ smp_send_event_check_cpu(cpu);
+#endif
+}
+
+void raise_softirq(unsigned int nr)
+{
+ long flags;
+
+ local_irq_save(flags);
+ cpu_raise_softirq(smp_processor_id(), nr);
+ local_irq_restore(flags);
+}
+
+void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
+{
+ softirq_vec[nr].data = data;
+ softirq_vec[nr].action = action;
+}
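+
+/*
+ * Usage sketch (MY_SOFTIRQ and my_action are hypothetical names, not
+ * defined in this tree):
+ *
+ *	static void my_action(struct softirq_action *h)
+ *	{
+ *		... runs with irqs enabled, once per raise per cpu ...
+ *	}
+ *
+ *	open_softirq(MY_SOFTIRQ, my_action, NULL);
+ *	raise_softirq(MY_SOFTIRQ);	(or cpu_raise_softirq() if
+ *					 irqs are already disabled)
+ */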
+
+
+/* Tasklets */
+
+struct tasklet_head tasklet_vec[NR_CPUS] __cacheline_aligned;
+struct tasklet_head tasklet_hi_vec[NR_CPUS] __cacheline_aligned;
+
+void __tasklet_schedule(struct tasklet_struct *t)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+
+ local_irq_save(flags);
+ t->next = tasklet_vec[cpu].list;
+ tasklet_vec[cpu].list = t;
+ cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+ local_irq_restore(flags);
+}
+
+void __tasklet_hi_schedule(struct tasklet_struct *t)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+
+ local_irq_save(flags);
+ t->next = tasklet_hi_vec[cpu].list;
+ tasklet_hi_vec[cpu].list = t;
+ cpu_raise_softirq(cpu, HI_SOFTIRQ);
+ local_irq_restore(flags);
+}
+
+static void tasklet_action(struct softirq_action *a)
+{
+ int cpu = smp_processor_id();
+ struct tasklet_struct *list;
+
+ local_irq_disable();
+ list = tasklet_vec[cpu].list;
+ tasklet_vec[cpu].list = NULL;
+ local_irq_enable();
+
+ while (list) {
+ struct tasklet_struct *t = list;
+
+ list = list->next;
+
+ if (tasklet_trylock(t)) {
+ if (!atomic_read(&t->count)) {
+ if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
+ BUG();
+ t->func(t->data);
+ }
+ tasklet_unlock(t);
+ continue;
+ }
+
+ local_irq_disable();
+ t->next = tasklet_vec[cpu].list;
+ tasklet_vec[cpu].list = t;
+ __cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+ local_irq_enable();
+ }
+}
+
+static void tasklet_hi_action(struct softirq_action *a)
+{
+ int cpu = smp_processor_id();
+ struct tasklet_struct *list;
+
+ local_irq_disable();
+ list = tasklet_hi_vec[cpu].list;
+ tasklet_hi_vec[cpu].list = NULL;
+ local_irq_enable();
+
+ while (list) {
+ struct tasklet_struct *t = list;
+
+ list = list->next;
+
+ if (tasklet_trylock(t)) {
+ if (!atomic_read(&t->count)) {
+ if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
+ BUG();
+ t->func(t->data);
+ }
+ tasklet_unlock(t);
+ continue;
+ }
+
+ local_irq_disable();
+ t->next = tasklet_hi_vec[cpu].list;
+ tasklet_hi_vec[cpu].list = t;
+ __cpu_raise_softirq(cpu, HI_SOFTIRQ);
+ local_irq_enable();
+ }
+}
+
+
+void tasklet_init(struct tasklet_struct *t,
+ void (*func)(unsigned long), unsigned long data)
+{
+ t->next = NULL;
+ t->state = 0;
+ atomic_set(&t->count, 0);
+ t->func = func;
+ t->data = data;
+}
+
+void tasklet_kill(struct tasklet_struct *t)
+{
+ if (in_interrupt())
+ printk("Attempt to kill tasklet from interrupt\n");
+
+ while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
+ set_current_state(TASK_RUNNING);
+ do {
+ current->policy |= SCHED_YIELD;
+ schedule();
+ } while (test_bit(TASKLET_STATE_SCHED, &t->state));
+ }
+ tasklet_unlock_wait(t);
+ clear_bit(TASKLET_STATE_SCHED, &t->state);
+}
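+
+/*
+ * Usage sketch (names are illustrative; tasklet_schedule() is assumed
+ * to be the usual inline wrapper around __tasklet_schedule() from the
+ * header):
+ *
+ *	static void my_handler(unsigned long data);
+ *	static struct tasklet_struct my_tasklet;
+ *
+ *	tasklet_init(&my_tasklet, my_handler, 0);
+ *	tasklet_schedule(&my_tasklet);	e.g. from irq context
+ *	...
+ *	tasklet_kill(&my_tasklet);	before freeing my_tasklet
+ */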
+
+
+
+/* Old style BHs */
+
+static void (*bh_base[32])(void);
+struct tasklet_struct bh_task_vec[32];
+
+/* BHs are serialized by spinlock global_bh_lock.
+
+   It would still be possible to implement synchronize_bh() as
+   spin_unlock_wait(&global_bh_lock). This operation is not used
+   by the kernel now, so the lock is kept non-private only
+   because of wait_on_irq().
+
+ It can be removed only after auditing all the BHs.
+ */
+spinlock_t global_bh_lock = SPIN_LOCK_UNLOCKED;
+
+static void bh_action(unsigned long nr)
+{
+ int cpu = smp_processor_id();
+
+ if (!spin_trylock(&global_bh_lock))
+ goto resched;
+
+ if (!hardirq_trylock(cpu))
+ goto resched_unlock;
+
+ if (bh_base[nr])
+ bh_base[nr]();
+
+ hardirq_endlock(cpu);
+ spin_unlock(&global_bh_lock);
+ return;
+
+resched_unlock:
+ spin_unlock(&global_bh_lock);
+resched:
+ mark_bh(nr);
+}
+
+void init_bh(int nr, void (*routine)(void))
+{
+ bh_base[nr] = routine;
+ mb();
+}
+
+void remove_bh(int nr)
+{
+ tasklet_kill(bh_task_vec+nr);
+ bh_base[nr] = NULL;
+}
+
+void __init softirq_init()
+{
+ int i;
+
+ for (i=0; i<32; i++)
+ tasklet_init(bh_task_vec+i, bh_action, i);
+
+ open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
+ open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
+}
+
+void __run_task_queue(task_queue *list)
+{
+ struct list_head head, *next;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tqueue_lock, flags);
+ list_add(&head, list);
+ list_del_init(list);
+ spin_unlock_irqrestore(&tqueue_lock, flags);
+
+ next = head.next;
+ while (next != &head) {
+ void (*f) (void *);
+ struct tq_struct *p;
+ void *data;
+
+ p = list_entry(next, struct tq_struct, list);
+ next = next->next;
+ f = p->routine;
+ data = p->data;
+ wmb();
+ p->sync = 0;
+ if (f)
+ f(data);
+ }
+}
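+
+/*
+ * Usage sketch (queue_task() and the tq_struct initialiser are assumed
+ * to come from <linux/tqueue.h>):
+ *
+ *	static void my_routine(void *data);
+ *	static struct tq_struct my_task = { routine: my_routine };
+ *
+ *	queue_task(&my_task, &tq_timer);	deferred to tqueue_bh()
+ */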
+
diff --git a/xen/common/timer.c b/xen/common/timer.c
new file mode 100644
index 0000000000..20d45ccbe6
--- /dev/null
+++ b/xen/common/timer.c
@@ -0,0 +1,603 @@
+/*
+ * linux/kernel/timer.c
+ *
+ * Kernel internal timers, kernel timekeeping, basic process system calls
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
+ *
+ * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
+ * "A Kernel Model for Precision Timekeeping" by Dave Mills
+ * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
+ * serialize accesses to xtime/lost_ticks).
+ * Copyright (C) 1998 Andrea Arcangeli
+ * 1999-03-10 Improved NTP compatibility by Ulrich Windl
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/timex.h>
+#include <linux/tqueue.h>
+#include <linux/delay.h>
+//#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+//#include <linux/kernel_stat.h>
+
+#include <xeno/event.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * Timekeeping variables
+ */
+
+long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */
+
+/* The current time */
+struct timeval xtime __attribute__ ((aligned (16)));
+
+/* Don't completely fail for HZ > 500. */
+int tickadj = 500/HZ ? : 1; /* microsecs */
+
+DECLARE_TASK_QUEUE(tq_timer);
+DECLARE_TASK_QUEUE(tq_immediate);
+
+/*
+ * phase-lock loop variables
+ */
+/* TIME_ERROR prevents overwriting the CMOS clock */
+int time_state = TIME_OK; /* clock synchronization status */
+int time_status = STA_UNSYNC; /* clock status bits */
+long time_offset; /* time adjustment (us) */
+long time_constant = 2; /* pll time constant */
+long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
+long time_precision = 1; /* clock precision (us) */
+long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
+long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
+long time_phase; /* phase offset (scaled us) */
+long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
+ /* frequency offset (scaled ppm)*/
+long time_adj; /* tick adjust (scaled 1 / HZ) */
+long time_reftime; /* time at last adjustment (s) */
+
+long time_adjust;
+long time_adjust_step;
+
+unsigned long event;
+
+unsigned long volatile jiffies;
+
+unsigned int * prof_buffer;
+unsigned long prof_len;
+unsigned long prof_shift;
+
+/*
+ * Event timer code
+ */
+#define TVN_BITS 6
+#define TVR_BITS 8
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_MASK (TVN_SIZE - 1)
+#define TVR_MASK (TVR_SIZE - 1)
+
+struct timer_vec {
+ int index;
+ struct list_head vec[TVN_SIZE];
+};
+
+struct timer_vec_root {
+ int index;
+ struct list_head vec[TVR_SIZE];
+};
+
+static struct timer_vec tv5;
+static struct timer_vec tv4;
+static struct timer_vec tv3;
+static struct timer_vec tv2;
+static struct timer_vec_root tv1;
+
+static struct timer_vec * const tvecs[] = {
+ (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
+};
+
+#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
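+
+/*
+ * The vectors form a hierarchical timer wheel: tv1 resolves the next
+ * TVR_SIZE (256) jiffies directly; tv2 covers up to 2^(8+6), tv3 up to
+ * 2^(8+12), and so on. Whenever tv1 wraps around, one slot of tv2 is
+ * cascaded down into it, etc. For example, a timer due in 1000 jiffies
+ * lands in tv2 at index (expires >> 8) & 63, and migrates into tv1 on
+ * a later cascade (see internal_add_timer() and cascade_timers()).
+ */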
+
+void init_timervecs (void)
+{
+ int i;
+
+ for (i = 0; i < TVN_SIZE; i++) {
+ INIT_LIST_HEAD(tv5.vec + i);
+ INIT_LIST_HEAD(tv4.vec + i);
+ INIT_LIST_HEAD(tv3.vec + i);
+ INIT_LIST_HEAD(tv2.vec + i);
+ }
+ for (i = 0; i < TVR_SIZE; i++)
+ INIT_LIST_HEAD(tv1.vec + i);
+}
+
+static unsigned long timer_jiffies;
+
+static inline void internal_add_timer(struct timer_list *timer)
+{
+ /*
+	 * interrupts must be disabled (cli) when calling this
+ */
+ unsigned long expires = timer->expires;
+ unsigned long idx = expires - timer_jiffies;
+ struct list_head * vec;
+
+ if (idx < TVR_SIZE) {
+ int i = expires & TVR_MASK;
+ vec = tv1.vec + i;
+ } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
+ int i = (expires >> TVR_BITS) & TVN_MASK;
+ vec = tv2.vec + i;
+ } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+ vec = tv3.vec + i;
+ } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
+ vec = tv4.vec + i;
+ } else if ((signed long) idx < 0) {
+ /* can happen if you add a timer with expires == jiffies,
+ * or you set a timer to go off in the past
+ */
+ vec = tv1.vec + tv1.index;
+ } else if (idx <= 0xffffffffUL) {
+ int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
+ vec = tv5.vec + i;
+ } else {
+ /* Can only get here on architectures with 64-bit jiffies */
+ INIT_LIST_HEAD(&timer->list);
+ return;
+ }
+ /*
+ * Timers are FIFO!
+ */
+ list_add(&timer->list, vec->prev);
+}
+
+/* Initialize both explicitly - let's try to have them in the same cache line */
+spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_SMP
+volatile struct timer_list * volatile running_timer;
+#define timer_enter(t) do { running_timer = t; mb(); } while (0)
+#define timer_exit() do { running_timer = NULL; } while (0)
+#define timer_is_running(t) (running_timer == t)
+#define timer_synchronize(t) while (timer_is_running(t)) barrier()
+#else
+#define timer_enter(t) do { } while (0)
+#define timer_exit() do { } while (0)
+#endif
+
+void add_timer(struct timer_list *timer)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ if (timer_pending(timer))
+ goto bug;
+ internal_add_timer(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return;
+bug:
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ printk("bug: kernel timer added twice at %p.\n",
+ __builtin_return_address(0));
+}
+
+static inline int detach_timer (struct timer_list *timer)
+{
+ if (!timer_pending(timer))
+ return 0;
+ list_del(&timer->list);
+ return 1;
+}
+
+int mod_timer(struct timer_list *timer, unsigned long expires)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ timer->expires = expires;
+ ret = detach_timer(timer);
+ internal_add_timer(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
+}
+
+int del_timer(struct timer_list * timer)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret = detach_timer(timer);
+ timer->list.next = timer->list.prev = NULL;
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
+}
+
+#ifdef CONFIG_SMP
+void sync_timers(void)
+{
+ spin_unlock_wait(&global_bh_lock);
+}
+
+/*
+ * SMP-specific function to delete a periodic timer.
+ * The caller must somehow prevent the timer from being restarted.
+ * Upon exit the timer is not queued and its handler is not running
+ * on any CPU. Returns the number of times the timer was deleted
+ * (for reference counting).
+ */
+
+int del_timer_sync(struct timer_list * timer)
+{
+ int ret = 0;
+
+ for (;;) {
+ unsigned long flags;
+ int running;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret += detach_timer(timer);
+ timer->list.next = timer->list.prev = 0;
+ running = timer_is_running(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+
+ if (!running)
+ break;
+
+ timer_synchronize(timer);
+ }
+
+ return ret;
+}
+#endif
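+
+/*
+ * Usage sketch (illustrative; "dev" and its fields are hypothetical):
+ * a driver tearing down a self-rearming periodic timer must first stop
+ * the rearm path, then synchronise:
+ *
+ *     dev->shutting_down = 1;   (checked by the handler before mod_timer)
+ *     del_timer_sync(&dev->poll_timer);
+ */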
+
+
+static inline void cascade_timers(struct timer_vec *tv)
+{
+ /* cascade all the timers from tv up one level */
+ struct list_head *head, *curr, *next;
+
+ head = tv->vec + tv->index;
+ curr = head->next;
+ /*
+ * We are removing _all_ timers from the list, so we don't have to
+ * detach them individually, just clear the list afterwards.
+ */
+ while (curr != head) {
+ struct timer_list *tmp;
+
+ tmp = list_entry(curr, struct timer_list, list);
+ next = curr->next;
+ list_del(curr); /* not strictly needed: the whole list is reinitialised below */
+ internal_add_timer(tmp);
+ curr = next;
+ }
+ INIT_LIST_HEAD(head);
+ tv->index = (tv->index + 1) & TVN_MASK;
+}
+
+static inline void run_timer_list(void)
+{
+ spin_lock_irq(&timerlist_lock);
+ while ((long)(jiffies - timer_jiffies) >= 0) {
+ struct list_head *head, *curr;
+ if (!tv1.index) {
+ int n = 1;
+ do {
+ cascade_timers(tvecs[n]);
+ } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
+ }
+repeat:
+ head = tv1.vec + tv1.index;
+ curr = head->next;
+ if (curr != head) {
+ struct timer_list *timer;
+ void (*fn)(unsigned long);
+ unsigned long data;
+
+ timer = list_entry(curr, struct timer_list, list);
+ fn = timer->function;
+ data = timer->data;
+
+ detach_timer(timer);
+ timer->list.next = timer->list.prev = NULL;
+ timer_enter(timer);
+ spin_unlock_irq(&timerlist_lock);
+ fn(data);
+ spin_lock_irq(&timerlist_lock);
+ timer_exit();
+ goto repeat;
+ }
+ ++timer_jiffies;
+ tv1.index = (tv1.index + 1) & TVR_MASK;
+ }
+ spin_unlock_irq(&timerlist_lock);
+}
+
+spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;
+
+void tqueue_bh(void)
+{
+ run_task_queue(&tq_timer);
+}
+
+void immediate_bh(void)
+{
+ run_task_queue(&tq_immediate);
+}
+
+/*
+ * this routine handles the overflow of the microsecond field
+ *
+ * The tricky bits of code to handle the accurate clock support
+ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
+ * They were originally developed for SUN and DEC kernels.
+ * All the kudos should go to Dave for this stuff.
+ *
+ */
+static void second_overflow(void)
+{
+ long ltemp;
+
+ /* Bump the maxerror field */
+ time_maxerror += time_tolerance >> SHIFT_USEC;
+ if ( time_maxerror > NTP_PHASE_LIMIT ) {
+ time_maxerror = NTP_PHASE_LIMIT;
+ time_status |= STA_UNSYNC;
+ }
+
+ /*
+ * Leap second processing. If in leap-insert state at
+ * the end of the day, the system clock is set back one
+ * second; if in leap-delete state, the system clock is
+ * set ahead one second. The microtime() routine or
+ * external clock driver will insure that reported time
+ * is always monotonic. The ugly divides should be
+ * replaced.
+ */
+ switch (time_state) {
+
+ case TIME_OK:
+ if (time_status & STA_INS)
+ time_state = TIME_INS;
+ else if (time_status & STA_DEL)
+ time_state = TIME_DEL;
+ break;
+
+ case TIME_INS:
+ if (xtime.tv_sec % 86400 == 0) {
+ xtime.tv_sec--;
+ time_state = TIME_OOP;
+ printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
+ }
+ break;
+
+ case TIME_DEL:
+ if ((xtime.tv_sec + 1) % 86400 == 0) {
+ xtime.tv_sec++;
+ time_state = TIME_WAIT;
+ printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
+ }
+ break;
+
+ case TIME_OOP:
+ time_state = TIME_WAIT;
+ break;
+
+ case TIME_WAIT:
+ if (!(time_status & (STA_INS | STA_DEL)))
+ time_state = TIME_OK;
+ }
+
+ /*
+ * Compute the phase adjustment for the next second. In
+ * PLL mode, the offset is reduced by a fixed factor
+ * times the time constant. In FLL mode the offset is
+ * used directly. In either mode, the maximum phase
+ * adjustment for each second is clamped so as to spread
+ * the adjustment over not more than the number of
+ * seconds between updates.
+ */
+ if (time_offset < 0) {
+ ltemp = -time_offset;
+ if (!(time_status & STA_FLL))
+ ltemp >>= SHIFT_KG + time_constant;
+ if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+ ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+ time_offset += ltemp;
+ time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ } else {
+ ltemp = time_offset;
+ if (!(time_status & STA_FLL))
+ ltemp >>= SHIFT_KG + time_constant;
+ if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+ ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+ time_offset -= ltemp;
+ time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ }
+
+ if (ltemp < 0)
+ time_adj -= -ltemp >>
+ (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+ else
+ time_adj += ltemp >>
+ (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+
+#if HZ == 100
+ /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
+ * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
+ */
+ if (time_adj < 0)
+ time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
+ else
+ time_adj += (time_adj >> 2) + (time_adj >> 5);
+#endif
+}
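+
+/*
+ * Illustrative numbers: in PLL mode with the usual timex constants
+ * (SHIFT_KG == 6, default time_constant == 2), each second moves about
+ * 1/256 of the remaining time_offset into time_adj, so the offset decays
+ * geometrically; the clamp above limits any single second's correction
+ * to MAXPHASE/MINSEC (512000/16 == 32000 usec, suitably scaled).
+ */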
+
+/* in the NTP reference this is called "hardclock()" */
+static void update_wall_time_one_tick(void)
+{
+ if ( (time_adjust_step = time_adjust) != 0 ) {
+ /* We are doing an adjtime thing.
+ *
+ * Prepare time_adjust_step to be within bounds.
+ * Note that a positive time_adjust means we want the clock
+ * to run faster.
+ *
+ * Limit the amount of the step to be in the range
+ * -tickadj .. +tickadj
+ */
+ if (time_adjust > tickadj)
+ time_adjust_step = tickadj;
+ else if (time_adjust < -tickadj)
+ time_adjust_step = -tickadj;
+
+ /* Reduce by this step the amount of time left */
+ time_adjust -= time_adjust_step;
+ }
+ xtime.tv_usec += tick + time_adjust_step;
+ /*
+ * Advance the phase; once it accumulates to a whole microsecond,
+ * fold that microsecond into the tick.
+ */
+ time_phase += time_adj;
+ if (time_phase <= -FINEUSEC) {
+ long ltemp = -time_phase >> SHIFT_SCALE;
+ time_phase += ltemp << SHIFT_SCALE;
+ xtime.tv_usec -= ltemp;
+ }
+ else if (time_phase >= FINEUSEC) {
+ long ltemp = time_phase >> SHIFT_SCALE;
+ time_phase -= ltemp << SHIFT_SCALE;
+ xtime.tv_usec += ltemp;
+ }
+}
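+
+/*
+ * Illustrative numbers: with HZ == 100 (tick == 10000 usec) and the
+ * conventional tickadj of 5 usec, an adjtime() request of +800 usec is
+ * consumed 5 usec per tick, i.e. spread over 160 ticks (1.6s) of
+ * slightly lengthened ticks rather than applied as one visible step.
+ */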
+
+/*
+ * Using a loop looks inefficient, but "ticks" is
+ * usually just one (we shouldn't be losing ticks;
+ * we're doing it this way mainly for interrupt
+ * latency reasons, not because we expect to
+ * have lots of lost timer ticks).
+ */
+static void update_wall_time(unsigned long ticks)
+{
+ do {
+ ticks--;
+ update_wall_time_one_tick();
+ } while (ticks);
+
+ if (xtime.tv_usec >= 1000000) {
+ xtime.tv_usec -= 1000000;
+ xtime.tv_sec++;
+ second_overflow();
+ }
+}
+
+static inline void do_process_times(struct task_struct *p,
+ unsigned long user, unsigned long system)
+{
+ //unsigned long psecs;
+
+// psecs = (p->times.tms_utime += user);
+ //psecs += (p->times.tms_stime += system);
+}
+
+
+void update_one_process(struct task_struct *p, unsigned long user,
+ unsigned long system, int cpu)
+{
+// p->per_cpu_utime[cpu] += user;
+// p->per_cpu_stime[cpu] += system;
+ do_process_times(p, user, system);
+}
+
+/*
+ * Called from the timer interrupt handler to charge one tick to the current
+ * process. user_tick is 1 if the tick is user time, 0 for system.
+ */
+void update_process_times(int user_tick)
+{
+ struct task_struct *p = current;
+ int cpu = smp_processor_id(), system = user_tick ^ 1;
+
+ update_one_process(p, user_tick, system, cpu);
+
+ if ( --p->counter <= 0 )
+ {
+ p->counter = 0;
+ set_bit(_HYP_EVENT_NEED_RESCHED, &p->hyp_events);
+ }
+}
+
+
+/* jiffies at the most recent update of wall time */
+unsigned long wall_jiffies;
+
+/*
+ * This spinlock protects us from races in SMP while playing with xtime. -arca
+ */
+rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
+
+static inline void update_times(void)
+{
+ unsigned long ticks;
+
+ /*
+ * update_times() is run from the raw timer_bh handler so we
+ * just know that the irqs are locally enabled and so we don't
+ * need to save/restore the flags of the local CPU here. -arca
+ */
+ write_lock_irq(&xtime_lock);
+
+ ticks = jiffies - wall_jiffies;
+ if (ticks) {
+ wall_jiffies += ticks;
+ update_wall_time(ticks);
+ }
+ write_unlock_irq(&xtime_lock);
+}
+
+void timer_bh(void)
+{
+ update_times();
+ run_timer_list();
+}
+
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/lib.h>
+#include <xeno/config.h>
+#include <xeno/smp.h>
+#include <xeno/irq.h>
+#include <asm/msr.h>
+
+void do_timer(struct pt_regs *regs)
+{
+
+ (*(unsigned long *)&jiffies)++;
+
+ if ( !using_apic_timer )
+ update_process_times(user_mode(regs));
+
+ mark_bh(TIMER_BH);
+ if (TQ_ACTIVE(tq_timer))
+ mark_bh(TQUEUE_BH);
+}
+
+void get_fast_time(struct timeval * tm)
+{
+ *tm=xtime;
+}
diff --git a/xen/common/vsprintf.c b/xen/common/vsprintf.c
new file mode 100644
index 0000000000..fe17225088
--- /dev/null
+++ b/xen/common/vsprintf.c
@@ -0,0 +1,713 @@
+/*
+ * linux/lib/vsprintf.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/* vsprintf.c -- Lars Wirzenius & Linus Torvalds. */
+/*
+ * Wirzenius wrote this portably, Torvalds fucked it up :-)
+ */
+
+/*
+ * Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@datastacks.com>
+ * - changed to provide snprintf and vsnprintf functions
+ */
+
+#include <stdarg.h>
+#include <xeno/ctype.h>
+#include <xeno/lib.h>
+
+/**
+ * simple_strtoul - convert a string to an unsigned long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
+{
+ unsigned long result = 0,value;
+
+ if (!base) {
+ base = 10;
+ if (*cp == '0') {
+ base = 8;
+ cp++;
+ if ((*cp == 'x') && isxdigit(cp[1])) {
+ cp++;
+ base = 16;
+ }
+ }
+ }
+ while (isxdigit(*cp) &&
+ (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
+ result = result*base + value;
+ cp++;
+ }
+ if (endp)
+ *endp = (char *)cp;
+ return result;
+}
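+
+/*
+ * Illustrative usage: with base == 0 the prefix selects the radix, so
+ *
+ *     char *end;
+ *     unsigned long v = simple_strtoul("0x1a90", &end, 0);
+ *
+ * yields v == 6800 with end left pointing at the terminating NUL,
+ * while "0755" would parse as octal (493) and "42" as decimal.
+ */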
+
+/**
+ * simple_strtol - convert a string to a signed long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+long simple_strtol(const char *cp,char **endp,unsigned int base)
+{
+ if(*cp=='-')
+ return -simple_strtoul(cp+1,endp,base);
+ return simple_strtoul(cp,endp,base);
+}
+
+/**
+ * simple_strtoull - convert a string to an unsigned long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int base)
+{
+ unsigned long long result = 0,value;
+
+ if (!base) {
+ base = 10;
+ if (*cp == '0') {
+ base = 8;
+ cp++;
+ if ((*cp == 'x') && isxdigit(cp[1])) {
+ cp++;
+ base = 16;
+ }
+ }
+ }
+ while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp)
+ ? toupper(*cp) : *cp)-'A'+10) < base) {
+ result = result*base + value;
+ cp++;
+ }
+ if (endp)
+ *endp = (char *)cp;
+ return result;
+}
+
+/**
+ * simple_strtoll - convert a string to a signed long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+long long simple_strtoll(const char *cp,char **endp,unsigned int base)
+{
+ if(*cp=='-')
+ return -simple_strtoull(cp+1,endp,base);
+ return simple_strtoull(cp,endp,base);
+}
+
+static int skip_atoi(const char **s)
+{
+ int i=0;
+
+ while (isdigit(**s))
+ i = i*10 + *((*s)++) - '0';
+ return i;
+}
+
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SPECIAL 32 /* 0x */
+#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
+
+static char * number(char * buf, char * end, long num, int base, int size, int precision, int type)
+{
+ char c,sign,tmp[66];
+ const char *digits;
+ const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+ const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ int i;
+
+ digits = (type & LARGE) ? large_digits : small_digits;
+ if (type & LEFT)
+ type &= ~ZEROPAD;
+ if (base < 2 || base > 36)
+ return buf;
+ c = (type & ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & SIGN) {
+ if (num < 0) {
+ sign = '-';
+ num = -num;
+ size--;
+ } else if (type & PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++]='0';
+ else
+ {
+ /* XXX KAF: force unsigned mod and div. */
+ unsigned long num2=(unsigned long)num;
+ unsigned int base2=(unsigned int)base;
+ while (num2 != 0) { tmp[i++] = digits[num2%base2]; num2 /= base2; }
+ }
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type&(ZEROPAD+LEFT))) {
+ while(size-->0) {
+ if (buf <= end)
+ *buf = ' ';
+ ++buf;
+ }
+ }
+ if (sign) {
+ if (buf <= end)
+ *buf = sign;
+ ++buf;
+ }
+ if (type & SPECIAL) {
+ if (base==8) {
+ if (buf <= end)
+ *buf = '0';
+ ++buf;
+ } else if (base==16) {
+ if (buf <= end)
+ *buf = '0';
+ ++buf;
+ if (buf <= end)
+ *buf = digits[33];
+ ++buf;
+ }
+ }
+ if (!(type & LEFT)) {
+ while (size-- > 0) {
+ if (buf <= end)
+ *buf = c;
+ ++buf;
+ }
+ }
+ while (i < precision--) {
+ if (buf <= end)
+ *buf = '0';
+ ++buf;
+ }
+ while (i-- > 0) {
+ if (buf <= end)
+ *buf = tmp[i];
+ ++buf;
+ }
+ while (size-- > 0) {
+ if (buf <= end)
+ *buf = ' ';
+ ++buf;
+ }
+ return buf;
+}
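+
+/*
+ * Illustrative behaviour: number(buf, end, 255, 16, 8, -1,
+ * SPECIAL | ZEROPAD) emits "0x0000ff", as a "%#08x" conversion would:
+ * the "0x" prefix consumes two of the eight columns, ZEROPAD fills
+ * with '0' rather than spaces, and the digits are built
+ * least-significant-first in tmp[] and copied out in reverse.
+ */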
+
+/**
+ * vsnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want snprintf instead.
+ */
+int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ int len;
+ unsigned long long num;
+ int i, base;
+ char *str, *end, c;
+ const char *s;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /* min. # of digits for integers; max
+ number of chars from string */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+ /* 'z' support added 23/7/1999 S.H. */
+ /* 'z' changed to 'Z' --davidm 1/25/99 */
+
+ str = buf;
+ end = buf + size - 1;
+
+ if (end < buf - 1) {
+ end = ((void *) -1);
+ size = end - buf + 1;
+ }
+
+ for (; *fmt ; ++fmt) {
+ if (*fmt != '%') {
+ if (str <= end)
+ *str = *fmt;
+ ++str;
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+ repeat:
+ ++fmt; /* this also skips first '%' */
+ switch (*fmt) {
+ case '-': flags |= LEFT; goto repeat;
+ case '+': flags |= PLUS; goto repeat;
+ case ' ': flags |= SPACE; goto repeat;
+ case '#': flags |= SPECIAL; goto repeat;
+ case '0': flags |= ZEROPAD; goto repeat;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ field_width = va_arg(args, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ if (isdigit(*fmt))
+ precision = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ precision = va_arg(args, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
+ qualifier = *fmt;
+ ++fmt;
+ if (qualifier == 'l' && *fmt == 'l') {
+ qualifier = 'L';
+ ++fmt;
+ }
+ }
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt) {
+ case 'c':
+ if (!(flags & LEFT)) {
+ while (--field_width > 0) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ }
+ c = (unsigned char) va_arg(args, int);
+ if (str <= end)
+ *str = c;
+ ++str;
+ while (--field_width > 0) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ continue;
+
+ case 's':
+ s = va_arg(args, char *);
+ if (!s)
+ s = "<NULL>";
+
+ len = strnlen(s, precision);
+
+ if (!(flags & LEFT)) {
+ while (len < field_width--) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ }
+ for (i = 0; i < len; ++i) {
+ if (str <= end)
+ *str = *s;
+ ++str; ++s;
+ }
+ while (len < field_width--) {
+ if (str <= end)
+ *str = ' ';
+ ++str;
+ }
+ continue;
+
+ case 'p':
+ if (field_width == -1) {
+ field_width = 2*sizeof(void *);
+ flags |= ZEROPAD;
+ }
+ str = number(str, end,
+ (unsigned long) va_arg(args, void *),
+ 16, field_width, precision, flags);
+ continue;
+
+
+ case 'n':
+ /* FIXME:
+ * What does C99 say about the overflow case here? */
+ if (qualifier == 'l') {
+ long * ip = va_arg(args, long *);
+ *ip = (str - buf);
+ } else if (qualifier == 'Z') {
+ size_t * ip = va_arg(args, size_t *);
+ *ip = (str - buf);
+ } else {
+ int * ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ continue;
+
+ case '%':
+ if (str <= end)
+ *str = '%';
+ ++str;
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'X':
+ flags |= LARGE;
+ case 'x':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= SIGN;
+ case 'u':
+ break;
+
+ default:
+ if (str <= end)
+ *str = '%';
+ ++str;
+ if (*fmt) {
+ if (str <= end)
+ *str = *fmt;
+ ++str;
+ } else {
+ --fmt;
+ }
+ continue;
+ }
+ if (qualifier == 'L')
+ num = va_arg(args, long long);
+ else if (qualifier == 'l') {
+ num = va_arg(args, unsigned long);
+ if (flags & SIGN)
+ num = (signed long) num;
+ } else if (qualifier == 'Z') {
+ num = va_arg(args, size_t);
+ } else if (qualifier == 'h') {
+ num = (unsigned short) va_arg(args, int);
+ if (flags & SIGN)
+ num = (signed short) num;
+ } else {
+ num = va_arg(args, unsigned int);
+ if (flags & SIGN)
+ num = (signed int) num;
+ }
+
+ str = number(str, end, num, base,
+ field_width, precision, flags);
+ }
+ if (str <= end)
+ *str = '\0';
+ else if (size > 0)
+ /* don't write out a null byte if the buf size is zero */
+ *end = '\0';
+ /* the trailing null byte doesn't count towards the total
+ * ++str;
+ */
+ return str-buf;
+}
+
+/**
+ * snprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ */
+int snprintf(char * buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i=vsnprintf(buf,size,fmt,args);
+ va_end(args);
+ return i;
+}
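+
+/*
+ * Illustrative usage: the return value counts the characters the full
+ * expansion needed, so callers can detect truncation:
+ *
+ *     char buf[8];
+ *     int n = snprintf(buf, sizeof(buf), "dom%d: %s", 3, "running");
+ *
+ * leaves buf holding "dom3: r" plus the NUL and n == 13; n >= sizeof(buf)
+ * signals that the output was truncated.
+ */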
+
+/**
+ * vsprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want sprintf instead.
+ */
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+ return vsnprintf(buf, 0xFFFFFFFFUL, fmt, args);
+}
+
+
+/**
+ * sprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ */
+int sprintf(char * buf, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i=vsprintf(buf,fmt,args);
+ va_end(args);
+ return i;
+}
+
+/**
+ * vsscanf - Unformat a buffer into a list of arguments
+ * @buf: input buffer
+ * @fmt: format of buffer
+ * @args: arguments
+ */
+int vsscanf(const char * buf, const char * fmt, va_list args)
+{
+ const char *str = buf;
+ char *next;
+ int num = 0;
+ int qualifier;
+ int base;
+ int field_width = -1;
+ int is_sign = 0;
+
+ while(*fmt && *str) {
+ /* skip any white space in format */
+ /* white space in format matches any amount of
+ * white space, including none, in the input.
+ */
+ if (isspace(*fmt)) {
+ while (isspace(*fmt))
+ ++fmt;
+ while (isspace(*str))
+ ++str;
+ }
+
+ /* anything that is not a conversion must match exactly */
+ if (*fmt != '%' && *fmt) {
+ if (*fmt++ != *str++)
+ break;
+ continue;
+ }
+
+ if (!*fmt)
+ break;
+ ++fmt;
+
+ /* skip this conversion.
+ * advance both strings to next white space
+ */
+ if (*fmt == '*') {
+ while (!isspace(*fmt) && *fmt)
+ fmt++;
+ while (!isspace(*str) && *str)
+ str++;
+ continue;
+ }
+
+ /* get field width */
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+
+ /* get conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt == 'Z') {
+ qualifier = *fmt;
+ fmt++;
+ }
+ base = 10;
+ is_sign = 0;
+
+ if (!*fmt || !*str)
+ break;
+
+ switch(*fmt++) {
+ case 'c':
+ {
+ char *s = (char *) va_arg(args,char*);
+ if (field_width == -1)
+ field_width = 1;
+ do {
+ *s++ = *str++;
+ } while (--field_width > 0 && *str); /* copy at most field_width chars */
+ num++;
+ }
+ continue;
+ case 's':
+ {
+ char *s = (char *) va_arg(args, char *);
+ if(field_width == -1)
+ field_width = INT_MAX;
+ /* first, skip leading white space in buffer */
+ while (isspace(*str))
+ str++;
+
+ /* now copy until next white space */
+ while (*str && !isspace(*str) && field_width--) {
+ *s++ = *str++;
+ }
+ *s = '\0';
+ num++;
+ }
+ continue;
+ case 'n':
+ /* return number of characters read so far */
+ {
+ int *i = (int *)va_arg(args,int*);
+ *i = str - buf;
+ }
+ continue;
+ case 'o':
+ base = 8;
+ break;
+ case 'x':
+ case 'X':
+ base = 16;
+ break;
+ case 'd':
+ case 'i':
+ is_sign = 1;
+ case 'u':
+ break;
+ case '%':
+ /* looking for '%' in str */
+ if (*str++ != '%')
+ return num;
+ continue;
+ default:
+ /* invalid format; stop here */
+ return num;
+ }
+
+ /* have some sort of integer conversion.
+ * first, skip white space in buffer.
+ */
+ while (isspace(*str))
+ str++;
+
+ if (!*str || !isdigit(*str))
+ break;
+
+ switch(qualifier) {
+ case 'h':
+ if (is_sign) {
+ short *s = (short *) va_arg(args,short *);
+ *s = (short) simple_strtol(str,&next,base);
+ } else {
+ unsigned short *s = (unsigned short *) va_arg(args, unsigned short *);
+ *s = (unsigned short) simple_strtoul(str, &next, base);
+ }
+ break;
+ case 'l':
+ if (is_sign) {
+ long *l = (long *) va_arg(args,long *);
+ *l = simple_strtol(str,&next,base);
+ } else {
+ unsigned long *l = (unsigned long*) va_arg(args,unsigned long*);
+ *l = simple_strtoul(str,&next,base);
+ }
+ break;
+ case 'L':
+ if (is_sign) {
+ long long *l = (long long*) va_arg(args,long long *);
+ *l = simple_strtoll(str,&next,base);
+ } else {
+ unsigned long long *l = (unsigned long long*) va_arg(args,unsigned long long*);
+ *l = simple_strtoull(str,&next,base);
+ }
+ break;
+ case 'Z':
+ {
+ size_t *s = (size_t*) va_arg(args,size_t*);
+ *s = (size_t) simple_strtoul(str,&next,base);
+ }
+ break;
+ default:
+ if (is_sign) {
+ int *i = (int *) va_arg(args, int*);
+ *i = (int) simple_strtol(str,&next,base);
+ } else {
+ unsigned int *i = (unsigned int*) va_arg(args, unsigned int*);
+ *i = (unsigned int) simple_strtoul(str,&next,base);
+ }
+ break;
+ }
+ num++;
+
+ if (!next)
+ break;
+ str = next;
+ }
+ return num;
+}
+
+/**
+ * sscanf - Unformat a buffer into a list of arguments
+ * @buf: input buffer
+ * @fmt: formatting of buffer
+ * @...: resulting arguments
+ */
+int sscanf(const char * buf, const char * fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args,fmt);
+ i = vsscanf(buf,fmt,args);
+ va_end(args);
+ return i;
+}
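+
+/*
+ * Illustrative usage (names are hypothetical):
+ *
+ *     const char *line = "major 3 minor 0";
+ *     int maj, min;
+ *     if (sscanf(line, "major %d minor %d", &maj, &min) == 2)
+ *         ...use maj == 3, min == 0...
+ *
+ * Note that the integer conversions above bail out unless the first
+ * non-space input character is a digit, so this vsscanf cannot parse
+ * a leading '-' sign.
+ */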
diff --git a/xen/drivers/Makefile b/xen/drivers/Makefile
new file mode 100644
index 0000000000..4aa76a3f25
--- /dev/null
+++ b/xen/drivers/Makefile
@@ -0,0 +1,16 @@
+
+default:
+ $(MAKE) -C char
+ $(MAKE) -C pci
+ $(MAKE) -C net
+ $(MAKE) -C block
+ $(MAKE) -C ide
+ $(MAKE) -C scsi
+
+clean:
+ $(MAKE) -C char clean
+ $(MAKE) -C pci clean
+ $(MAKE) -C net clean
+ $(MAKE) -C block clean
+ $(MAKE) -C ide clean
+ $(MAKE) -C scsi clean
diff --git a/xen/drivers/block/Makefile b/xen/drivers/block/Makefile
new file mode 100644
index 0000000000..574b7d2d79
--- /dev/null
+++ b/xen/drivers/block/Makefile
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o driver.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen/drivers/block/blkpg.c b/xen/drivers/block/blkpg.c
new file mode 100644
index 0000000000..2e27a1aa2b
--- /dev/null
+++ b/xen/drivers/block/blkpg.c
@@ -0,0 +1,315 @@
+/*
+ * Partition table and disk geometry handling
+ *
+ * This obsoletes the partition-handling code in genhd.c:
+ * Userspace can look at a disk in arbitrary format and tell
+ * the kernel what partitions there are on the disk, and how
+ * these should be numbered.
+ * It also allows one to repartition a disk that is being used.
+ *
+ * A single ioctl with lots of subfunctions:
+ *
+ * Device number stuff:
+ * get_whole_disk() (given the device number of a partition, find
+ * the device number of the encompassing disk)
+ * get_all_partitions() (given the device number of a disk, return the
+ * device numbers of all its known partitions)
+ *
+ * Partition stuff:
+ * add_partition()
+ * delete_partition()
+ * test_partition_in_use() (also for test_disk_in_use)
+ *
+ * Geometry stuff:
+ * get_geometry()
+ * set_geometry()
+ * get_bios_drivedata()
+ *
+ * For today, only the partition stuff - aeb, 990515
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/errno.h>
+/*#include <xeno/fs.h> */ /* for BLKRASET, ... */
+#include <xeno/sched.h> /* for capable() */
+#include <xeno/blk.h> /* for set_device_ro() */
+#include <xeno/blkpg.h>
+#include <xeno/genhd.h>
+/*#include <xeno/swap.h>*/ /* for is_swap_partition() */
+#include <xeno/module.h> /* for EXPORT_SYMBOL */
+
+#include <asm/uaccess.h>
+
+#define is_mounted(_dev) (0)
+#define is_swap_partition(_dev) (0)
+
+#define fsync_dev(_dev) (panic("fsync_dev???"))
+#define invalidate_buffers(_dev) (panic("invalidate_buffers???"))
+
+/*
+ * What is the data describing a partition?
+ *
+ * 1. a device number (kdev_t)
+ * 2. a starting sector and number of sectors (hd_struct)
+ * given in the part[] array of the gendisk structure for the drive.
+ *
+ * The number of sectors is replicated in the sizes[] array of
+ * the gendisk structure for the major, which again is copied to
+ * the blk_size[][] array.
+ * (However, hd_struct has the number of 512-byte sectors,
+ * g->sizes[] and blk_size[][] have the number of 1024-byte blocks.)
+ * Note that several drives may have the same major.
+ */
+
+/*
+ * Add a partition.
+ *
+ * returns: EINVAL: bad parameters
+ * ENXIO: cannot find drive
+ * EBUSY: proposed partition overlaps an existing one
+ * or has the same number as an existing one
+ * 0: all OK.
+ */
+int add_partition(kdev_t dev, struct blkpg_partition *p) {
+ struct gendisk *g;
+ long long ppstart, pplength;
+ long pstart, plength;
+ int i, drive, first_minor, end_minor, minor;
+
+ /* convert bytes to sectors, check for fit in a hd_struct */
+ ppstart = (p->start >> 9);
+ pplength = (p->length >> 9);
+ pstart = ppstart;
+ plength = pplength;
+ if (pstart != ppstart || plength != pplength
+ || pstart < 0 || plength < 0)
+ return -EINVAL;
+
+ /* find the drive major */
+ g = get_gendisk(dev);
+ if (!g)
+ return -ENXIO;
+
+ /* existing drive? */
+ drive = (MINOR(dev) >> g->minor_shift);
+ first_minor = (drive << g->minor_shift);
+ end_minor = first_minor + g->max_p;
+ if (drive >= g->nr_real)
+ return -ENXIO;
+
+ /* drive and partition number OK? */
+ if (first_minor != MINOR(dev) || p->pno <= 0 || p->pno >= g->max_p)
+ return -EINVAL;
+
+ /* partition number in use? */
+ minor = first_minor + p->pno;
+ if (g->part[minor].nr_sects != 0)
+ return -EBUSY;
+
+ /* overlap? */
+ for (i=first_minor+1; i<end_minor; i++)
+ if (!(pstart+plength <= g->part[i].start_sect ||
+ pstart >= g->part[i].start_sect + g->part[i].nr_sects))
+ return -EBUSY;
+
+ /* all seems OK */
+ g->part[minor].start_sect = pstart;
+ g->part[minor].nr_sects = plength;
+ if (g->sizes)
+ g->sizes[minor] = (plength >> (BLOCK_SIZE_BITS - 9));
+#ifdef DEVFS_MUST_DIE
+ devfs_register_partitions (g, first_minor, 0);
+#endif
+ return 0;
+}
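+
+/*
+ * Worked example (illustrative): a blkpg_partition with start == 1048576
+ * and length == 524288 (both in bytes) becomes pstart == 2048,
+ * plength == 1024 sectors after the >> 9 above; the overlap scan then
+ * rejects it with -EBUSY if [2048, 3072) intersects any existing
+ * partition's [start_sect, start_sect + nr_sects) range.
+ */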
+
+/*
+ * Delete a partition given by partition number
+ *
+ * returns: EINVAL: bad parameters
+ * ENXIO: cannot find partition
+ * EBUSY: partition is busy
+ * 0: all OK.
+ *
+ * Note that the dev argument refers to the entire disk, not the partition.
+ */
+int del_partition(kdev_t dev, struct blkpg_partition *p) {
+ struct gendisk *g;
+ kdev_t devp;
+ int drive, first_minor, minor;
+
+ /* find the drive major */
+ g = get_gendisk(dev);
+ if (!g)
+ return -ENXIO;
+
+ /* drive and partition number OK? */
+ drive = (MINOR(dev) >> g->minor_shift);
+ first_minor = (drive << g->minor_shift);
+ if (first_minor != MINOR(dev) || p->pno <= 0 || p->pno >= g->max_p)
+ return -EINVAL;
+
+ /* existing drive and partition? */
+ minor = first_minor + p->pno;
+ if (drive >= g->nr_real || g->part[minor].nr_sects == 0)
+ return -ENXIO;
+
+ /* partition in use? Incomplete check for now. */
+ devp = MKDEV(MAJOR(dev), minor);
+ if (is_mounted(devp) || is_swap_partition(devp))
+ return -EBUSY;
+
+ /* all seems OK */
+ fsync_dev(devp);
+ invalidate_buffers(devp);
+
+ g->part[minor].start_sect = 0;
+ g->part[minor].nr_sects = 0;
+ if (g->sizes)
+ g->sizes[minor] = 0;
+#ifdef DEVFS_MUST_DIE
+ devfs_register_partitions (g, first_minor, 0);
+#endif
+
+ return 0;
+}
+
+int blkpg_ioctl(kdev_t dev, struct blkpg_ioctl_arg *arg)
+{
+ struct blkpg_ioctl_arg a;
+ struct blkpg_partition p;
+ int len;
+
+ if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
+ return -EFAULT;
+
+ switch (a.op) {
+ case BLKPG_ADD_PARTITION:
+ case BLKPG_DEL_PARTITION:
+ len = a.datalen;
+ if (len < sizeof(struct blkpg_partition))
+ return -EINVAL;
+ if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
+ return -EFAULT;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (a.op == BLKPG_ADD_PARTITION)
+ return add_partition(dev, &p);
+ else
+ return del_partition(dev, &p);
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Common ioctl's for block devices
+ */
+
+int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg)
+{
+#if 1
+ printk("May want to check out blk_ioctl...\n");
+ return -EINVAL;
+#else
+ struct gendisk *g;
+ u64 ullval = 0;
+ int intval;
+
+ if (!dev)
+ return -EINVAL;
+
+ switch (cmd) {
+ case BLKROSET:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (get_user(intval, (int *)(arg)))
+ return -EFAULT;
+ set_device_ro(dev, intval);
+ return 0;
+ case BLKROGET:
+ intval = (is_read_only(dev) != 0);
+ return put_user(intval, (int *)(arg));
+
+ case BLKRASET:
+ if(!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if(arg > 0xff)
+ return -EINVAL;
+ read_ahead[MAJOR(dev)] = arg;
+ return 0;
+ case BLKRAGET:
+ if (!arg)
+ return -EINVAL;
+ return put_user(read_ahead[MAJOR(dev)], (long *) arg);
+
+ case BLKFLSBUF:
+ if(!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ fsync_dev(dev);
+ invalidate_buffers(dev);
+ return 0;
+
+ case BLKSSZGET:
+ /* get block device sector size as needed e.g. by fdisk */
+ intval = get_hardsect_size(dev);
+ return put_user(intval, (int *) arg);
+
+ case BLKGETSIZE:
+ case BLKGETSIZE64:
+ g = get_gendisk(dev);
+ if (g)
+ ullval = g->part[MINOR(dev)].nr_sects;
+
+ if (cmd == BLKGETSIZE)
+ return put_user((unsigned long)ullval, (unsigned long *)arg);
+ else
+ return put_user(ullval << 9, (u64 *)arg);
+#if 0
+ case BLKRRPART: /* Re-read partition tables */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ return reread_partitions(dev, 1);
+#endif
+
+ case BLKPG:
+ return blkpg_ioctl(dev, (struct blkpg_ioctl_arg *) arg);
+
+ case BLKELVGET:
+ return blkelvget_ioctl(&blk_get_queue(dev)->elevator,
+ (blkelv_ioctl_arg_t *) arg);
+ case BLKELVSET:
+ return blkelvset_ioctl(&blk_get_queue(dev)->elevator,
+ (blkelv_ioctl_arg_t *) arg);
+
+ case BLKBSZGET:
+ /* get the logical block size (cf. BLKSSZGET) */
+ intval = BLOCK_SIZE;
+ if (blksize_size[MAJOR(dev)])
+ intval = blksize_size[MAJOR(dev)][MINOR(dev)];
+ return put_user (intval, (int *) arg);
+
+ case BLKBSZSET:
+ /* set the logical block size */
+ if (!capable (CAP_SYS_ADMIN))
+ return -EACCES;
+ if (!dev || !arg)
+ return -EINVAL;
+ if (get_user (intval, (int *) arg))
+ return -EFAULT;
+ if (intval > PAGE_SIZE || intval < 512 ||
+ (intval & (intval - 1)))
+ return -EINVAL;
+ if (is_mounted (dev) || is_swap_partition (dev))
+ return -EBUSY;
+ set_blocksize (dev, intval);
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+#endif
+}
+
+EXPORT_SYMBOL(blk_ioctl);
diff --git a/xen/drivers/block/elevator.c b/xen/drivers/block/elevator.c
new file mode 100644
index 0000000000..281e8f8b8d
--- /dev/null
+++ b/xen/drivers/block/elevator.c
@@ -0,0 +1,224 @@
+/*
+ * linux/drivers/block/elevator.c
+ *
+ * Block device elevator/IO-scheduler.
+ *
+ * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ *
+ * 30042000 Jens Axboe <axboe@suse.de> :
+ *
+ * Split the elevator a bit so that it is possible to choose a different
+ * one or even write a new "plug in". There are three pieces:
+ * - elevator_fn, inserts a new request in the queue list
+ * - elevator_merge_fn, decides whether a new buffer can be merged with
+ * an existing request
+ * - elevator_dequeue_fn, called when a request is taken off the active list
+ *
+ * 20082000 Dave Jones <davej@suse.de> :
+ * Removed tests for max-bomb-segments, which was breaking elvtune
+ * when run without -bN
+ *
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+/*#include <xeno/fs.h>*/
+#include <xeno/blkdev.h>
+#include <xeno/elevator.h>
+#include <xeno/blk.h>
+#include <xeno/module.h>
+#include <asm/uaccess.h>
+
+/*
+ * This is a bit tricky. It's given that bh and rq are for the same
+ * device, but the next request might of course not be. Run through
+ * the tests below to check if we want to insert here if we can't merge
+ * bh into an existing request
+ */
+inline int bh_rq_in_between(struct buffer_head *bh, struct request *rq,
+ struct list_head *head)
+{
+ struct list_head *next;
+ struct request *next_rq;
+
+ next = rq->queue.next;
+ if (next == head)
+ return 0;
+
+ /*
+ * if the device is different (usually on a different partition),
+ * just check if bh is after rq
+ */
+ next_rq = blkdev_entry_to_request(next);
+ if (next_rq->rq_dev != rq->rq_dev)
+ return bh->b_rsector > rq->sector;
+
+ /*
+ * ok, rq, next_rq and bh are on the same device. if bh is in between
+ * the two, this is the sweet spot
+ */
+ if (bh->b_rsector < next_rq->sector && bh->b_rsector > rq->sector)
+ return 1;
+
+ /*
+ * next_rq is ordered wrt rq, but bh is not in between the two
+ */
+ if (next_rq->sector > rq->sector)
+ return 0;
+
+ /*
+ * next_rq and rq not ordered, if we happen to be either before
+ * next_rq or after rq insert here anyway
+ */
+ if (bh->b_rsector > rq->sector || bh->b_rsector < next_rq->sector)
+ return 1;
+
+ return 0;
+}
+
+
+int elevator_linus_merge(request_queue_t *q, struct request **req,
+ struct list_head * head,
+ struct buffer_head *bh, int rw,
+ int max_sectors)
+{
+ struct list_head *entry = &q->queue_head;
+ unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE;
+
+ while ((entry = entry->prev) != head) {
+ struct request *__rq = blkdev_entry_to_request(entry);
+
+ /*
+ * simply "aging" of requests in queue
+ */
+ if (__rq->elevator_sequence-- <= 0)
+ break;
+
+ if (__rq->waiting)
+ continue;
+ if (__rq->rq_dev != bh->b_rdev)
+ continue;
+ if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head))
+ *req = __rq;
+ if (__rq->cmd != rw)
+ continue;
+ if (__rq->nr_sectors + count > max_sectors)
+ continue;
+ if (__rq->elevator_sequence < count)
+ break;
+ if (__rq->sector + __rq->nr_sectors == bh->b_rsector) {
+ ret = ELEVATOR_BACK_MERGE;
+ *req = __rq;
+ break;
+ } else if (__rq->sector - count == bh->b_rsector) {
+ ret = ELEVATOR_FRONT_MERGE;
+ __rq->elevator_sequence -= count;
+ *req = __rq;
+ break;
+ }
+ }
+
+ return ret;
+}
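+
+/*
+ * Worked example (illustrative): given a queued request covering
+ * sectors [100, 108) on the same device, a 1KB buffer_head (count == 2)
+ * at b_rsector == 108 yields ELEVATOR_BACK_MERGE (sector + nr_sectors
+ * matches), one at b_rsector == 98 yields ELEVATOR_FRONT_MERGE
+ * (sector - count matches), and anything else at best picks an
+ * insertion point via bh_rq_in_between() and returns ELEVATOR_NO_MERGE.
+ */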
+
+void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count)
+{
+ struct list_head *entry = &req->queue, *head = &q->queue_head;
+
+ /*
+ * second pass scan of requests that got passed over, if any
+ */
+ while ((entry = entry->next) != head) {
+ struct request *tmp = blkdev_entry_to_request(entry);
+ tmp->elevator_sequence -= count;
+ }
+}
+
+void elevator_linus_merge_req(struct request *req, struct request *next)
+{
+ if (next->elevator_sequence < req->elevator_sequence)
+ req->elevator_sequence = next->elevator_sequence;
+}
+
+/*
+ * See if we can find a request that this buffer can be coalesced with.
+ */
+int elevator_noop_merge(request_queue_t *q, struct request **req,
+ struct list_head * head,
+ struct buffer_head *bh, int rw,
+ int max_sectors)
+{
+ struct list_head *entry;
+ unsigned int count = bh->b_size >> 9;
+
+ if (list_empty(&q->queue_head))
+ return ELEVATOR_NO_MERGE;
+
+ entry = &q->queue_head;
+ while ((entry = entry->prev) != head) {
+ struct request *__rq = blkdev_entry_to_request(entry);
+
+ if (__rq->cmd != rw)
+ continue;
+ if (__rq->rq_dev != bh->b_rdev)
+ continue;
+ if (__rq->nr_sectors + count > max_sectors)
+ continue;
+ if (__rq->waiting)
+ continue;
+ if (__rq->sector + __rq->nr_sectors == bh->b_rsector) {
+ *req = __rq;
+ return ELEVATOR_BACK_MERGE;
+ } else if (__rq->sector - count == bh->b_rsector) {
+ *req = __rq;
+ return ELEVATOR_FRONT_MERGE;
+ }
+ }
+
+ *req = blkdev_entry_to_request(q->queue_head.prev);
+ return ELEVATOR_NO_MERGE;
+}
+
+void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {}
+
+void elevator_noop_merge_req(struct request *req, struct request *next) {}
+
+int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg)
+{
+ blkelv_ioctl_arg_t output;
+
+ output.queue_ID = elevator->queue_ID;
+ output.read_latency = elevator->read_latency;
+ output.write_latency = elevator->write_latency;
+ output.max_bomb_segments = 0;
+
+ if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t)))
+ return -EFAULT;
+
+ return 0;
+}
+
+int blkelvset_ioctl(elevator_t * elevator, const blkelv_ioctl_arg_t * arg)
+{
+ blkelv_ioctl_arg_t input;
+
+ if (copy_from_user(&input, arg, sizeof(blkelv_ioctl_arg_t)))
+ return -EFAULT;
+
+ if (input.read_latency < 0)
+ return -EINVAL;
+ if (input.write_latency < 0)
+ return -EINVAL;
+
+ elevator->read_latency = input.read_latency;
+ elevator->write_latency = input.write_latency;
+ return 0;
+}
+
+void elevator_init(elevator_t * elevator, elevator_t type)
+{
+ static unsigned int queue_ID;
+
+ *elevator = type;
+ elevator->queue_ID = queue_ID++;
+}
diff --git a/xen/drivers/block/genhd.c b/xen/drivers/block/genhd.c
new file mode 100644
index 0000000000..427c2cb312
--- /dev/null
+++ b/xen/drivers/block/genhd.c
@@ -0,0 +1,219 @@
+/*
+ * Code extracted from
+ * linux/kernel/hd.c
+ *
+ * Copyright (C) 1991-1998 Linus Torvalds
+ *
+ * devfs support - jj, rgooch, 980122
+ *
+ * Moved partition checking code to fs/partitions* - Russell King
+ * (linux@arm.uk.linux.org)
+ */
+
+/*
+ * TODO: rip out the remaining init crap from this file --hch
+ */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+/*#include <xeno/fs.h>*/
+#include <xeno/genhd.h>
+#include <xeno/lib.h>
+#include <xeno/blk.h>
+#include <xeno/init.h>
+#include <xeno/spinlock.h>
+
+
+static rwlock_t gendisk_lock;
+
+/*
+ * Global kernel list of partitioning information.
+ *
+ * XXX: you should _never_ access this directly.
+ * the only reason this is exported is source compatibility.
+ */
+/*static*/ struct gendisk *gendisk_head;
+static struct gendisk *gendisk_array[MAX_BLKDEV];
+
+EXPORT_SYMBOL(gendisk_head);
+
+
+/**
+ * add_gendisk - add partitioning information to kernel list
+ * @gp: per-device partitioning information
+ *
+ * This function registers the partitioning information in @gp
+ * with the kernel.
+ */
+void
+add_gendisk(struct gendisk *gp)
+{
+ struct gendisk *sgp;
+
+ write_lock(&gendisk_lock);
+
+ /*
+ * In 2.5 this will go away. Fix the drivers who rely on
+ * old behaviour.
+ */
+
+ for (sgp = gendisk_head; sgp; sgp = sgp->next)
+ {
+ if (sgp == gp)
+ {
+// printk(KERN_ERR "add_gendisk: device major %d is buggy and added a live gendisk!\n",
+// sgp->major)
+ goto out;
+ }
+ }
+ gendisk_array[gp->major] = gp;
+ gp->next = gendisk_head;
+ gendisk_head = gp;
+out:
+ write_unlock(&gendisk_lock);
+}
+
+EXPORT_SYMBOL(add_gendisk);
+
+
+/**
+ * del_gendisk - remove partitioning information from kernel list
+ * @gp: per-device partitioning information
+ *
+ * This function unregisters the partitioning information in @gp
+ * with the kernel.
+ */
+void
+del_gendisk(struct gendisk *gp)
+{
+ struct gendisk **gpp;
+
+ write_lock(&gendisk_lock);
+ gendisk_array[gp->major] = NULL;
+ for (gpp = &gendisk_head; *gpp; gpp = &((*gpp)->next))
+ if (*gpp == gp)
+ break;
+ if (*gpp)
+ *gpp = (*gpp)->next;
+ write_unlock(&gendisk_lock);
+}
+
+EXPORT_SYMBOL(del_gendisk);
+
+
+/**
+ * get_gendisk - get partitioning information for a given device
+ * @dev: device to get partitioning information for
+ *
+ * This function gets the structure containing partitioning
+ * information for the given device @dev.
+ */
+struct gendisk *
+get_gendisk(kdev_t dev)
+{
+ struct gendisk *gp = NULL;
+ int maj = MAJOR(dev);
+
+ read_lock(&gendisk_lock);
+ if ((gp = gendisk_array[maj]))
+ goto out;
+
+ /* This is needed for early 2.4 source compatibility. --hch */
+ for (gp = gendisk_head; gp; gp = gp->next)
+ if (gp->major == maj)
+ break;
+out:
+ read_unlock(&gendisk_lock);
+ return gp;
+}
+
+EXPORT_SYMBOL(get_gendisk);
+
+
+/**
+ * walk_gendisk - issue a command for every registered gendisk
+ * @walk: user-specified callback
+ * @data: opaque data for the callback
+ *
+ * This function walks through the gendisk chain and calls back
+ * into @walk for every element.
+ */
+int
+walk_gendisk(int (*walk)(struct gendisk *, void *), void *data)
+{
+ struct gendisk *gp;
+ int error = 0;
+
+ read_lock(&gendisk_lock);
+ for (gp = gendisk_head; gp; gp = gp->next)
+ if ((error = walk(gp, data)))
+ break;
+ read_unlock(&gendisk_lock);
+
+ return error;
+}
+
+
+#ifdef CONFIG_PROC_FS
+int
+get_partition_list(char *page, char **start, off_t offset, int count)
+{
+ struct gendisk *gp;
+ struct hd_struct *hd;
+ char buf[64];
+ int len, n;
+
+ len = sprintf(page, "major minor #blocks name\n\n");
+
+ read_lock(&gendisk_lock);
+ for (gp = gendisk_head; gp; gp = gp->next) {
+ for (n = 0; n < (gp->nr_real << gp->minor_shift); n++) {
+ if (gp->part[n].nr_sects == 0)
+ continue;
+
+ hd = &gp->part[n]; disk_round_stats(hd);
+ len += sprintf(page + len,
+ "%4d %4d %10d %s\n", gp->major,
+ n, gp->sizes[n], disk_name(gp, n, buf));
+
+ if (len < offset)
+ offset -= len, len = 0;
+ else if (len >= offset + count)
+ goto out;
+ }
+ }
+
+out:
+ read_unlock(&gendisk_lock);
+ *start = page + offset;
+ len -= offset;
+ if (len < 0)
+ len = 0;
+ return len > count ? count : len;
+}
+#endif
+
+
+extern int blk_dev_init(void);
+extern int net_dev_init(void);
+extern void console_map_init(void);
+extern int atmdev_init(void);
+
+int __init device_init(void)
+{
+ rwlock_init(&gendisk_lock);
+ blk_dev_init();
+ sti();
+#ifdef CONFIG_NET
+ net_dev_init();
+#endif
+#ifdef CONFIG_ATM
+ (void) atmdev_init();
+#endif
+#ifdef CONFIG_VT
+ console_map_init();
+#endif
+ return 0;
+}
+
+__initcall(device_init);
diff --git a/xen/drivers/block/ll_rw_blk.c b/xen/drivers/block/ll_rw_blk.c
new file mode 100644
index 0000000000..615b332c4b
--- /dev/null
+++ b/xen/drivers/block/ll_rw_blk.c
@@ -0,0 +1,1461 @@
+/*
+ * linux/drivers/block/ll_rw_blk.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
+ * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
+ */
+
+/*
+ * This handles all read/write requests to block devices
+ */
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/sched.h>
+/*#include <xeno/kernel_stat.h>*/
+#include <xeno/errno.h>
+/*#include <xeno/locks.h>*/
+#include <xeno/mm.h>
+/*#include <xeno/swap.h>*/
+#include <xeno/init.h>
+/*#include <xeno/smp_lock.h>*/
+/*#include <xeno/completion.h>*/
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <xeno/blk.h>
+/*#include <xeno/highmem.h>*/
+#include <xeno/slab.h>
+#include <xeno/module.h>
+
+/*
+ * KAF: We can turn off noise relating to barking guest-OS requests.
+ */
+#if 0
+#define DPRINTK(_f, _a...) printk(_f , ## _a)
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+/* This will die as all synchronous stuff is coming to an end */
+#define complete(_r) panic("completion.h stuff may be needed...")
+
+/*
+ * MAC Floppy IWM hooks
+ */
+
+#ifdef CONFIG_MAC_FLOPPY_IWM
+extern int mac_floppy_init(void);
+#endif
+
+/*
+ * For the allocated request tables
+ */
+static kmem_cache_t *request_cachep;
+
+/*
+ * The "disk" task queue is used to start the actual requests
+ * after a plug
+ */
+DECLARE_TASK_QUEUE(tq_disk);
+
+/*
+ * Protect the request list against multiple users..
+ *
+ * With this spinlock the Linux block IO subsystem is 100% SMP threaded
+ * from the IRQ event side, and almost 100% SMP threaded from the syscall
+ * side (we still have protect against block device array operations, and
+ * the do_request() side is casually still unsafe. The kernel lock protects
+ * this part currently.).
+ *
+ * there is a fair chance that things will work just OK if these functions
+ * are called with no global kernel lock held ...
+ */
+spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
+
+/* This specifies how many sectors to read ahead on the disk. */
+
+int read_ahead[MAX_BLKDEV];
+
+/* blk_dev_struct is:
+ * *request_fn
+ * *current_request
+ */
+struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
+
+/*
+ * blk_size contains the size of all block-devices in units of 1024 byte
+ * sectors:
+ *
+ * blk_size[MAJOR][MINOR]
+ *
+ * if (!blk_size[MAJOR]) then no minor size checking is done.
+ */
+int * blk_size[MAX_BLKDEV];
+
+/*
+ * blksize_size contains the size of all block-devices:
+ *
+ * blksize_size[MAJOR][MINOR]
+ *
+ * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
+ */
+int * blksize_size[MAX_BLKDEV];
+
+/*
+ * hardsect_size contains the size of the hardware sector of a device.
+ *
+ * hardsect_size[MAJOR][MINOR]
+ *
+ * if (!hardsect_size[MAJOR])
+ * then 512 bytes is assumed.
+ * else
+ * sector_size is hardsect_size[MAJOR][MINOR]
+ * This is currently set by some scsi devices and read by the msdos fs driver.
+ * Other uses may appear later.
+ */
+int * hardsect_size[MAX_BLKDEV];
+
+/*
+ * The following tunes the read-ahead algorithm in mm/filemap.c
+ */
+int * max_readahead[MAX_BLKDEV];
+
+/*
+ * Max number of sectors per request
+ */
+int * max_sectors[MAX_BLKDEV];
+
+static inline int get_max_sectors(kdev_t dev)
+{
+ if (!max_sectors[MAJOR(dev)])
+ return MAX_SECTORS;
+ return max_sectors[MAJOR(dev)][MINOR(dev)];
+}
+
+inline request_queue_t *blk_get_queue(kdev_t dev)
+{
+ struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
+
+ if (bdev->queue)
+ return bdev->queue(dev);
+ else
+ return &blk_dev[MAJOR(dev)].request_queue;
+}
+
+static int __blk_cleanup_queue(struct request_list *list)
+{
+ struct list_head *head = &list->free;
+ struct request *rq;
+ int i = 0;
+
+ while (!list_empty(head)) {
+ rq = list_entry(head->next, struct request, queue);
+ list_del(&rq->queue);
+ kmem_cache_free(request_cachep, rq);
+ i++;
+ };
+
+ if (i != list->count)
+ printk("request list leak!\n");
+
+ list->count = 0;
+ return i;
+}
+
+/**
+ * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
+ * @q: the request queue to be released
+ *
+ * Description:
+ * blk_cleanup_queue is the pair to blk_init_queue(). It should
+ * be called when a request queue is being released; typically
+ * when a block device is being de-registered. Currently, its
+ * primary task is to free all the &struct request structures that
+ * were allocated to the queue.
+ * Caveat:
+ * Hopefully the low level driver will have finished any
+ * outstanding requests first...
+ **/
+void blk_cleanup_queue(request_queue_t * q)
+{
+ int count = q->nr_requests;
+
+ count -= __blk_cleanup_queue(&q->rq[READ]);
+ count -= __blk_cleanup_queue(&q->rq[WRITE]);
+
+ if (count)
+ printk("blk_cleanup_queue: leaked requests (%d)\n", count);
+
+ memset(q, 0, sizeof(*q));
+}
+
+/**
+ * blk_queue_headactive - indicate whether head of request queue may be active
+ * @q: The queue which this applies to.
+ * @active: A flag indicating whether the head of the queue is active.
+ *
+ * Description:
+ * The driver for a block device may choose to leave the currently active
+ * request on the request queue, removing it only when it has completed.
+ * The queue handling routines assume this by default for safety reasons
+ * and will not involve the head of the request queue in any merging or
+ * reordering of requests when the queue is unplugged (and thus may be
+ * working on this particular request).
+ *
+ * If a driver removes requests from the queue before processing them, then
+ * it may indicate that it does so, thereby allowing the head of the queue
+ * to be involved in merging and reordering. This is done by calling
+ * blk_queue_headactive() with an @active flag of %0.
+ *
+ * If a driver processes several requests at once, it must remove them (or
+ * at least all but one of them) from the request queue.
+ *
+ * When a queue is plugged the head will be assumed to be inactive.
+ **/
+
+void blk_queue_headactive(request_queue_t * q, int active)
+{
+ q->head_active = active;
+}
+
+/**
+ * blk_queue_make_request - define an alternate make_request function for a device
+ * @q: the request queue for the device to be affected
+ * @mfn: the alternate make_request function
+ *
+ * Description:
+ * The normal way for &struct buffer_heads to be passed to a device
+ * driver is for them to be collected into requests on a request
+ * queue, and then to allow the device driver to select requests
+ * off that queue when it is ready. This works well for many block
+ * devices. However some block devices (typically virtual devices
+ * such as md or lvm) do not benefit from the processing on the
+ * request queue, and are served best by having the requests passed
+ * directly to them. This can be achieved by providing a function
+ * to blk_queue_make_request().
+ *
+ * Caveat:
+ * The driver that does this *must* be able to deal appropriately
+ * with buffers in "highmemory", either by calling bh_kmap() to get
+ * a kernel mapping, or by calling create_bounce() to create a
+ * buffer in normal memory.
+ **/
+
+void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
+{
+ q->make_request_fn = mfn;
+}
+
+static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
+{
+ if (req->nr_segments < max_segments) {
+ req->nr_segments++;
+ return 1;
+ }
+ return 0;
+}
+
+static int ll_back_merge_fn(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int max_segments)
+{
+ if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
+ return 1;
+ return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_front_merge_fn(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int max_segments)
+{
+ if (bh->b_data + bh->b_size == req->bh->b_data)
+ return 1;
+ return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
+ struct request *next, int max_segments)
+{
+ int total_segments = req->nr_segments + next->nr_segments;
+
+ if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data)
+ total_segments--;
+
+ if (total_segments > max_segments)
+ return 0;
+
+ req->nr_segments = total_segments;
+ return 1;
+}
+
+/*
+ * "plug" the device if there are no outstanding requests: this will
+ * force the transfer to start only after we have put all the requests
+ * on the list.
+ *
+ * This is called with interrupts off and no requests on the queue.
+ * (and with the request spinlock acquired)
+ */
+static void generic_plug_device(request_queue_t *q, kdev_t dev)
+{
+ /*
+ * no need to replug device
+ */
+ if (!list_empty(&q->queue_head) || q->plugged)
+ return;
+
+ q->plugged = 1;
+ queue_task(&q->plug_tq, &tq_disk);
+}
+
+/*
+ * remove the plug and let it rip..
+ */
+static inline void __generic_unplug_device(request_queue_t *q)
+{
+ if (q->plugged) {
+ q->plugged = 0;
+ if (!list_empty(&q->queue_head))
+ {
+ q->request_fn(q);
+ }
+ }
+}
+
+void generic_unplug_device(void *data)
+{
+ request_queue_t *q = (request_queue_t *) data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ __generic_unplug_device(q);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/**
+ * blk_grow_request_list - grow a queue's pool of free requests
+ * @q: The &request_queue_t
+ * @nr_requests: how many requests are desired
+ *
+ * More free requests are added to the queue's free lists, bringing
+ * the total number of requests to @nr_requests.
+ *
+ * The requests are added equally to the request queue's read
+ * and write freelists.
+ *
+ * This function can sleep.
+ *
+ * Returns the (new) number of requests which the queue has available.
+ */
+int blk_grow_request_list(request_queue_t *q, int nr_requests)
+{
+ unsigned long flags;
+ /* Several broken drivers assume that this function doesn't sleep;
+ * this causes system hangs during boot.
+ * As a temporary fix, make the function non-blocking.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+ while (q->nr_requests < nr_requests) {
+ struct request *rq;
+ int rw;
+
+ rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
+ if (rq == NULL)
+ break;
+ memset(rq, 0, sizeof(*rq));
+ rq->rq_status = RQ_INACTIVE;
+ rw = q->nr_requests & 1;
+ list_add(&rq->queue, &q->rq[rw].free);
+ q->rq[rw].count++;
+ q->nr_requests++;
+ }
+ q->batch_requests = q->nr_requests / 4;
+ if (q->batch_requests > 32)
+ q->batch_requests = 32;
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return q->nr_requests;
+}
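+
+/*
+ * Note (illustrative): the "q->nr_requests & 1" above deals new requests
+ * alternately onto the READ and WRITE freelists, so growing the pool to
+ * 128 leaves 64 on each; batch_requests then becomes 128/4, capped at 32.
+ */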
+
+static void blk_init_free_list(request_queue_t *q)
+{
+ /*struct sysinfo si;*/
+ /*int megs;*/ /* Total memory, in megabytes */
+ int nr_requests;
+
+ INIT_LIST_HEAD(&q->rq[READ].free);
+ INIT_LIST_HEAD(&q->rq[WRITE].free);
+ q->rq[READ].count = 0;
+ q->rq[WRITE].count = 0;
+ q->nr_requests = 0;
+
+#if 0
+ si_meminfo(&si);
+ megs = si.totalram >> (20 - PAGE_SHIFT);
+ nr_requests = 128;
+ if (megs < 32)
+ nr_requests /= 2;
+#else
+ nr_requests = 128;
+#endif
+ blk_grow_request_list(q, nr_requests);
+
+#if 0
+ init_waitqueue_head(&q->wait_for_requests[0]);
+ init_waitqueue_head(&q->wait_for_requests[1]);
+#endif
+ spin_lock_init(&q->queue_lock);
+}
+
+static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
+
+/**
+ * blk_init_queue - prepare a request queue for use with a block device
+ * @q: The &request_queue_t to be initialised
+ * @rfn: The function to be called to process requests that have been
+ * placed on the queue.
+ *
+ * Description:
+ * If a block device wishes to use the standard request handling procedures,
+ * which sorts requests and coalesces adjacent requests, then it must
+ * call blk_init_queue(). The function @rfn will be called when there
+ * are requests on the queue that need to be processed. If the device
+ * supports plugging, then @rfn may not be called immediately when requests
+ * are available on the queue, but may be called at some time later instead.
+ * Plugged queues are generally unplugged when a buffer belonging to one
+ * of the requests on the queue is needed, or due to memory pressure.
+ *
+ * @rfn is not required, or even expected, to remove all requests off the
+ * queue, but only as many as it can handle at a time. If it does leave
+ * requests on the queue, it is responsible for arranging that the requests
+ * get dealt with eventually.
+ *
+ * A global spin lock $io_request_lock must be held while manipulating the
+ * requests on the request queue.
+ *
+ * The request on the head of the queue is by default assumed to be
+ * potentially active, and it is not considered for re-ordering or merging
+ * whenever the given queue is unplugged. This behaviour can be changed with
+ * blk_queue_headactive().
+ *
+ * Note:
+ * blk_init_queue() must be paired with a blk_cleanup_queue() call
+ * when the block device is deactivated (such as at module unload).
+ **/
+void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
+{
+ INIT_LIST_HEAD(&q->queue_head);
+ elevator_init(&q->elevator, ELEVATOR_LINUS);
+ blk_init_free_list(q);
+ q->request_fn = rfn;
+ q->back_merge_fn = ll_back_merge_fn;
+ q->front_merge_fn = ll_front_merge_fn;
+ q->merge_requests_fn = ll_merge_requests_fn;
+ q->make_request_fn = __make_request;
+ q->plug_tq.sync = 0;
+ q->plug_tq.routine = &generic_unplug_device;
+ q->plug_tq.data = q;
+ q->plugged = 0;
+ /*
+ * These booleans describe the queue properties. We set the
+ * default (and most common) values here. Other drivers can
+ * use the appropriate functions to alter the queue properties
+ * as required.
+ */
+ q->plug_device_fn = generic_plug_device;
+ q->head_active = 1;
+}
+
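+/*
+ * Illustrative sketch only, not part of this file: how a minimal driver
+ * might pair blk_init_queue() with a request function, per the kernel-doc
+ * for blk_init_queue() above. The name mydev_request_fn is hypothetical.
+ */
+#if 0
+static void mydev_request_fn(request_queue_t *q)
+{
+ struct request *req;
+ while (!list_empty(&q->queue_head)) { /* io_request_lock is held */
+ req = blkdev_entry_next_request(&q->queue_head);
+ blkdev_dequeue_request(req); /* claim it before touching hardware */
+ /* ...program the device, then end_that_request_*() on completion... */
+ }
+}
+/* At init: blk_init_queue(q, mydev_request_fn);
+ * at unload: blk_cleanup_queue(q). */
+#endif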
+#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue)
+/*
+ * Get a free request. io_request_lock must be held and interrupts
+ * disabled on the way in. Returns NULL if there are no free requests.
+ */
+static struct request *get_request(request_queue_t *q, int rw)
+{
+ struct request *rq = NULL;
+ struct request_list *rl = q->rq + rw;
+
+ if (!list_empty(&rl->free)) {
+ rq = blkdev_free_rq(&rl->free);
+ list_del(&rq->queue);
+ rl->count--;
+ rq->rq_status = RQ_ACTIVE;
+ rq->cmd = rw;
+ rq->special = NULL;
+ rq->q = q;
+ }
+
+ return rq;
+}
+
+/*
+ * Here's the request allocation design:
+ *
+ * 1: Blocking on request exhaustion is a key part of I/O throttling.
+ *
+ * 2: We want to be `fair' to all requesters. We must avoid starvation, and
+ * attempt to ensure that all requesters sleep for a similar duration. Hence
+ * no stealing requests when there are other processes waiting.
+ *
+ * 3: We also wish to support `batching' of requests. So when a process is
+ * woken, we want to allow it to allocate a decent number of requests
+ * before it blocks again, so they can be nicely merged (this only really
+ * matters if the process happens to be adding requests near the head of
+ * the queue).
+ *
+ * 4: We want to avoid scheduling storms. This isn't really important, because
+ * the system will be I/O bound anyway. But it's easy.
+ *
+ * There is tension between requirements 2 and 3. Once a task has woken,
+ * we don't want to allow it to sleep as soon as it takes its second request.
+ * But we don't want currently-running tasks to steal all the requests
+ * from the sleepers. We handle this with wakeup hysteresis around
+ * 0 .. batch_requests and with the assumption that request taking is much,
+ * much faster than request freeing.
+ *
+ * So here's what we do:
+ *
+ * a) A READA requester fails if free_requests < batch_requests
+ *
+ * We don't want READA requests to prevent sleepers from ever
+ * waking. Note that READA is used extremely rarely - a few
+ * filesystems use it for directory readahead.
+ *
+ * When a process wants a new request:
+ *
+ * b) If free_requests == 0, the requester sleeps in FIFO manner.
+ *
+ * c) If 0 < free_requests < batch_requests and there are waiters,
+ * we still take a request non-blockingly. This provides batching.
+ *
+ * d) If free_requests >= batch_requests, the caller is immediately
+ * granted a new request.
+ *
+ * When a request is released:
+ *
+ * e) If free_requests < batch_requests, do nothing.
+ *
+ * f) If free_requests >= batch_requests, wake up a single waiter.
+ *
+ * The net effect is that when a process is woken at the batch_requests level,
+ * it will be able to take approximately (batch_requests) requests before
+ * blocking again (at the tail of the queue).
+ *
+ * This all assumes that the rate of taking requests is much, much higher
+ * than the rate of releasing them. Which is very true.
+ *
+ * -akpm, Feb 2002.
+ */
+
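+/*
+ * Worked numbers (illustrative): blk_init_free_list() above asks for 128
+ * requests, so batch_requests = min(128/4, 32) = 32. A READA therefore
+ * fails once fewer than 32 requests are free, and a sleeper woken at the
+ * 32-free mark can take roughly 32 requests before blocking again.
+ */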
+static struct request *__get_request_wait(request_queue_t *q, int rw)
+{
+#if 0
+ register struct request *rq;
+ DECLARE_WAITQUEUE(wait, current);
+
+ generic_unplug_device(q);
+ add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait);
+ do {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (q->rq[rw].count == 0)
+ schedule();
+ spin_lock_irq(&io_request_lock);
+ rq = get_request(q,rw);
+ spin_unlock_irq(&io_request_lock);
+ } while (rq == NULL);
+ remove_wait_queue(&q->wait_for_requests[rw], &wait);
+ current->state = TASK_RUNNING;
+ return rq;
+#else
+ panic("__get_request_wait shouldn't be depended on");
+ return NULL;
+#endif
+}
+
+/* RO fail safe mechanism */
+
+static long ro_bits[MAX_BLKDEV][8];
+
+int is_read_only(kdev_t dev)
+{
+ int minor,major;
+
+ major = MAJOR(dev);
+ minor = MINOR(dev);
+ if (major < 0 || major >= MAX_BLKDEV) return 0;
+ return ro_bits[major][minor >> 5] & (1 << (minor & 31));
+}
+
+void set_device_ro(kdev_t dev,int flag)
+{
+ int minor,major;
+
+ major = MAJOR(dev);
+ minor = MINOR(dev);
+ if (major < 0 || major >= MAX_BLKDEV) return;
+ if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
+ else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
+}
+
+inline void drive_stat_acct (kdev_t dev, int rw,
+ unsigned long nr_sectors, int new_io)
+{
+ /*unsigned int major = MAJOR(dev);*/
+ unsigned int index;
+
+ index = disk_index(dev);
+#if 0
+ if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
+ return;
+#endif
+
+#if 0
+ kstat.dk_drive[major][index] += new_io;
+ if (rw == READ) {
+ kstat.dk_drive_rio[major][index] += new_io;
+ kstat.dk_drive_rblk[major][index] += nr_sectors;
+ } else if (rw == WRITE) {
+ kstat.dk_drive_wio[major][index] += new_io;
+ kstat.dk_drive_wblk[major][index] += nr_sectors;
+ } else
+ printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
+#endif
+}
+
+/* Return up to two hd_structs on which to do IO accounting for a given
+ * request. On a partitioned device, we want to account both against
+ * the partition and against the whole disk. */
+static void locate_hd_struct(struct request *req,
+ struct hd_struct **hd1,
+ struct hd_struct **hd2)
+{
+ struct gendisk *gd;
+
+ *hd1 = NULL;
+ *hd2 = NULL;
+
+ gd = get_gendisk(req->rq_dev);
+ if (gd && gd->part) {
+ /* Mask out the partition bits: account for the entire disk */
+ int devnr = MINOR(req->rq_dev) >> gd->minor_shift;
+ int whole_minor = devnr << gd->minor_shift;
+ *hd1 = &gd->part[whole_minor];
+ if (whole_minor != MINOR(req->rq_dev))
+ *hd2= &gd->part[MINOR(req->rq_dev)];
+ }
+}
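+/*
+ * Example (illustrative, assuming a typical IDE gendisk with
+ * minor_shift = 6): a request to minor 1 (first partition) yields
+ * hd1 = the whole-disk entry (minor 0) and hd2 = the partition entry,
+ * so both rows of the statistics get updated.
+ */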
+
+/* Round off the performance stats on an hd_struct. The average IO
+ * queue length and utilisation statistics are maintained by observing
+ * the current state of the queue length and the amount of time it has
+ * been in this state for. Normally, that accounting is done on IO
+ * completion, but that can result in more than a second's worth of IO
+ * being accounted for within any one second, leading to >100%
+ * utilisation. To deal with that, we do a round-off before returning
+ * the results when reading /proc/partitions, accounting immediately for
+ * all queue usage up to the current jiffies and restarting the counters
+ * again. */
+void disk_round_stats(struct hd_struct *hd)
+{
+ unsigned long now = jiffies;
+
+ hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change));
+ hd->last_queue_change = now;
+
+ if (hd->ios_in_flight)
+ hd->io_ticks += (now - hd->last_idle_time);
+ hd->last_idle_time = now;
+}
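+/*
+ * Illustrative reading of the counters above (an assumption about how a
+ * consumer interprets them): over an interval of T jiffies, io_ticks/T
+ * is the device utilisation and aveq/T the mean I/O queue depth.
+ */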
+
+
+static inline void down_ios(struct hd_struct *hd)
+{
+ disk_round_stats(hd);
+ --hd->ios_in_flight;
+}
+
+static inline void up_ios(struct hd_struct *hd)
+{
+ disk_round_stats(hd);
+ ++hd->ios_in_flight;
+}
+
+static void account_io_start(struct hd_struct *hd, struct request *req,
+ int merge, int sectors)
+{
+ switch (req->cmd) {
+ case READ:
+ if (merge)
+ hd->rd_merges++;
+ hd->rd_sectors += sectors;
+ break;
+ case WRITE:
+ if (merge)
+ hd->wr_merges++;
+ hd->wr_sectors += sectors;
+ break;
+ }
+ if (!merge)
+ up_ios(hd);
+}
+
+static void account_io_end(struct hd_struct *hd, struct request *req)
+{
+ unsigned long duration = jiffies - req->start_time;
+ switch (req->cmd) {
+ case READ:
+ hd->rd_ticks += duration;
+ hd->rd_ios++;
+ break;
+ case WRITE:
+ hd->wr_ticks += duration;
+ hd->wr_ios++;
+ break;
+ }
+ down_ios(hd);
+}
+
+void req_new_io(struct request *req, int merge, int sectors)
+{
+ struct hd_struct *hd1, *hd2;
+ locate_hd_struct(req, &hd1, &hd2);
+ if (hd1)
+ account_io_start(hd1, req, merge, sectors);
+ if (hd2)
+ account_io_start(hd2, req, merge, sectors);
+}
+
+void req_finished_io(struct request *req)
+{
+ struct hd_struct *hd1, *hd2;
+ locate_hd_struct(req, &hd1, &hd2);
+ if (hd1)
+ account_io_end(hd1, req);
+ if (hd2)
+ account_io_end(hd2, req);
+}
+
+/*
+ * add-request adds a request to the linked list.
+ * io_request_lock is held and interrupts disabled, as we muck with the
+ * request queue list.
+ *
+ * By this point, req->cmd is always either READ/WRITE, never READA,
+ * which is important for drive_stat_acct() above.
+ */
+static inline void add_request(request_queue_t * q, struct request * req,
+ struct list_head *insert_here)
+{
+ drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
+
+ if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
+ spin_unlock_irq(&io_request_lock);
+ BUG();
+ }
+
+ /*
+ * elevator indicated where it wants this request to be
+ * inserted at elevator_merge time
+ */
+ list_add(&req->queue, insert_here);
+}
+
+/*
+ * Must be called with io_request_lock held and interrupts disabled
+ */
+void blkdev_release_request(struct request *req)
+{
+ request_queue_t *q = req->q;
+ int rw = req->cmd;
+
+ req->rq_status = RQ_INACTIVE;
+ req->q = NULL;
+
+ /*
+ * Request may not have originated from ll_rw_blk. if not,
+ * assume it has free buffers and check waiters
+ */
+ if (q) {
+ list_add(&req->queue, &q->rq[rw].free);
+#if 0
+ if (++q->rq[rw].count >= q->batch_requests &&
+ waitqueue_active(&q->wait_for_requests[rw]))
+ wake_up(&q->wait_for_requests[rw]);
+#endif
+ }
+}
+
+/*
+ * Has to be called with the request spinlock acquired
+ */
+static void attempt_merge(request_queue_t * q,
+ struct request *req,
+ int max_sectors,
+ int max_segments)
+{
+ struct request *next;
+ struct hd_struct *hd1, *hd2;
+
+ next = blkdev_next_request(req);
+ if (req->sector + req->nr_sectors != next->sector)
+ return;
+ if (req->cmd != next->cmd
+ || req->rq_dev != next->rq_dev
+ || req->nr_sectors + next->nr_sectors > max_sectors
+ || next->waiting)
+ return;
+ /*
+ * If we are not allowed to merge these requests, then
+ * return. If we are allowed to merge, then the count
+ * will have been updated to the appropriate number,
+ * and we shouldn't do it here too.
+ */
+ if (!q->merge_requests_fn(q, req, next, max_segments))
+ return;
+
+ q->elevator.elevator_merge_req_fn(req, next);
+ req->bhtail->b_reqnext = next->bh;
+ req->bhtail = next->bhtail;
+ req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
+ list_del(&next->queue);
+
+ /* One last thing: we have removed a request, so we now have one
+ less expected IO to complete for accounting purposes. */
+
+ locate_hd_struct(req, &hd1, &hd2);
+ if (hd1)
+ down_ios(hd1);
+ if (hd2)
+ down_ios(hd2);
+ blkdev_release_request(next);
+}
+
+static inline void attempt_back_merge(request_queue_t * q,
+ struct request *req,
+ int max_sectors,
+ int max_segments)
+{
+ if (&req->queue == q->queue_head.prev)
+ return;
+ attempt_merge(q, req, max_sectors, max_segments);
+}
+
+static inline void attempt_front_merge(request_queue_t * q,
+ struct list_head * head,
+ struct request *req,
+ int max_sectors,
+ int max_segments)
+{
+ struct list_head * prev;
+
+ prev = req->queue.prev;
+ if (head == prev)
+ return;
+ attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
+}
+
+static int __make_request(request_queue_t * q, int rw,
+ struct buffer_head * bh)
+{
+ unsigned int sector, count;
+ int max_segments = MAX_SEGMENTS;
+ struct request * req, *freereq = NULL;
+ int rw_ahead, max_sectors, el_ret;
+ struct list_head *head, *insert_here;
+ int latency;
+ elevator_t *elevator = &q->elevator;
+
+ count = bh->b_size >> 9;
+ sector = bh->b_rsector;
+
+ rw_ahead = 0; /* normal case; gets changed below for READA */
+ switch (rw) {
+ case READA:
+#if 0 /* bread() misinterprets failed READA attempts as IO errors on SMP */
+ rw_ahead = 1;
+#endif
+ rw = READ; /* drop into READ */
+ case READ:
+ case WRITE:
+ latency = elevator_request_latency(elevator, rw);
+ break;
+ default:
+ BUG();
+ goto end_io;
+ }
+
+ /* We'd better have a real physical mapping!
+ Check this bit only if the buffer was dirty and just locked
+ down by us so at this point flushpage will block and
+ won't clear the mapped bit under us. */
+ if (!buffer_mapped(bh))
+ BUG();
+
+ /*
+ * Temporary solution - in 2.5 this will be done by the lowlevel
+ * driver. Create a bounce buffer if the buffer data points into
+ * high memory - keep the original buffer otherwise.
+ */
+#if CONFIG_HIGHMEM
+ bh = create_bounce(rw, bh);
+#endif
+
+/* look for a free request. */
+ /*
+ * Try to coalesce the new request with old requests
+ */
+ max_sectors = get_max_sectors(bh->b_rdev);
+
+again:
+ req = NULL;
+ head = &q->queue_head;
+ /*
+ * Now we acquire the request spinlock, we have to be mega careful
+ * not to schedule or do something nonatomic
+ */
+ spin_lock_irq(&io_request_lock);
+
+ insert_here = head->prev;
+ if (list_empty(head)) {
+ q->plug_device_fn(q, bh->b_rdev); /* is atomic */
+ goto get_rq;
+ } else if (q->head_active && !q->plugged)
+ head = head->next;
+
+ el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors);
+ switch (el_ret) {
+
+ case ELEVATOR_BACK_MERGE:
+ if (!q->back_merge_fn(q, req, bh, max_segments)) {
+ insert_here = &req->queue;
+ break;
+ }
+ elevator->elevator_merge_cleanup_fn(q, req, count);
+ req->bhtail->b_reqnext = bh;
+ req->bhtail = bh;
+ req->nr_sectors = req->hard_nr_sectors += count;
+ blk_started_io(count);
+ drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+ req_new_io(req, 1, count);
+ attempt_back_merge(q, req, max_sectors, max_segments);
+ goto out;
+
+ case ELEVATOR_FRONT_MERGE:
+ if (!q->front_merge_fn(q, req, bh, max_segments)) {
+ insert_here = req->queue.prev;
+ break;
+ }
+ elevator->elevator_merge_cleanup_fn(q, req, count);
+ bh->b_reqnext = req->bh;
+ req->bh = bh;
+ req->buffer = bh->b_data;
+ req->current_nr_sectors = count;
+ req->sector = req->hard_sector = sector;
+ req->nr_sectors = req->hard_nr_sectors += count;
+ blk_started_io(count);
+ drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+ req_new_io(req, 1, count);
+ attempt_front_merge(q, head, req, max_sectors, max_segments);
+ goto out;
+
+ /*
+ * elevator says don't/can't merge. get new request
+ */
+ case ELEVATOR_NO_MERGE:
+ /*
+ * use elevator hints as to where to insert the
+ * request. if no hints, just add it to the back
+ * of the queue
+ */
+ if (req)
+ insert_here = &req->queue;
+ break;
+
+ default:
+ printk("elevator returned crap (%d)\n", el_ret);
+ BUG();
+ }
+
+get_rq:
+ if (freereq) {
+ req = freereq;
+ freereq = NULL;
+ } else {
+ /*
+ * See description above __get_request_wait()
+ */
+ if (rw_ahead) {
+ if (q->rq[rw].count < q->batch_requests) {
+ spin_unlock_irq(&io_request_lock);
+ goto end_io;
+ }
+ req = get_request(q, rw);
+ if (req == NULL)
+ BUG();
+ } else {
+ req = get_request(q, rw);
+ if (req == NULL) {
+ spin_unlock_irq(&io_request_lock);
+ freereq = __get_request_wait(q, rw);
+ goto again;
+ }
+ }
+ }
+
+/* fill up the request-info, and add it to the queue */
+ req->elevator_sequence = latency;
+ req->cmd = rw;
+ req->errors = 0;
+ req->hard_sector = req->sector = sector;
+ req->hard_nr_sectors = req->nr_sectors = count;
+ req->current_nr_sectors = count;
+ req->nr_segments = 1; /* Always 1 for a new request. */
+ req->nr_hw_segments = 1; /* Always 1 for a new request. */
+ req->buffer = bh->b_data;
+ req->waiting = NULL;
+ req->bh = bh;
+ req->bhtail = bh;
+ req->rq_dev = bh->b_rdev;
+ req->start_time = jiffies;
+ req_new_io(req, 0, count);
+ blk_started_io(count);
+ add_request(q, req, insert_here);
+out:
+ if (freereq)
+ blkdev_release_request(freereq);
+ spin_unlock_irq(&io_request_lock);
+ return 0;
+end_io:
+ bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+ return 0;
+}
+
+/**
+ * generic_make_request: hand a buffer head to its device driver for I/O
+ * @rw: READ, WRITE, or READA - what sort of I/O is desired.
+ * @bh: The buffer head describing the location in memory and on the device.
+ *
+ * generic_make_request() is used to make I/O requests of block
+ * devices. It is passed a &struct buffer_head and a &rw value. The
+ * %READ and %WRITE options are (hopefully) obvious in meaning. The
+ * %READA value means that a read is required, but that the driver is
+ * free to fail the request if, for example, it cannot get needed
+ * resources immediately.
+ *
+ * generic_make_request() does not return any status. The
+ * success/failure status of the request, along with notification of
+ * completion, is delivered asynchronously through the bh->b_end_io
+ * function described (one day) elsewhere.
+ *
+ * The caller of generic_make_request must make sure that b_page,
+ * b_addr, b_size are set to describe the memory buffer, that b_rdev
+ * and b_rsector are set to describe the device address, and the
+ * b_end_io and optionally b_private are set to describe how
+ * completion notification should be signaled. BH_Mapped should also
+ * be set (to confirm that b_dev and b_blocknr are valid).
+ *
+ * generic_make_request and the drivers it calls may use b_reqnext,
+ * and may change b_rdev and b_rsector. So the values of these fields
+ * should NOT be depended on after the call to generic_make_request.
+ * Because of this, the caller should record the device address
+ * information in b_dev and b_blocknr.
+ *
+ * Apart from those fields mentioned above, no other fields, and in
+ * particular, no other flags, are changed by generic_make_request or
+ * any lower level drivers.
+ * */
+void generic_make_request (int rw, struct buffer_head * bh)
+{
+ int major = MAJOR(bh->b_rdev);
+ int minorsize = 0;
+ request_queue_t *q;
+
+ if (!bh->b_end_io)
+ BUG();
+
+ /* Test device size, when known. */
+ if (blk_size[major])
+ minorsize = blk_size[major][MINOR(bh->b_rdev)];
+ if (minorsize) {
+ unsigned long maxsector = (minorsize << 1) + 1;
+ unsigned long sector = bh->b_rsector;
+ unsigned int count = bh->b_size >> 9;
+
+ if (maxsector < count || maxsector - count < sector) {
+ /* Yecch */
+ bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped);
+
+ /* This may well happen - the kernel calls bread()
+ without checking the size of the device, e.g.,
+ when mounting a device. */
+ DPRINTK(KERN_INFO
+ "attempt to access beyond end of device\n");
+ DPRINTK(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
+ kdevname(bh->b_rdev), rw,
+ (sector + count)>>1, minorsize);
+
+ /* Yecch again */
+ bh->b_end_io(bh, 0);
+ return;
+ }
+ }
+
+ /*
+ * Resolve the mapping until finished. (drivers are
+ * still free to implement/resolve their own stacking
+ * by explicitly returning 0)
+ */
+ /* NOTE: we don't repeat the blk_size check for each new device.
+ * Stacking drivers are expected to know what they are doing.
+ */
+ do {
+ q = blk_get_queue(bh->b_rdev);
+ if (!q || !q->make_request_fn) {
+ DPRINTK(KERN_ERR
+ "generic_make_request: Trying to access "
+ "nonexistent block-device %s (%ld)\n",
+ kdevname(bh->b_rdev), bh->b_rsector);
+ buffer_IO_error(bh);
+ break;
+ }
+ } while (q->make_request_fn(q, rw, bh));
+}
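+/*
+ * Illustrative caller sketch (hypothetical variable names), showing the
+ * fields the kernel-doc above requires before generic_make_request():
+ */
+#if 0
+ bh->b_dev = bh->b_rdev = dev; /* device address */
+ bh->b_blocknr = block; /* caller's record of the block */
+ bh->b_rsector = sector; /* sector address on the device */
+ bh->b_size = 512; /* one sector's worth of data */
+ bh->b_data = buffer; /* memory buffer */
+ bh->b_end_io = my_end_io; /* hypothetical completion handler */
+ set_bit(BH_Mapped, &bh->b_state); /* b_dev/b_blocknr are valid */
+ generic_make_request(READ, bh);
+#endif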
+
+
+/**
+ * submit_bh: submit a buffer_head to the block device later for I/O
+ * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
+ * @bh: The &struct buffer_head which describes the I/O
+ *
+ * submit_bh() is very similar in purpose to generic_make_request(), and
+ * uses that function to do most of the work.
+ *
+ * The extra functionality provided by submit_bh is to determine
+ * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
+ * This is appropriate for IO requests that come from the buffer
+ * cache and page cache which (currently) always use aligned blocks.
+ */
+void submit_bh(int rw, struct buffer_head * bh)
+{
+ if (!test_bit(BH_Lock, &bh->b_state))
+ BUG();
+
+ set_bit(BH_Req, &bh->b_state);
+ set_bit(BH_Launder, &bh->b_state);
+
+ /*
+ * First step, 'identity mapping' - RAID or LVM might
+ * further remap this.
+ */
+ bh->b_rdev = bh->b_dev;
+ /* bh->b_rsector = bh->b_blocknr * count; */
+
+ generic_make_request(rw, bh);
+}
+
+/**
+ * ll_rw_block: low-level access to block devices
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
+ * @nr: number of &struct buffer_heads in the array
+ * @bhs: array of pointers to &struct buffer_head
+ *
+ * ll_rw_block() takes an array of pointers to &struct buffer_heads,
+ * and requests an I/O operation on them, either a %READ or a %WRITE.
+ * The third %READA option is described in the documentation for
+ * generic_make_request() which ll_rw_block() calls.
+ *
+ * This function provides extra functionality that is not in
+ * generic_make_request() that is relevant to buffers in the buffer
+ * cache or page cache. In particular it drops any buffer that it
+ * cannot get a lock on (with the BH_Lock state bit), any buffer that
+ * appears to be clean when doing a write request, and any buffer that
+ * appears to be up-to-date when doing a read request. Further, it marks
+ * as clean buffers that are processed for writing (the buffer cache
+ * won't assume that they are actually clean until the buffer gets
+ * unlocked).
+ *
+ * ll_rw_block sets b_end_io to a simple completion handler that marks
+ * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
+ * any waiters. A client that needs a more interesting completion
+ * routine should call submit_bh() (or generic_make_request())
+ * directly.
+ *
+ * Caveat:
+ * All of the buffers must be for the same device, and must also be
+ * of the current approved size for the device. */
+
+void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
+{
+ unsigned int major;
+ int correct_size;
+ int i;
+
+ if (!nr)
+ return;
+
+ major = MAJOR(bhs[0]->b_dev);
+
+ /* Determine correct block size for this device. */
+ correct_size = get_hardsect_size(bhs[0]->b_dev);
+
+ /* Verify requested block sizes. */
+ for (i = 0; i < nr; i++) {
+ struct buffer_head *bh = bhs[i];
+ if (bh->b_size % correct_size) {
+ DPRINTK(KERN_NOTICE "ll_rw_block: device %s: "
+ "only %d-char blocks implemented (%u)\n",
+ kdevname(bhs[0]->b_dev),
+ correct_size, bh->b_size);
+ goto sorry;
+ }
+ }
+
+ if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
+ DPRINTK(KERN_NOTICE "Can't write to read-only device %s\n",
+ kdevname(bhs[0]->b_dev));
+ goto sorry;
+ }
+
+ for (i = 0; i < nr; i++) {
+ struct buffer_head *bh = bhs[i];
+
+ /* Only one thread can actually submit the I/O. */
+ if (test_and_set_bit(BH_Lock, &bh->b_state))
+ continue;
+
+ /* We have the buffer lock */
+ atomic_inc(&bh->b_count);
+
+ switch(rw) {
+ case WRITE:
+ if (!atomic_set_buffer_clean(bh))
+ /* Hmmph! Nothing to write */
+ goto end_io;
+ /* __mark_buffer_clean(bh); */
+ break;
+
+ case READA:
+ case READ:
+ if (buffer_uptodate(bh))
+ /* Hmmph! Already have it */
+ goto end_io;
+ break;
+ default:
+ BUG();
+ end_io:
+ bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+ continue;
+ }
+
+ submit_bh(rw, bh);
+ }
+ return;
+
+sorry:
+ /* Make sure we don't get infinite dirty retries.. */
+ for (i = 0; i < nr; i++)
+ mark_buffer_clean(bhs[i]);
+}
+
+#ifdef CONFIG_STRAM_SWAP
+extern int stram_device_init (void);
+#endif
+
+
+/**
+ * end_that_request_first - end I/O on one buffer.
+ * @req: the request being processed
+ * @uptodate: 0 for I/O error
+ * @name: the name printed for an I/O error
+ *
+ * Description:
+ * Ends I/O on the first buffer attached to @req, and sets it up
+ * for the next buffer_head (if any) in the cluster.
+ *
+ * Return:
+ * 0 - we are done with this request, call end_that_request_last()
+ * 1 - still buffers pending for this request
+ *
+ * Caveat:
+ * Drivers implementing their own end_request handling must call
+ * blk_finished_io() appropriately.
+ **/
+
+int end_that_request_first (struct request *req, int uptodate, char *name)
+{
+ struct buffer_head * bh;
+ int nsect;
+
+ req->errors = 0;
+ if (!uptodate)
+ printk("end_request: I/O error, dev %s (%s), sector %lu\n",
+ kdevname(req->rq_dev), name, req->sector);
+
+ if ((bh = req->bh) != NULL) {
+ nsect = bh->b_size >> 9;
+ blk_finished_io(nsect);
+ req->bh = bh->b_reqnext;
+ bh->b_reqnext = NULL;
+ bh->b_end_io(bh, uptodate);
+ if ((bh = req->bh) != NULL) {
+ req->hard_sector += nsect;
+ req->hard_nr_sectors -= nsect;
+ req->sector = req->hard_sector;
+ req->nr_sectors = req->hard_nr_sectors;
+
+ req->current_nr_sectors = bh->b_size >> 9;
+ if (req->nr_sectors < req->current_nr_sectors) {
+ req->nr_sectors = req->current_nr_sectors;
+ printk("end_request: buffer-list destroyed\n");
+ }
+ req->buffer = bh->b_data;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+void end_that_request_last(struct request *req)
+{
+ if (req->waiting != NULL)
+ complete(req->waiting);
+ req_finished_io(req);
+
+ blkdev_release_request(req);
+}
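+/*
+ * Illustrative driver-side completion sketch (not from this file): the
+ * usual pairing of the two calls above, with io_request_lock held.
+ */
+#if 0
+ while (end_that_request_first(req, uptodate, "mydev"))
+ ; /* more buffer_heads still attached to req */
+ blkdev_dequeue_request(req);
+ end_that_request_last(req);
+#endif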
+
+int __init blk_dev_init(void)
+{
+ struct blk_dev_struct *dev;
+
+ request_cachep = kmem_cache_create("blkdev_requests",
+ sizeof(struct request),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+ if (!request_cachep)
+ panic("Can't create request pool slab cache\n");
+
+ for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
+ dev->queue = NULL;
+
+ memset(ro_bits,0,sizeof(ro_bits));
+ memset(max_readahead, 0, sizeof(max_readahead));
+ memset(max_sectors, 0, sizeof(max_sectors));
+
+#ifdef CONFIG_AMIGA_Z2RAM
+ z2_init();
+#endif
+#ifdef CONFIG_STRAM_SWAP
+ stram_device_init();
+#endif
+#ifdef CONFIG_ISP16_CDI
+ isp16_init();
+#endif
+#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE)
+ ide_init(); /* this MUST precede hd_init */
+#endif
+#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD)
+ hd_init();
+#endif
+#ifdef CONFIG_BLK_DEV_PS2
+ ps2esdi_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XD
+ xd_init();
+#endif
+#ifdef CONFIG_BLK_DEV_MFM
+ mfm_init();
+#endif
+#ifdef CONFIG_PARIDE
+ { extern void paride_init(void); paride_init(); };
+#endif
+#ifdef CONFIG_MAC_FLOPPY
+ swim3_init();
+#endif
+#ifdef CONFIG_BLK_DEV_SWIM_IOP
+ swimiop_init();
+#endif
+#ifdef CONFIG_AMIGA_FLOPPY
+ amiga_floppy_init();
+#endif
+#ifdef CONFIG_ATARI_FLOPPY
+ atari_floppy_init();
+#endif
+#ifdef CONFIG_BLK_DEV_FD
+ floppy_init();
+#else
+#if defined(__i386__) /* Do we even need this? */
+ outb_p(0xc, 0x3f2);
+#endif
+#endif
+#ifdef CONFIG_CDU31A
+ cdu31a_init();
+#endif
+#ifdef CONFIG_ATARI_ACSI
+ acsi_init();
+#endif
+#ifdef CONFIG_MCD
+ mcd_init();
+#endif
+#ifdef CONFIG_MCDX
+ mcdx_init();
+#endif
+#ifdef CONFIG_SBPCD
+ sbpcd_init();
+#endif
+#ifdef CONFIG_AZTCD
+ aztcd_init();
+#endif
+#ifdef CONFIG_CDU535
+ sony535_init();
+#endif
+#ifdef CONFIG_GSCD
+ gscd_init();
+#endif
+#ifdef CONFIG_CM206
+ cm206_init();
+#endif
+#ifdef CONFIG_OPTCD
+ optcd_init();
+#endif
+#ifdef CONFIG_SJCD
+ sjcd_init();
+#endif
+#ifdef CONFIG_APBLOCK
+ ap_init();
+#endif
+#ifdef CONFIG_DDV
+ ddv_init();
+#endif
+#ifdef CONFIG_MDISK
+ mdisk_init();
+#endif
+#ifdef CONFIG_DASD
+ dasd_init();
+#endif
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK)
+ tapeblock_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XPRAM
+ xpram_init();
+#endif
+
+#ifdef CONFIG_SUN_JSFLASH
+ jsfd_init();
+#endif
+ return 0;
+}
+
+EXPORT_SYMBOL(io_request_lock);
+EXPORT_SYMBOL(end_that_request_first);
+EXPORT_SYMBOL(end_that_request_last);
+EXPORT_SYMBOL(blk_grow_request_list);
+EXPORT_SYMBOL(blk_init_queue);
+EXPORT_SYMBOL(blk_get_queue);
+EXPORT_SYMBOL(blk_cleanup_queue);
+EXPORT_SYMBOL(blk_queue_headactive);
+EXPORT_SYMBOL(blk_queue_make_request);
+EXPORT_SYMBOL(generic_make_request);
+EXPORT_SYMBOL(blkdev_release_request);
+EXPORT_SYMBOL(req_finished_io);
+EXPORT_SYMBOL(generic_unplug_device);
diff --git a/xen/drivers/block/xen_block.c b/xen/drivers/block/xen_block.c
new file mode 100644
index 0000000000..805fd9e1ae
--- /dev/null
+++ b/xen/drivers/block/xen_block.c
@@ -0,0 +1,402 @@
+/*
+ * xen_block.c
+ *
+ * Process incoming block I/O requests from guest OSes.
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/sched.h>
+#include <xeno/blkdev.h>
+#include <xeno/event.h>
+#include <hypervisor-ifs/block.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+#include <asm-i386/io.h>
+#include <asm/spinlock.h>
+#include <xeno/keyhandler.h>
+#include <xeno/interrupt.h>
+
+#if 0
+#define DPRINTK(_f, _a...) printk( _f , ## _a )
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+/*
+ * These are rather arbitrary. They are fairly large because adjacent
+ * requests pulled from a communication ring are quite likely to end
+ * up being part of the same scatter/gather request at the disc.
+ * It might be a good idea to add scatter/gather support explicitly to
+ * the scatter/gather ring (e.g. each request has an array of N pointers);
+ * then these values would better reflect real costs at the disc.
+ */
+#define MAX_PENDING_REQS 32
+#define BATCH_PER_DOMAIN 8
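+/*
+ * Illustrative arithmetic: with the values above, io_schedule() admits up
+ * to 8 requests from a domain per pass and stops queueing new batches once
+ * 32 requests are pending, so roughly four domains' full batches can be
+ * in flight together.
+ */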
+
+static kmem_cache_t *buffer_head_cachep;
+static atomic_t nr_pending;
+
+static void io_schedule(unsigned long unused);
+static int do_block_io_op_domain(struct task_struct *p, int max_to_do);
+static void dispatch_rw_block_io(struct task_struct *p, int index);
+static void dispatch_probe_block_io(struct task_struct *p, int index);
+static void dispatch_debug_block_io(struct task_struct *p, int index);
+static void make_response(struct task_struct *p, void *id, unsigned long st);
+
+
+/******************************************************************
+ * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
+ */
+
+static struct list_head io_schedule_list;
+static spinlock_t io_schedule_list_lock;
+
+static int __on_blkdev_list(struct task_struct *p)
+{
+ return p->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(struct task_struct *p)
+{
+ unsigned long flags;
+ if ( !__on_blkdev_list(p) ) return;
+ spin_lock_irqsave(&io_schedule_list_lock, flags);
+ if ( __on_blkdev_list(p) )
+ {
+ list_del(&p->blkdev_list);
+ p->blkdev_list.next = NULL;
+ }
+ spin_unlock_irqrestore(&io_schedule_list_lock, flags);
+}
+
+static void add_to_blkdev_list_tail(struct task_struct *p)
+{
+ unsigned long flags;
+ if ( __on_blkdev_list(p) ) return;
+ spin_lock_irqsave(&io_schedule_list_lock, flags);
+ if ( !__on_blkdev_list(p) )
+ {
+ list_add_tail(&p->blkdev_list, &io_schedule_list);
+ }
+ spin_unlock_irqrestore(&io_schedule_list_lock, flags);
+}
+
+
+/******************************************************************
+ * SCHEDULER FUNCTIONS
+ */
+
+static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
+
+static void io_schedule(unsigned long unused)
+{
+ struct task_struct *p;
+ struct list_head *ent;
+
+ /* Queue up a batch of requests. */
+ while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) &&
+ !list_empty(&io_schedule_list) )
+ {
+ ent = io_schedule_list.next;
+ p = list_entry(ent, struct task_struct, blkdev_list);
+ remove_from_blkdev_list(p);
+ if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
+ add_to_blkdev_list_tail(p);
+ }
+
+ /* Push the batch through to disc. */
+ run_task_queue(&tq_disk);
+}
+
+static void maybe_trigger_io_schedule(void)
+{
+ /*
+ * Needed so that two processes, which together make the following predicate
+ * true, don't both read stale values and evaluate the predicate
+ * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+ */
+ smp_mb();
+
+ if ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS/2)) &&
+ !list_empty(&io_schedule_list) )
+ {
+ tasklet_schedule(&io_schedule_tasklet);
+ }
+}
+
+
+
+/******************************************************************
+ * COMPLETION CALLBACK -- Called as bh->b_end_io()
+ */
+
+static void end_block_io_op(struct buffer_head *bh, int uptodate)
+{
+ struct pfn_info *page;
+ unsigned long pfn;
+
+ for ( pfn = virt_to_phys(bh->b_data) >> PAGE_SHIFT;
+ pfn < ((virt_to_phys(bh->b_data) + bh->b_size + PAGE_SIZE - 1) >>
+ PAGE_SHIFT);
+ pfn++ )
+ {
+ page = frame_table + pfn;
+ if ( ((bh->b_state & (1 << BH_Read)) != 0) &&
+ (put_page_type(page) == 0) )
+ page->flags &= ~PG_type_mask;
+ put_page_tot(page);
+ }
+
+ atomic_dec(&nr_pending);
+ make_response(bh->b_xen_domain, bh->b_xen_id, uptodate ? 0 : 1);
+
+ kmem_cache_free(buffer_head_cachep, bh);
+
+ maybe_trigger_io_schedule();
+}
+
+
+
+/******************************************************************
+ * GUEST-OS SYSCALL -- Indicates there are requests outstanding.
+ */
+
+long do_block_io_op(void)
+{
+ add_to_blkdev_list_tail(current);
+ maybe_trigger_io_schedule();
+ return 0L;
+}
+
+
+
+/******************************************************************
+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
+ */
+
+static int do_block_io_op_domain(struct task_struct* p, int max_to_do)
+{
+ blk_ring_t *blk_ring = p->blk_ring_base;
+ int i, more_to_do = 0;
+
+ for ( i = p->blk_req_cons;
+ i != blk_ring->req_prod;
+ i = BLK_RING_INC(i) )
+ {
+ if ( max_to_do-- == 0 )
+ {
+ more_to_do = 1;
+ break;
+ }
+
+ switch ( blk_ring->ring[i].req.operation )
+ {
+ case XEN_BLOCK_READ:
+ case XEN_BLOCK_WRITE:
+ dispatch_rw_block_io(p, i);
+ break;
+
+ case XEN_BLOCK_PROBE:
+ dispatch_probe_block_io(p, i);
+ break;
+
+ case XEN_BLOCK_DEBUG:
+ dispatch_debug_block_io(p, i);
+ break;
+
+ default:
+ panic("error: unknown block io operation [%d]\n",
+ blk_ring->ring[i].req.operation);
+ }
+ }
+
+ p->blk_req_cons = i;
+ return more_to_do;
+}
+
+static void dispatch_debug_block_io(struct task_struct *p, int index)
+{
+ DPRINTK("dispatch_debug_block_io: unimplemented\n");
+}
+
+static void dispatch_probe_block_io(struct task_struct *p, int index)
+{
+ extern void ide_probe_devices(xen_disk_info_t *xdi);
+ blk_ring_t *blk_ring = p->blk_ring_base;
+ xen_disk_info_t *xdi;
+
+ xdi = phys_to_virt((unsigned long)blk_ring->ring[index].req.buffer);
+ ide_probe_devices(xdi);
+
+ make_response(p, blk_ring->ring[index].req.id, 0);
+}
+
+static void dispatch_rw_block_io(struct task_struct *p, int index)
+{
+ extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
+ blk_ring_t *blk_ring = p->blk_ring_base;
+ struct buffer_head *bh;
+ int operation;
+ unsigned short size;
+ unsigned long buffer, pfn;
+ struct pfn_info *page;
+
+ operation = (blk_ring->ring[index].req.operation == XEN_BLOCK_WRITE) ?
+ WRITE : READ;
+
+ /* Sectors are 512 bytes. Make sure request size is a multiple. */
+ size = blk_ring->ring[index].req.block_size;
+ if ( (size == 0) || (size & (0x200 - 1)) != 0 )
+ {
+ DPRINTK("dodgy block size: %d\n",
+ blk_ring->ring[index].req.block_size);
+ goto bad_descriptor;
+ }
+
+ /* Buffer address should be sector aligned. */
+ buffer = (unsigned long)blk_ring->ring[index].req.buffer;
+ if ( (buffer & (0x200 - 1)) != 0 )
+ {
+ DPRINTK("unaligned buffer %08lx\n", buffer);
+ goto bad_descriptor;
+ }
+
+ /* A request may span multiple page frames. Each must be checked. */
+ for ( pfn = buffer >> PAGE_SHIFT;
+ pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
+ pfn++ )
+ {
+ /* Each frame must be within bounds of machine memory. */
+ if ( pfn >= max_page )
+ {
+ DPRINTK("pfn out of range: %08lx\n", pfn);
+ goto bad_descriptor;
+ }
+
+ page = frame_table + pfn;
+
+ /* Each frame must belong to the requesting domain. */
+ if ( (page->flags & PG_domain_mask) != p->domain )
+ {
+ DPRINTK("bad domain: expected %d, got %ld\n",
+ p->domain, page->flags & PG_domain_mask);
+ goto bad_descriptor;
+ }
+
+ /* If reading into the frame, the frame must be writeable. */
+ if ( operation == READ )
+ {
+ if ( (page->flags & PG_type_mask) != PGT_writeable_page )
+ {
+ DPRINTK("non-writeable page passed for block read\n");
+ goto bad_descriptor;
+ }
+ get_page_type(page);
+ }
+
+ /* Xen holds a frame reference until the operation is complete. */
+ get_page_tot(page);
+ }
+
+ atomic_inc(&nr_pending);
+ bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
+ if ( bh == NULL ) panic("bh is null\n");
+
+ /* set just the important bits of the buffer header */
+ memset (bh, 0, sizeof (struct buffer_head));
+
+ bh->b_blocknr = blk_ring->ring[index].req.block_number;
+ bh->b_size = size;
+ bh->b_dev = blk_ring->ring[index].req.device;
+ bh->b_rsector = blk_ring->ring[index].req.sector_number;
+ bh->b_data = phys_to_virt(buffer);
+ bh->b_count.counter = 1;
+ bh->b_end_io = end_block_io_op;
+
+ /* Save meta data about request. */
+ bh->b_xen_domain = p;
+ bh->b_xen_id = blk_ring->ring[index].req.id;
+
+ if ( operation == WRITE )
+ {
+ bh->b_state = (1 << BH_JBD) | (1 << BH_Mapped) | (1 << BH_Req) |
+ (1 << BH_Dirty) | (1 << BH_Uptodate) | (1 << BH_Write);
+ }
+ else
+ {
+ bh->b_state = (1 << BH_Mapped) | (1 << BH_Read);
+ }
+
+ /* Dispatch a single request. We'll flush it to disc later. */
+ ll_rw_block(operation, 1, &bh);
+ return;
+
+ bad_descriptor:
+ make_response(p, blk_ring->ring[index].req.id, 1);
+ return;
+}
+
+
+
+/******************************************************************
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
+ */
+
+static void make_response(struct task_struct *p, void *id, unsigned long st)
+{
+ unsigned long cpu_mask, flags;
+ int position;
+ blk_ring_t *blk_ring;
+
+ /* Place on the response ring for the relevant domain. */
+ spin_lock_irqsave(&p->blk_ring_lock, flags);
+ blk_ring = p->blk_ring_base;
+ position = blk_ring->resp_prod;
+ blk_ring->ring[position].resp.id = id;
+ blk_ring->ring[position].resp.status = st;
+ blk_ring->resp_prod = BLK_RING_INC(position);
+ spin_unlock_irqrestore(&p->blk_ring_lock, flags);
+
+ /* Kick the relevant domain. */
+ cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
+ guest_event_notify(cpu_mask);
+}
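+/*
+ * Illustrative guest-side counterpart (an assumption -- the real consumer
+ * lives in the guest OS): responses are consumed up to resp_prod, e.g.
+ */
+#if 0
+ for ( i = resp_cons; i != blk_ring->resp_prod; i = BLK_RING_INC(i) )
+ complete_guest_request(blk_ring->ring[i].resp.id, /* hypothetical */
+ blk_ring->ring[i].resp.status);
+ resp_cons = i;
+#endif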
+
+static void dump_blockq(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ printk("Dumping block queue stats: nr_pending = %d\n",
+ atomic_read(&nr_pending));
+}
+
+/* Start-of-day initialisation for a new domain. */
+void init_blkdev_info(struct task_struct *p)
+{
+ if ( sizeof(*p->blk_ring_base) > PAGE_SIZE ) BUG();
+ p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL);
+ clear_page(p->blk_ring_base);
+ SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), p->domain);
+ p->blkdev_list.next = NULL;
+}
+
+/* End-of-day teardown for a domain. XXX Outstanding requests? */
+void destroy_blkdev_info(struct task_struct *p)
+{
+ remove_from_blkdev_list(p);
+ UNSHARE_PFN(virt_to_page(p->blk_ring_base));
+ free_page((unsigned long)p->blk_ring_base);
+}
+
+void initialize_block_io(void)
+{
+ atomic_set(&nr_pending, 0);
+
+ spin_lock_init(&io_schedule_list_lock);
+ INIT_LIST_HEAD(&io_schedule_list);
+
+ buffer_head_cachep = kmem_cache_create(
+ "buffer_head_cache", sizeof(struct buffer_head),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+ add_key_handler('b', dump_blockq, "dump xen ide blkdev stats");
+}
diff --git a/xen/drivers/char/Makefile b/xen/drivers/char/Makefile
new file mode 100644
index 0000000000..574b7d2d79
--- /dev/null
+++ b/xen/drivers/char/Makefile
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o driver.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen/drivers/char/xen_kbd.c b/xen/drivers/char/xen_kbd.c
new file mode 100644
index 0000000000..dc9379a06c
--- /dev/null
+++ b/xen/drivers/char/xen_kbd.c
@@ -0,0 +1,191 @@
+#include <asm-i386/io.h>
+#include <xeno/sched.h> /* this has request_irq() proto for some reason */
+#include <xeno/keyhandler.h>
+
+#define KEYBOARD_IRQ 1
+
+#define KBD_STATUS_REG 0x64 /* Status register (R) */
+#define KBD_CNTL_REG 0x64 /* Controller command register (W) */
+#define KBD_DATA_REG 0x60 /* Keyboard data register (R/W) */
+
+/* register status bits */
+#define KBD_STAT_OBF 0x01 /* Keyboard output buffer full */
+#define KBD_STAT_IBF 0x02 /* Keyboard input buffer full */
+#define KBD_STAT_SELFTEST 0x04 /* Self test successful */
+#define KBD_STAT_CMD 0x08 /* Last write was a command write (0=data) */
+
+#define KBD_STAT_UNLOCKED 0x10 /* Zero if keyboard locked */
+#define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */
+#define KBD_STAT_GTO 0x40 /* General receive/xmit timeout */
+#define KBD_STAT_PERR 0x80 /* Parity error */
+
+#define kbd_read_input() inb(KBD_DATA_REG)
+#define kbd_read_status() inb(KBD_STATUS_REG)
+
+
+static int keyboard_shift = 0;
+static int keyboard_control = 0;
+static int keyboard_echo = 0;
+
+/* the following is pretty gross...
+ * stop reading if you don't want to throw up!
+ */
+
+static unsigned char keymap_normal[] =
+{
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 ,'1','2', '3','4','5','6', '7','8','9','0', '-','=','\b','\t',
+ 'q','w','e','r', 't','y','u','i', 'o','p','[',']','\r', 0 ,'a','s',
+ 'd','f','g','h', 'j','k','l',';', '\'','`', 0 ,'#', 'z','x','c','v',
+ 'b','n','m',',', '.','/', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 ,'\\', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
+};
+
+static unsigned char keymap_shift[] =
+{
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 ,'!','"', '#','$','%','^', '&','*','(',')', '_','+','\b','\t',
+ 'Q','W','E','R', 'T','Y','U','I', 'O','P','{','}','\r', 0 ,'A','S',
+ 'D','F','G','H', 'J','K','L',':', '@', 0 , 0 ,'~', 'Z','X','C','V',
+ 'B','N','M','<', '>','?', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 ,'|', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
+};
+
+
+static unsigned char keymap_control[] =
+{ /* same as normal, except for a-z -> 1 to 26 */
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 ,'1','2', '3','4','5','6', '7','8','9','0', '-','=','\b','\t',
+ 17, 23, 5 , 18, 20, 25, 21, 9 , 15, 16,'[',']','\r', 0 , 1 , 19,
+ 4 , 6 , 7 , 8 , 10, 11, 12,';', '\'','`', 0 ,'#', 26, 24, 3 , 22,
+ 2 , 14, 13,',', '.','/', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 ,'\\', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
+};
+
+
+static unsigned char convert_scancode (unsigned char scancode)
+{
+ unsigned char value = 0;
+
+ switch (scancode) {
+
+ case 0xbb: /* F1 */
+ keyboard_echo = !keyboard_echo;
+ break;
+
+ case 0xba: /* caps lock UP */
+ case 0x9d: /* ctrl (left) UP */
+ keyboard_control = 0;
+ break;
+
+ case 0x3a: /* caps lock DOWN */
+ case 0x1d: /* ctrl (left) DOWN */
+ keyboard_control = 1;
+ break;
+
+ case 0xaa: /* shift (left) UP */
+ case 0xb6: /* shift (right) UP */
+ keyboard_shift = 0;
+ break;
+
+ case 0x2a: /* shift (left) DOWN */
+ case 0x36: /* shift (right) DOWN */
+ keyboard_shift = 1;
+ break;
+
+ default: /* normal keys */
+ if (keyboard_control)
+ value = keymap_control[scancode];
+ else if (keyboard_shift)
+ value = keymap_shift[scancode];
+ else
+ value = keymap_normal[scancode];
+
+ }
+
+ if (value && keyboard_echo) printk ("%c", value);
+
+ return value;
+}
+
+static void keyboard_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ unsigned char status = kbd_read_status();
+ unsigned int work = 10000;
+
+ while ((--work > 0) && (status & KBD_STAT_OBF))
+ {
+ unsigned char scancode;
+ scancode = kbd_read_input();
+
+ if (!(status & (KBD_STAT_GTO | KBD_STAT_PERR)))
+ {
+ if (status & KBD_STAT_MOUSE_OBF)
+ /* mouse event, ignore */;
+ else {
+ unsigned char key;
+ key_handler *handler;
+
+ if((key = convert_scancode (scancode)) &&
+ (handler = get_key_handler(key)))
+ (*handler)(key, dev_id, regs);
+
+ }
+ }
+ status = kbd_read_status();
+ }
+
+ if (!work)
+ printk(KERN_ERR "pc_keyb: controller jammed (0x%02X).\n", status);
+
+ return;
+}
+
+
+void initialize_keyboard()
+{
+ if(request_irq(KEYBOARD_IRQ, keyboard_interrupt, 0, "keyboard", NULL))
+ printk("initialize_keyboard: failed to alloc IRQ %d\n", KEYBOARD_IRQ);
+
+ return;
+}
+
diff --git a/xen/drivers/char/xen_serial.c b/xen/drivers/char/xen_serial.c
new file mode 100644
index 0000000000..7b7e4a7b80
--- /dev/null
+++ b/xen/drivers/char/xen_serial.c
@@ -0,0 +1,103 @@
+#include <asm-i386/io.h>
+#include <xeno/sched.h> /* this has request_irq() proto for some reason */
+#include <xeno/keyhandler.h>
+#include <xeno/reboot.h>
+
+/* Register offsets */
+#define NS16550_RBR 0x00 /* receive buffer */
+#define NS16550_THR 0x00 /* transmit holding */
+#define NS16550_IER 0x01 /* interrupt enable */
+#define NS16550_IIR 0x02 /* interrupt identity */
+#define NS16550_FCR 0x02 /* FIFO control */
+#define NS16550_LCR 0x03 /* line control */
+#define NS16550_MCR 0x04 /* MODEM control */
+#define NS16550_LSR 0x05 /* line status */
+#define NS16550_MSR 0x06 /* MODEM status */
+#define NS16550_SCR 0x07 /* scratch */
+#define NS16550_DDL 0x00 /* divisor latch (ls) ( DLAB=1) */
+#define NS16550_DLM 0x01 /* divisor latch (ms) ( DLAB=1) */
+
+/* Interrupt enable register */
+#define NS16550_IER_ERDAI 0x01 /* rx data recv'd */
+#define NS16550_IER_ETHREI 0x02 /* tx reg. empty */
+#define NS16550_IER_ELSI 0x04 /* rx line status */
+#define NS16550_IER_EMSI 0x08 /* MODEM status */
+
+/* FIFO control register */
+#define NS16550_FCR_ENABLE 0x01 /* enable FIFO */
+#define NS16550_FCR_CLRX 0x02 /* clear Rx FIFO */
+#define NS16550_FCR_CLTX 0x04 /* clear Tx FIFO */
+#define NS16550_FCR_DMA 0x10 /* enter DMA mode */
+#define NS16550_FCR_TRG1 0x00 /* Rx FIFO trig lev 1 */
+#define NS16550_FCR_TRG4 0x40 /* Rx FIFO trig lev 4 */
+#define NS16550_FCR_TRG8 0x80 /* Rx FIFO trig lev 8 */
+#define NS16550_FCR_TRG14 0xc0 /* Rx FIFO trig lev 14 */
+
+/* MODEM control register */
+#define NS16550_MCR_DTR 0x01 /* Data Terminal Ready */
+#define NS16550_MCR_RTS 0x02 /* Request to Send */
+#define NS16550_MCR_OUT1 0x04 /* OUT1: unused */
+#define NS16550_MCR_OUT2 0x08 /* OUT2: interrupt mask */
+#define NS16550_MCR_LOOP 0x10 /* Loop */
+
+#define SERIAL_BASE 0x3f8 /* XXX SMH: horrible hardwired COM1 */
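+/*
+ * Illustrative note (not done here -- we rely on whatever the firmware
+ * programmed): setting the baud rate means raising DLAB (bit 7 of LCR),
+ * writing the divisor 115200/baud to DDL/DLM, then clearing DLAB, e.g.
+ */
+#if 0
+ outb(0x80, SERIAL_BASE + NS16550_LCR); /* DLAB=1 */
+ outb(115200/9600, SERIAL_BASE + NS16550_DDL); /* divisor low: 12 */
+ outb(0x00, SERIAL_BASE + NS16550_DLM); /* divisor high */
+ outb(0x03, SERIAL_BASE + NS16550_LCR); /* DLAB=0, 8N1 */
+#endif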
+
+
+
+static int serial_echo = 0; /* default is not to echo; change with 'e' */
+
+void toggle_echo(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ serial_echo = !serial_echo;
+ return;
+}
+
+
+
+static void serial_rx_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+ u_char c;
+ key_handler *handler;
+
+ /* XXX SMH: should probably check this is an RX interrupt :-) */
+
+ /* clear the interrupt by reading the character */
+ c = inb(SERIAL_BASE + NS16550_RBR );
+
+ /* if there's a handler, call it: we trust it won't screw us too badly */
+ if((handler = get_key_handler(c)) != NULL)
+ (*handler)(c, dev_id, regs);
+
+ if(serial_echo)
+ printk("%c", c);
+
+ return;
+}
+
+void initialize_serial()
+{
+ int fifo, rc;
+
+ /* setup key handler */
+ add_key_handler('~', toggle_echo, "toggle serial echo");
+
+ /* Should detect this, but must be a ns16550a at least, surely? */
+ fifo = 1;
+ if(fifo) {
+ /* Clear FIFOs, enable, trigger at 1 byte */
+ outb(NS16550_FCR_TRG1 | NS16550_FCR_ENABLE |
+ NS16550_FCR_CLRX | NS16550_FCR_CLTX, SERIAL_BASE+NS16550_FCR);
+ }
+
+ outb(NS16550_MCR_OUT2, SERIAL_BASE + NS16550_MCR); /* Modem control */
+ outb(NS16550_IER_ERDAI, SERIAL_BASE + NS16550_IER ); /* Setup interrupts */
+
+ /* XXX SMH: this is a hack; probably is IRQ4 but grab both anyway */
+ if((rc = request_irq(4, serial_rx_int, 0, "serial", (void *)0x1234)))
+ printk("initialize_serial: failed to get IRQ4, rc=%d\n", rc);
+
+ if((rc = request_irq(3, serial_rx_int, 0, "serial", (void *)0x1234)))
+ printk("initialize_serial: failed to get IRQ3, rc=%d\n", rc);
+
+ return;
+}
diff --git a/xen/drivers/ide/Makefile b/xen/drivers/ide/Makefile
new file mode 100644
index 0000000000..574b7d2d79
--- /dev/null
+++ b/xen/drivers/ide/Makefile
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o driver.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen/drivers/ide/ide-disk.c b/xen/drivers/ide/ide-disk.c
new file mode 100644
index 0000000000..0d1cd113cd
--- /dev/null
+++ b/xen/drivers/ide/ide-disk.c
@@ -0,0 +1,1555 @@
+/*
+ * linux/drivers/ide/ide-disk.c Version 1.10 June 9, 2000
+ *
+ * Copyright (C) 1994-1998 Linus Torvalds & authors (see below)
+ */
+
+/*
+ * Mostly written by Mark Lord <mlord@pobox.com>
+ * and Gadi Oxman <gadio@netvision.net.il>
+ * and Andre Hedrick <andre@linux-ide.org>
+ *
+ * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c.
+ *
+ * Version 1.00 move disk only code from ide.c to ide-disk.c
+ * support optional byte-swapping of all data
+ * Version 1.01 fix previous byte-swapping code
+ * Version 1.02 remove ", LBA" from drive identification msgs
+ * Version 1.03 fix display of id->buf_size for big-endian
+ * Version 1.04 add /proc configurable settings and S.M.A.R.T support
+ * Version 1.05 add capacity support for ATA3 >= 8GB
+ * Version 1.06 get boot-up messages to show full cyl count
+ * Version 1.07 disable door-locking if it fails
+ * Version 1.08 fixed CHS/LBA translations for ATA4 > 8GB,
+ * process of adding new ATA4 compliance.
+ * fixed problems in allowing fdisk to see
+ * the entire disk.
+ * Version 1.09 added increment of rq->sector in ide_multwrite
+ * added UDMA 3/4 reporting
+ * Version 1.10 request queue changes, Ultra DMA 100
+ * Version 1.11 added 48-bit lba
+ * Version 1.12 adding taskfile io access method
+ */
+
+#define IDEDISK_VERSION "1.12"
+
+#undef REALLY_SLOW_IO /* most systems can safely undef this */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/slab.h>
+#include <xeno/delay.h>
+#include <xeno/ide.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+#define IS_PDC4030_DRIVE (HWIF(drive)->chipset == ide_pdc4030)
+#else
+#define IS_PDC4030_DRIVE (0) /* auto-NULLs out pdc4030 code */
+#endif
+
+#ifdef CONFIG_IDE_TASKFILE_IO
+# undef __TASKFILE__IO /* define __TASKFILE__IO */
+#else /* CONFIG_IDE_TASKFILE_IO */
+# undef __TASKFILE__IO
+#endif /* CONFIG_IDE_TASKFILE_IO */
+
+#ifndef __TASKFILE__IO
+
+static void idedisk_bswap_data (void *buffer, int wcount)
+{
+ u16 *p = buffer;
+
+ while (wcount--) {
+ *p = *p << 8 | *p >> 8; p++;
+ *p = *p << 8 | *p >> 8; p++;
+ }
+}
+
+static inline void idedisk_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+ ide_input_data(drive, buffer, wcount);
+ if (drive->bswap)
+ idedisk_bswap_data(buffer, wcount);
+}
+
+static inline void idedisk_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+ if (drive->bswap) {
+ idedisk_bswap_data(buffer, wcount);
+ ide_output_data(drive, buffer, wcount);
+ idedisk_bswap_data(buffer, wcount);
+ } else
+ ide_output_data(drive, buffer, wcount);
+}
+
+#endif /* __TASKFILE__IO */
+
+/*
+ * lba_capacity_is_ok() performs a sanity check on the claimed "lba_capacity"
+ * value for this drive (from its reported identification information).
+ *
+ * Returns: 1 if lba_capacity looks sensible
+ * 0 otherwise
+ *
+ * It is called only once for each drive.
+ */
+static int lba_capacity_is_ok (struct hd_driveid *id)
+{
+ unsigned long lba_sects, chs_sects, head, tail;
+
+ if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)) {
+ printk("48-bit Drive: %llu \n", id->lba_capacity_2);
+ return 1;
+ }
+
+ /*
+ * The ATA spec tells large drives to return
+ * C/H/S = 16383/16/63 independent of their size.
+ * Some drives can be jumpered to use 15 heads instead of 16.
+ * Some drives can be jumpered to use 4092 cyls instead of 16383.
+ */
+ if ((id->cyls == 16383
+ || (id->cyls == 4092 && id->cur_cyls == 16383)) &&
+ id->sectors == 63 &&
+ (id->heads == 15 || id->heads == 16) &&
+ id->lba_capacity >= 16383*63*id->heads)
+ return 1;
+
+ lba_sects = id->lba_capacity;
+ chs_sects = id->cyls * id->heads * id->sectors;
+
+ /* perform a rough sanity check on lba_sects: within 10% is OK */
+ if ((lba_sects - chs_sects) < chs_sects/10)
+ return 1;
+
+ /* some drives have the word order reversed */
+ head = ((lba_sects >> 16) & 0xffff);
+ tail = (lba_sects & 0xffff);
+ lba_sects = (head | (tail << 16));
+ if ((lba_sects - chs_sects) < chs_sects/10) {
+ id->lba_capacity = lba_sects;
+ return 1; /* lba_capacity is (now) good */
+ }
+
+ return 0; /* lba_capacity value may be bad */
+}
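+/*
+ * Worked example of the word-swap fixup above (illustrative numbers): a
+ * 1,008,000-sector drive (1000/16/63 CHS) whose firmware returns the two
+ * 16-bit words reversed reports lba_capacity 0x6180000F; swapping the
+ * halves recovers 0x000F6180 == 1,008,000, which passes the 10% check.
+ */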
+
+#ifndef __TASKFILE__IO
+
+/*
+ * read_intr() is the handler for disk read/multread interrupts
+ */
+static ide_startstop_t read_intr (ide_drive_t *drive)
+{
+ byte stat;
+ int i;
+ unsigned int msect, nsect;
+ struct request *rq;
+
+ /* new way for dealing with premature shared PCI interrupts */
+ if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) {
+ if (stat & (ERR_STAT|DRQ_STAT)) {
+ return ide_error(drive, "read_intr", stat);
+ }
+ /* no data yet, so wait for another interrupt */
+ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ msect = drive->mult_count;
+
+read_next:
+ rq = HWGROUP(drive)->rq;
+ if (msect) {
+ if ((nsect = rq->current_nr_sectors) > msect)
+ nsect = msect;
+ msect -= nsect;
+ } else
+ nsect = 1;
+ idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS);
+#ifdef DEBUG
+ printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n",
+ drive->name, rq->sector, rq->sector+nsect-1,
+ (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect);
+#endif
+ rq->sector += nsect;
+ rq->buffer += nsect<<9;
+ rq->errors = 0;
+ i = (rq->nr_sectors -= nsect);
+ if (((long)(rq->current_nr_sectors -= nsect)) <= 0)
+ ide_end_request(1, HWGROUP(drive));
+ if (i > 0) {
+ if (msect)
+ goto read_next;
+ ide_set_handler (drive, &read_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ return ide_stopped;
+}
+
+/*
+ * write_intr() is the handler for disk write interrupts
+ */
+static ide_startstop_t write_intr (ide_drive_t *drive)
+{
+ byte stat;
+ int i;
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ struct request *rq = hwgroup->rq;
+
+ if (!OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) {
+ printk("%s: write_intr error1: nr_sectors=%ld, stat=0x%02x\n", drive->name, rq->nr_sectors, stat);
+ } else {
+#ifdef DEBUG
+ printk("%s: write: sector %ld, buffer=0x%08lx, remaining=%ld\n",
+ drive->name, rq->sector, (unsigned long) rq->buffer,
+ rq->nr_sectors-1);
+#endif
+ if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) {
+ rq->sector++;
+ rq->buffer += 512;
+ rq->errors = 0;
+ i = --rq->nr_sectors;
+ --rq->current_nr_sectors;
+ if (((long)rq->current_nr_sectors) <= 0)
+ ide_end_request(1, hwgroup);
+ if (i > 0) {
+ idedisk_output_data (drive, rq->buffer, SECTOR_WORDS);
+ ide_set_handler (drive, &write_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ return ide_stopped;
+ }
+ return ide_stopped; /* the original code did this here (?) */
+ }
+ return ide_error(drive, "write_intr", stat);
+}
+
+/*
+ * ide_multwrite() transfers a block of up to mcount sectors of data
+ * to a drive as part of a disk multiple-sector write operation.
+ *
+ * Returns 0 on success.
+ *
+ * Note that we may be called from two contexts - the do_rw_disk context
+ * and IRQ context. The IRQ can happen any time after we've output the
+ * full "mcount" number of sectors, so we must make sure we update the
+ * state _before_ we output the final part of the data!
+ */
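+/*
+ * Concretely (hypothetical request): with mcount == 4 and two 2-sector
+ * buffer heads queued, each loop iteration advances rq->sector, rq->buffer,
+ * rq->nr_sectors and rq->current_nr_sectors *before* idedisk_output_data()
+ * pushes that chunk to the drive, so an interrupt taken right after the
+ * final transfer sees fully consistent request state.
+ */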
+int ide_multwrite (ide_drive_t *drive, unsigned int mcount)
+{
+ ide_hwgroup_t *hwgroup= HWGROUP(drive);
+ struct request *rq = &hwgroup->wrq;
+
+ do {
+ char *buffer;
+ int nsect = rq->current_nr_sectors;
+
+ if (nsect > mcount)
+ nsect = mcount;
+ mcount -= nsect;
+ buffer = rq->buffer;
+
+ rq->sector += nsect;
+ rq->buffer += nsect << 9;
+ rq->nr_sectors -= nsect;
+ rq->current_nr_sectors -= nsect;
+
+ /* Do we move to the next bh after this? */
+ if (!rq->current_nr_sectors) {
+ struct buffer_head *bh = rq->bh->b_reqnext;
+
+			/* end early if we ran out of requests */
+ if (!bh) {
+ mcount = 0;
+ } else {
+ rq->bh = bh;
+ rq->current_nr_sectors = bh->b_size >> 9;
+ rq->buffer = bh->b_data;
+ }
+ }
+
+ /*
+		 * OK, we're all set up for the interrupt
+		 * re-entering us on the last transfer.
+ */
+ idedisk_output_data(drive, buffer, nsect<<7);
+ } while (mcount);
+
+ return 0;
+}
+
+/*
+ * multwrite_intr() is the handler for disk multwrite interrupts
+ */
+static ide_startstop_t multwrite_intr (ide_drive_t *drive)
+{
+ byte stat;
+ int i;
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ struct request *rq = &hwgroup->wrq;
+
+ if (OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) {
+ if (stat & DRQ_STAT) {
+ /*
+ * The drive wants data. Remember rq is the copy
+ * of the request
+ */
+ if (rq->nr_sectors) {
+ if (ide_multwrite(drive, drive->mult_count))
+ return ide_stopped;
+ ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ } else {
+ /*
+ * If the copy has all the blocks completed then
+ * we can end the original request.
+ */
+ if (!rq->nr_sectors) { /* all done? */
+ rq = hwgroup->rq;
+ for (i = rq->nr_sectors; i > 0;){
+ i -= rq->current_nr_sectors;
+ ide_end_request(1, hwgroup);
+ }
+ return ide_stopped;
+ }
+ }
+ return ide_stopped; /* the original code did this here (?) */
+ }
+ return ide_error(drive, "multwrite_intr", stat);
+}
+#endif /* __TASKFILE__IO */
+
+#ifdef __TASKFILE__IO
+
+static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block);
+static ide_startstop_t lba_28_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block);
+static ide_startstop_t lba_48_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long long block);
+
+/*
+ * do_rw_disk() issues READ and WRITE commands to a disk,
+ * using LBA if supported, or CHS otherwise, to address sectors.
+ * It also takes care of issuing special DRIVE_CMDs.
+ */
+static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+ if (rq->cmd == READ)
+ goto good_command;
+ if (rq->cmd == WRITE)
+ goto good_command;
+
+ printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd);
+ ide_end_request(0, HWGROUP(drive));
+ return ide_stopped;
+
+good_command:
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (IS_PDC4030_DRIVE) {
+ extern ide_startstop_t promise_rw_disk(ide_drive_t *, struct request *, unsigned long);
+ return promise_rw_disk(drive, rq, block);
+ }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+
+ if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) /* 48-bit LBA */
+ return lba_48_rw_disk(drive, rq, (unsigned long long) block);
+ if (drive->select.b.lba) /* 28-bit LBA */
+ return lba_28_rw_disk(drive, rq, (unsigned long) block);
+
+ /* 28-bit CHS : DIE DIE DIE piece of legacy crap!!! */
+ return chs_rw_disk(drive, rq, (unsigned long) block);
+}
+
+static task_ioreg_t get_command (ide_drive_t *drive, int cmd)
+{
+ int lba48bit = (drive->id->cfs_enable_2 & 0x0400) ? 1 : 0;
+
+#if 1
+ lba48bit = drive->addressing;
+#endif
+
+ if ((cmd == READ) && (drive->using_dma))
+ return (lba48bit) ? WIN_READDMA_EXT : WIN_READDMA;
+ else if ((cmd == READ) && (drive->mult_count))
+ return (lba48bit) ? WIN_MULTREAD_EXT : WIN_MULTREAD;
+ else if (cmd == READ)
+ return (lba48bit) ? WIN_READ_EXT : WIN_READ;
+ else if ((cmd == WRITE) && (drive->using_dma))
+ return (lba48bit) ? WIN_WRITEDMA_EXT : WIN_WRITEDMA;
+ else if ((cmd == WRITE) && (drive->mult_count))
+ return (lba48bit) ? WIN_MULTWRITE_EXT : WIN_MULTWRITE;
+ else if (cmd == WRITE)
+ return (lba48bit) ? WIN_WRITE_EXT : WIN_WRITE;
+ else
+ return WIN_NOP;
+}
+
+static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ ide_task_t args;
+
+ task_ioreg_t command = get_command(drive, rq->cmd);
+ unsigned int track = (block / drive->sect);
+ unsigned int sect = (block % drive->sect) + 1;
+ unsigned int head = (track % drive->head);
+ unsigned int cyl = (track / drive->head);
+
+ memset(&taskfile, 0, sizeof(task_struct_t));
+ memset(&hobfile, 0, sizeof(hob_struct_t));
+
+ taskfile.sector_count = (rq->nr_sectors==256)?0x00:rq->nr_sectors;
+ taskfile.sector_number = sect;
+ taskfile.low_cylinder = cyl;
+ taskfile.high_cylinder = (cyl>>8);
+ taskfile.device_head = head;
+ taskfile.device_head |= drive->select.all;
+ taskfile.command = command;
+
+	/* #ifdef DEBUG */
+	printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ");
+	printk("CHS=%d/%d/%d, ", cyl, head, sect);
+	printk("sectors=%ld, ", rq->nr_sectors);
+	printk("buffer=0x%08lx\n", (unsigned long) rq->buffer);
+	/* #endif*/
+
+ memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr));
+ memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr));
+ args.command_type = ide_cmd_type_parser(&args);
+ args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile);
+ args.handler = ide_handler_parser(&taskfile, &hobfile);
+ args.posthandler = NULL;
+ args.rq = (struct request *) rq;
+ args.block = block;
+	rq->special = (ide_task_t *)&args;
+
+ return do_rw_taskfile(drive, &args);
+}
+
+static ide_startstop_t lba_28_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ ide_task_t args;
+
+ task_ioreg_t command = get_command(drive, rq->cmd);
+
+ memset(&taskfile, 0, sizeof(task_struct_t));
+ memset(&hobfile, 0, sizeof(hob_struct_t));
+
+ taskfile.sector_count = (rq->nr_sectors==256)?0x00:rq->nr_sectors;
+ taskfile.sector_number = block;
+ taskfile.low_cylinder = (block>>=8);
+ taskfile.high_cylinder = (block>>=8);
+ taskfile.device_head = ((block>>8)&0x0f);
+ taskfile.device_head |= drive->select.all;
+ taskfile.command = command;
+
+#ifdef DEBUG
+	printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ");
+	printk("LBAsect=%ld, ", rq->sector);
+	printk("sectors=%ld, ", rq->nr_sectors);
+	printk("buffer=0x%08lx\n", (unsigned long) rq->buffer);
+#endif
+
+ memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr));
+ memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr));
+ args.command_type = ide_cmd_type_parser(&args);
+ args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile);
+ args.handler = ide_handler_parser(&taskfile, &hobfile);
+ args.posthandler = NULL;
+ args.rq = (struct request *) rq;
+ args.block = block;
+	rq->special = (ide_task_t *)&args;
+
+ return do_rw_taskfile(drive, &args);
+}
+
+/*
+ * 268435455 == 137439 MB or 28bit limit
+ * 320173056 == 163929 MB or 48bit addressing
+ * 1073741822 == 549756 MB or 48bit addressing fake drive
+ */
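+
+/*
+ * Arithmetic check (512-byte sectors, decimal MB):
+ *    268435455 = 2^28 - 1 sectors -> * 512 = 137438952960 bytes ~= 137439 MB
+ *    320173056 sectors            -> * 512 = 163928604672 bytes ~= 163929 MB
+ *   1073741822 = 2^30 - 2 sectors -> * 512 = 549755812864 bytes ~= 549756 MB
+ */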
+
+static ide_startstop_t lba_48_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long long block)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ ide_task_t args;
+
+ task_ioreg_t command = get_command(drive, rq->cmd);
+
+ memset(&taskfile, 0, sizeof(task_struct_t));
+ memset(&hobfile, 0, sizeof(hob_struct_t));
+
+ taskfile.sector_count = rq->nr_sectors;
+ hobfile.sector_count = (rq->nr_sectors>>8);
+
+ if (rq->nr_sectors == 65536) {
+ taskfile.sector_count = 0x00;
+ hobfile.sector_count = 0x00;
+ }
+
+ taskfile.sector_number = block; /* low lba */
+ taskfile.low_cylinder = (block>>=8); /* mid lba */
+ taskfile.high_cylinder = (block>>=8); /* hi lba */
+ hobfile.sector_number = (block>>=8); /* low lba */
+ hobfile.low_cylinder = (block>>=8); /* mid lba */
+ hobfile.high_cylinder = (block>>=8); /* hi lba */
+ taskfile.device_head = drive->select.all;
+ hobfile.device_head = taskfile.device_head;
+ hobfile.control = (drive->ctl|0x80);
+ taskfile.command = command;
+
+#ifdef DEBUG
+	printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ");
+	printk("LBAsect=%ld, ", rq->sector);
+	printk("sectors=%ld, ", rq->nr_sectors);
+	printk("buffer=0x%08lx\n", (unsigned long) rq->buffer);
+#endif
+
+ memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr));
+ memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr));
+ args.command_type = ide_cmd_type_parser(&args);
+ args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile);
+ args.handler = ide_handler_parser(&taskfile, &hobfile);
+ args.posthandler = NULL;
+ args.rq = (struct request *) rq;
+ args.block = block;
+	rq->special = (ide_task_t *)&args;
+
+ return do_rw_taskfile(drive, &args);
+}
+
+#else /* !__TASKFILE__IO */
+/*
+ * do_rw_disk() issues READ and WRITE commands to a disk,
+ * using LBA if supported, or CHS otherwise, to address sectors.
+ * It also takes care of issuing special DRIVE_CMDs.
+ */
+static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (drive->select.b.lba || IS_PDC4030_DRIVE) {
+#else /* !CONFIG_BLK_DEV_PDC4030 */
+ if (drive->select.b.lba) {
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+
+ if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) {
+ task_ioreg_t tasklets[10];
+
+ tasklets[0] = 0;
+ tasklets[1] = 0;
+ tasklets[2] = rq->nr_sectors;
+ tasklets[3] = (rq->nr_sectors>>8);
+ if (rq->nr_sectors == 65536) {
+ tasklets[2] = 0x00;
+ tasklets[3] = 0x00;
+ }
+ tasklets[4] = (task_ioreg_t) block;
+ tasklets[5] = (task_ioreg_t) (block>>8);
+ tasklets[6] = (task_ioreg_t) (block>>16);
+ tasklets[7] = (task_ioreg_t) (block>>24);
+ tasklets[8] = (task_ioreg_t) 0;
+ tasklets[9] = (task_ioreg_t) 0;
+// tasklets[8] = (task_ioreg_t) (block>>32);
+// tasklets[9] = (task_ioreg_t) (block>>40);
+ /*#ifdef DEBUG */
+ printk("[A]\n");
+ printk("%s: %sing: LBAsect=%lu, sectors=%ld, buffer=0x%08lx, LBAsect=0x%012lx\n",
+ drive->name,
+ (rq->cmd==READ)?"read":"writ",
+ block,
+ rq->nr_sectors,
+ (unsigned long) rq->buffer,
+ block);
+ printk("%s: 0x%02x%02x 0x%02x%02x%02x%02x%02x%02x\n",
+ drive->name, tasklets[3], tasklets[2],
+ tasklets[9], tasklets[8], tasklets[7],
+ tasklets[6], tasklets[5], tasklets[4]);
+ /* #endif */
+ OUT_BYTE(tasklets[1], IDE_FEATURE_REG);
+ OUT_BYTE(tasklets[3], IDE_NSECTOR_REG);
+ OUT_BYTE(tasklets[7], IDE_SECTOR_REG);
+ OUT_BYTE(tasklets[8], IDE_LCYL_REG);
+ OUT_BYTE(tasklets[9], IDE_HCYL_REG);
+
+ OUT_BYTE(tasklets[0], IDE_FEATURE_REG);
+ OUT_BYTE(tasklets[2], IDE_NSECTOR_REG);
+ OUT_BYTE(tasklets[4], IDE_SECTOR_REG);
+ OUT_BYTE(tasklets[5], IDE_LCYL_REG);
+ OUT_BYTE(tasklets[6], IDE_HCYL_REG);
+ OUT_BYTE(0x00|drive->select.all,IDE_SELECT_REG);
+ } else {
+#ifdef DEBUG
+ printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n",
+ drive->name, (rq->cmd==READ)?"read":"writ",
+ block, rq->nr_sectors, (unsigned long) rq->buffer);
+#endif
+ OUT_BYTE(0x00, IDE_FEATURE_REG);
+ OUT_BYTE((rq->nr_sectors==256)?0x00:rq->nr_sectors,IDE_NSECTOR_REG);
+ OUT_BYTE(block,IDE_SECTOR_REG);
+ OUT_BYTE(block>>=8,IDE_LCYL_REG);
+ OUT_BYTE(block>>=8,IDE_HCYL_REG);
+ OUT_BYTE(((block>>8)&0x0f)|drive->select.all,IDE_SELECT_REG);
+ }
+ } else {
+ unsigned int sect,head,cyl,track;
+ track = block / drive->sect;
+ sect = block % drive->sect + 1;
+ OUT_BYTE(sect,IDE_SECTOR_REG);
+ head = track % drive->head;
+ cyl = track / drive->head;
+
+ OUT_BYTE(0x00, IDE_FEATURE_REG);
+ OUT_BYTE((rq->nr_sectors==256)?0x00:rq->nr_sectors,IDE_NSECTOR_REG);
+ OUT_BYTE(cyl,IDE_LCYL_REG);
+ OUT_BYTE(cyl>>8,IDE_HCYL_REG);
+ OUT_BYTE(head|drive->select.all,IDE_SELECT_REG);
+#ifdef DEBUG
+ printk("%s: %sing: CHS=%d/%d/%d, sectors=%ld, buffer=0x%08lx\n",
+ drive->name, (rq->cmd==READ)?"read":"writ", cyl,
+ head, sect, rq->nr_sectors, (unsigned long) rq->buffer);
+#endif
+ }
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (IS_PDC4030_DRIVE) {
+ extern ide_startstop_t do_pdc4030_io(ide_drive_t *, struct request *);
+ return do_pdc4030_io (drive, rq);
+ }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+ if (rq->cmd == READ) {
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_read, drive)))
+ return ide_started;
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL);
+ if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) {
+ OUT_BYTE(drive->mult_count ? WIN_MULTREAD_EXT : WIN_READ_EXT, IDE_COMMAND_REG);
+ } else {
+ OUT_BYTE(drive->mult_count ? WIN_MULTREAD : WIN_READ, IDE_COMMAND_REG);
+ }
+ return ide_started;
+ }
+ if (rq->cmd == WRITE) {
+ ide_startstop_t startstop;
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_write, drive)))
+ return ide_started;
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+ if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) {
+ OUT_BYTE(drive->mult_count ? WIN_MULTWRITE_EXT : WIN_WRITE_EXT, IDE_COMMAND_REG);
+ } else {
+ OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, IDE_COMMAND_REG);
+ }
+ if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+ printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name,
+ drive->mult_count ? "MULTWRITE" : "WRITE");
+ return startstop;
+ }
+ if (!drive->unmask)
+ __cli(); /* local CPU only */
+ if (drive->mult_count) {
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ /*
+ * Ugh.. this part looks ugly because we MUST set up
+ * the interrupt handler before outputting the first block
+ * of data to be written. If we hit an error (corrupted buffer list)
+ * in ide_multwrite(), then we need to remove the handler/timer
+ * before returning. Fortunately, this NEVER happens (right?).
+ *
+ * Except when you get an error it seems...
+ */
+ hwgroup->wrq = *rq; /* scratchpad */
+ ide_set_handler(drive, &multwrite_intr, WAIT_CMD, NULL);
+ if (ide_multwrite(drive, drive->mult_count)) {
+ unsigned long flags;
+ spin_lock_irqsave(&io_request_lock, flags);
+ hwgroup->handler = NULL;
+ del_timer(&hwgroup->timer);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return ide_stopped;
+ }
+ } else {
+ ide_set_handler (drive, &write_intr, WAIT_CMD, NULL);
+ idedisk_output_data(drive, rq->buffer, SECTOR_WORDS);
+ }
+ return ide_started;
+ }
+ printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd);
+ ide_end_request(0, HWGROUP(drive));
+ return ide_stopped;
+}
+
+#endif /* __TASKFILE__IO */
+
+static int idedisk_open (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+ MOD_INC_USE_COUNT;
+ if (drive->removable && drive->usage == 1) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.command = WIN_DOORLOCK;
+ check_disk_change(inode->i_rdev);
+ /*
+ * Ignore the return code from door_lock,
+ * since the open() has already succeeded,
+ * and the door_lock is irrelevant at this point.
+ */
+ if (drive->doorlocking && ide_wait_taskfile(drive, &taskfile, &hobfile, NULL))
+ drive->doorlocking = 0;
+ }
+ return 0;
+}
+
+static int do_idedisk_flushcache(ide_drive_t *drive);
+
+static void idedisk_release (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+ if (drive->removable && !drive->usage) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.command = WIN_DOORUNLOCK;
+ invalidate_bdev(inode->i_bdev, 0);
+ if (drive->doorlocking && ide_wait_taskfile(drive, &taskfile, &hobfile, NULL))
+ drive->doorlocking = 0;
+ }
+ if ((drive->id->cfs_enable_2 & 0x3000) && drive->wcache)
+ if (do_idedisk_flushcache(drive))
+ printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n",
+ drive->name);
+ MOD_DEC_USE_COUNT;
+}
+
+static int idedisk_media_change (ide_drive_t *drive)
+{
+ return drive->removable; /* if removable, always assume it was changed */
+}
+
+static void idedisk_revalidate (ide_drive_t *drive)
+{
+ grok_partitions(HWIF(drive)->gd, drive->select.b.unit,
+ 1<<PARTN_BITS,
+ current_capacity(drive));
+}
+
+/*
+ * Queries the drive for its true maximum capacity.
+ * Returns the maximum LBA address (> 0) of the drive, or 0 on failure.
+ */
+static unsigned long idedisk_read_native_max_address(ide_drive_t *drive)
+{
+ ide_task_t args;
+ unsigned long addr = 0;
+
+ if (!(drive->id->command_set_1 & 0x0400) &&
+ !(drive->id->cfs_enable_2 & 0x0100))
+ return addr;
+
+ /* Create IDE/ATA command request structure */
+ memset(&args, 0, sizeof(ide_task_t));
+ args.tfRegister[IDE_SELECT_OFFSET] = 0x40;
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_READ_NATIVE_MAX;
+ args.handler = task_no_data_intr;
+
+ /* submit command request */
+ ide_raw_taskfile(drive, &args, NULL);
+
+ /* if OK, compute maximum address value */
+ if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+ addr = ((args.tfRegister[IDE_SELECT_OFFSET] & 0x0f) << 24)
+ | ((args.tfRegister[ IDE_HCYL_OFFSET] ) << 16)
+ | ((args.tfRegister[ IDE_LCYL_OFFSET] ) << 8)
+ | ((args.tfRegister[IDE_SECTOR_OFFSET] ));
+ }
+ addr++; /* since the return value is (maxlba - 1), we add 1 */
+ return addr;
+}
+
+static unsigned long long idedisk_read_native_max_address_ext(ide_drive_t *drive)
+{
+ ide_task_t args;
+ unsigned long long addr = 0;
+
+ /* Create IDE/ATA command request structure */
+ memset(&args, 0, sizeof(ide_task_t));
+
+ args.tfRegister[IDE_SELECT_OFFSET] = 0x40;
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_READ_NATIVE_MAX_EXT;
+ args.handler = task_no_data_intr;
+
+ /* submit command request */
+ ide_raw_taskfile(drive, &args, NULL);
+
+ /* if OK, compute maximum address value */
+ if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
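+		/* tfRegister holds LBA bits 0..23, hobRegister bits 24..47 */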
+ u32 high = ((args.hobRegister[IDE_HCYL_OFFSET_HOB])<<16) |
+ ((args.hobRegister[IDE_LCYL_OFFSET_HOB])<<8) |
+ (args.hobRegister[IDE_SECTOR_OFFSET_HOB]);
+ u32 low = ((args.tfRegister[IDE_HCYL_OFFSET])<<16) |
+ ((args.tfRegister[IDE_LCYL_OFFSET])<<8) |
+ (args.tfRegister[IDE_SECTOR_OFFSET]);
+ addr = ((__u64)high << 24) | low;
+ }
+ addr++; /* since the return value is (maxlba - 1), we add 1 */
+ return addr;
+}
+
+#ifdef CONFIG_IDEDISK_STROKE
+/*
+ * Sets the maximum virtual LBA address of the drive.
+ * Returns the new maximum virtual LBA address (> 0), or 0 on failure.
+ */
+static unsigned long idedisk_set_max_address(ide_drive_t *drive, unsigned long addr_req)
+{
+ ide_task_t args;
+ unsigned long addr_set = 0;
+
+ addr_req--;
+ /* Create IDE/ATA command request structure */
+ memset(&args, 0, sizeof(ide_task_t));
+ args.tfRegister[IDE_SECTOR_OFFSET] = ((addr_req >> 0) & 0xff);
+ args.tfRegister[IDE_LCYL_OFFSET] = ((addr_req >> 8) & 0xff);
+ args.tfRegister[IDE_HCYL_OFFSET] = ((addr_req >> 16) & 0xff);
+ args.tfRegister[IDE_SELECT_OFFSET] = ((addr_req >> 24) & 0x0f) | 0x40;
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_SET_MAX;
+ args.handler = task_no_data_intr;
+ /* submit command request */
+ ide_raw_taskfile(drive, &args, NULL);
+ /* if OK, read new maximum address value */
+ if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+ addr_set = ((args.tfRegister[IDE_SELECT_OFFSET] & 0x0f) << 24)
+ | ((args.tfRegister[ IDE_HCYL_OFFSET] ) << 16)
+ | ((args.tfRegister[ IDE_LCYL_OFFSET] ) << 8)
+ | ((args.tfRegister[IDE_SECTOR_OFFSET] ));
+ }
+ addr_set++;
+ return addr_set;
+}
+
+static unsigned long long idedisk_set_max_address_ext(ide_drive_t *drive, unsigned long long addr_req)
+{
+ ide_task_t args;
+ unsigned long long addr_set = 0;
+
+ addr_req--;
+ /* Create IDE/ATA command request structure */
+ memset(&args, 0, sizeof(ide_task_t));
+ args.tfRegister[IDE_SECTOR_OFFSET] = ((addr_req >> 0) & 0xff);
+ args.tfRegister[IDE_LCYL_OFFSET] = ((addr_req >>= 8) & 0xff);
+ args.tfRegister[IDE_HCYL_OFFSET] = ((addr_req >>= 8) & 0xff);
+ args.tfRegister[IDE_SELECT_OFFSET] = 0x40;
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_SET_MAX_EXT;
+ args.hobRegister[IDE_SECTOR_OFFSET_HOB] = ((addr_req >>= 8) & 0xff);
+ args.hobRegister[IDE_LCYL_OFFSET_HOB] = ((addr_req >>= 8) & 0xff);
+ args.hobRegister[IDE_HCYL_OFFSET_HOB] = ((addr_req >>= 8) & 0xff);
+ args.hobRegister[IDE_SELECT_OFFSET_HOB] = 0x40;
+ args.hobRegister[IDE_CONTROL_OFFSET_HOB]= (drive->ctl|0x80);
+ args.handler = task_no_data_intr;
+ /* submit command request */
+ ide_raw_taskfile(drive, &args, NULL);
+ /* if OK, compute maximum address value */
+ if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+ u32 high = ((args.hobRegister[IDE_HCYL_OFFSET_HOB])<<16) |
+ ((args.hobRegister[IDE_LCYL_OFFSET_HOB])<<8) |
+ (args.hobRegister[IDE_SECTOR_OFFSET_HOB]);
+ u32 low = ((args.tfRegister[IDE_HCYL_OFFSET])<<16) |
+ ((args.tfRegister[IDE_LCYL_OFFSET])<<8) |
+ (args.tfRegister[IDE_SECTOR_OFFSET]);
+ addr_set = ((__u64)high << 24) | low;
+ }
+ return addr_set;
+}
+
+/*
+ * Tests whether the drive supports the Host Protected Area feature.
+ * Returns 1 if supported, 0 otherwise.
+ */
+static inline int idedisk_supports_host_protected_area(ide_drive_t *drive)
+{
+ int flag = (drive->id->cfs_enable_1 & 0x0400) ? 1 : 0;
+ printk("%s: host protected area => %d\n", drive->name, flag);
+ return flag;
+}
+
+#endif /* CONFIG_IDEDISK_STROKE */
+
+/*
+ * Compute drive->capacity, the full capacity of the drive
+ * Called with drive->id != NULL.
+ *
+ * To compute capacity, this uses either of
+ *
+ * 1. CHS value set by user (whatever user sets will be trusted)
+ * 2. LBA value from target drive (require new ATA feature)
+ * 3. LBA value from system BIOS (new one is OK, old one may break)
+ * 4. CHS value from system BIOS (traditional style)
+ *
+ * in the above order (i.e., if a value of higher priority is available,
+ * the rest will be ignored).
+ */
+static void init_idedisk_capacity (ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+ unsigned long capacity = drive->cyl * drive->head * drive->sect;
+ unsigned long set_max = idedisk_read_native_max_address(drive);
+ unsigned long long capacity_2 = capacity;
+ unsigned long long set_max_ext;
+
+ drive->capacity48 = 0;
+ drive->select.b.lba = 0;
+
+ if (id->cfs_enable_2 & 0x0400) {
+ capacity_2 = id->lba_capacity_2;
+ drive->head = drive->bios_head = 255;
+ drive->sect = drive->bios_sect = 63;
+ drive->cyl = (unsigned int) capacity_2 / (drive->head * drive->sect);
+ drive->select.b.lba = 1;
+ set_max_ext = idedisk_read_native_max_address_ext(drive);
+ if (set_max_ext > capacity_2) {
+#ifdef CONFIG_IDEDISK_STROKE
+ set_max_ext = idedisk_read_native_max_address_ext(drive);
+ set_max_ext = idedisk_set_max_address_ext(drive, set_max_ext);
+ if (set_max_ext) {
+ drive->capacity48 = capacity_2 = set_max_ext;
+ drive->cyl = (unsigned int) set_max_ext / (drive->head * drive->sect);
+ drive->select.b.lba = 1;
+ drive->id->lba_capacity_2 = capacity_2;
+ }
+#else /* !CONFIG_IDEDISK_STROKE */
+ printk("%s: setmax_ext LBA %llu, native %llu\n",
+ drive->name, set_max_ext, capacity_2);
+#endif /* CONFIG_IDEDISK_STROKE */
+ }
+ drive->bios_cyl = drive->cyl;
+ drive->capacity48 = capacity_2;
+ drive->capacity = (unsigned long) capacity_2;
+ return;
+ /* Determine capacity, and use LBA if the drive properly supports it */
+ } else if ((id->capability & 2) && lba_capacity_is_ok(id)) {
+ capacity = id->lba_capacity;
+ drive->cyl = capacity / (drive->head * drive->sect);
+ drive->select.b.lba = 1;
+ }
+
+ if (set_max > capacity) {
+#ifdef CONFIG_IDEDISK_STROKE
+ set_max = idedisk_read_native_max_address(drive);
+ set_max = idedisk_set_max_address(drive, set_max);
+ if (set_max) {
+ drive->capacity = capacity = set_max;
+ drive->cyl = set_max / (drive->head * drive->sect);
+ drive->select.b.lba = 1;
+ drive->id->lba_capacity = capacity;
+ }
+#else /* !CONFIG_IDEDISK_STROKE */
+ printk("%s: setmax LBA %lu, native %lu\n",
+ drive->name, set_max, capacity);
+#endif /* CONFIG_IDEDISK_STROKE */
+ }
+
+ drive->capacity = capacity;
+
+ if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)) {
+ drive->capacity48 = id->lba_capacity_2;
+ drive->head = 255;
+ drive->sect = 63;
+ drive->cyl = (unsigned long)(drive->capacity48) / (drive->head * drive->sect);
+ }
+}
+
+static unsigned long idedisk_capacity (ide_drive_t *drive)
+{
+ if (drive->id->cfs_enable_2 & 0x0400)
+ return (drive->capacity48 - drive->sect0);
+ return (drive->capacity - drive->sect0);
+}
+
+static ide_startstop_t idedisk_special (ide_drive_t *drive)
+{
+ special_t *s = &drive->special;
+
+ if (s->b.set_geometry) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ ide_handler_t *handler = NULL;
+
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+
+ s->b.set_geometry = 0;
+ taskfile.sector_number = drive->sect;
+ taskfile.low_cylinder = drive->cyl;
+ taskfile.high_cylinder = drive->cyl>>8;
+ taskfile.device_head = ((drive->head-1)|drive->select.all)&0xBF;
+ if (!IS_PDC4030_DRIVE) {
+ taskfile.sector_count = drive->sect;
+ taskfile.command = WIN_SPECIFY;
+ handler = ide_handler_parser(&taskfile, &hobfile);
+ }
+ do_taskfile(drive, &taskfile, &hobfile, handler);
+ } else if (s->b.recalibrate) {
+ s->b.recalibrate = 0;
+ if (!IS_PDC4030_DRIVE) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.sector_count = drive->sect;
+ taskfile.command = WIN_RESTORE;
+ do_taskfile(drive, &taskfile, &hobfile, ide_handler_parser(&taskfile, &hobfile));
+ }
+ } else if (s->b.set_multmode) {
+ s->b.set_multmode = 0;
+ if (drive->id && drive->mult_req > drive->id->max_multsect)
+ drive->mult_req = drive->id->max_multsect;
+ if (!IS_PDC4030_DRIVE) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.sector_count = drive->mult_req;
+ taskfile.command = WIN_SETMULT;
+ do_taskfile(drive, &taskfile, &hobfile, ide_handler_parser(&taskfile, &hobfile));
+ }
+ } else if (s->all) {
+ int special = s->all;
+ s->all = 0;
+ printk(KERN_ERR "%s: bad special flag: 0x%02x\n", drive->name, special);
+ return ide_stopped;
+ }
+ return IS_PDC4030_DRIVE ? ide_stopped : ide_started;
+}
+
+static void idedisk_pre_reset (ide_drive_t *drive)
+{
+ int legacy = (drive->id->cfs_enable_2 & 0x0400) ? 0 : 1;
+
+ drive->special.all = 0;
+ drive->special.b.set_geometry = legacy;
+ drive->special.b.recalibrate = legacy;
+ if (OK_TO_RESET_CONTROLLER)
+ drive->mult_count = 0;
+ if (!drive->keep_settings && !drive->using_dma)
+ drive->mult_req = 0;
+ if (drive->mult_req != drive->mult_count)
+ drive->special.b.set_multmode = 1;
+}
+
+#ifdef CONFIG_PROC_FS
+
+static int smart_enable(ide_drive_t *drive)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.feature = SMART_ENABLE;
+ taskfile.low_cylinder = SMART_LCYL_PASS;
+ taskfile.high_cylinder = SMART_HCYL_PASS;
+ taskfile.command = WIN_SMART;
+ return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+}
+
+static int get_smart_values(ide_drive_t *drive, byte *buf)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.feature = SMART_READ_VALUES;
+ taskfile.sector_count = 0x01;
+ taskfile.low_cylinder = SMART_LCYL_PASS;
+ taskfile.high_cylinder = SMART_HCYL_PASS;
+ taskfile.command = WIN_SMART;
+ (void) smart_enable(drive);
+ return ide_wait_taskfile(drive, &taskfile, &hobfile, buf);
+}
+
+static int get_smart_thresholds(ide_drive_t *drive, byte *buf)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.feature = SMART_READ_THRESHOLDS;
+ taskfile.sector_count = 0x01;
+ taskfile.low_cylinder = SMART_LCYL_PASS;
+ taskfile.high_cylinder = SMART_HCYL_PASS;
+ taskfile.command = WIN_SMART;
+ (void) smart_enable(drive);
+ return ide_wait_taskfile(drive, &taskfile, &hobfile, buf);
+}
+
+static int proc_idedisk_read_cache
+ (char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+ ide_drive_t *drive = (ide_drive_t *) data;
+ char *out = page;
+ int len;
+
+ if (drive->id)
+ len = sprintf(out,"%i\n", drive->id->buf_size / 2);
+ else
+ len = sprintf(out,"(none)\n");
+ PROC_IDE_READ_RETURN(page,start,off,count,eof,len);
+}
+
+static int proc_idedisk_read_smart_thresholds
+ (char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+ ide_drive_t *drive = (ide_drive_t *)data;
+ int len = 0, i = 0;
+
+ if (!get_smart_thresholds(drive, page)) {
+ unsigned short *val = (unsigned short *) page;
+ char *out = ((char *)val) + (SECTOR_WORDS * 4);
+ page = out;
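+		/* dump one 512-byte sector: SECTOR_WORDS*2 = 256 16-bit words, eight per line */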
+ do {
+ out += sprintf(out, "%04x%c", le16_to_cpu(*val), (++i & 7) ? ' ' : '\n');
+ val += 1;
+ } while (i < (SECTOR_WORDS * 2));
+ len = out - page;
+ }
+ PROC_IDE_READ_RETURN(page,start,off,count,eof,len);
+}
+
+static int proc_idedisk_read_smart_values
+ (char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+ ide_drive_t *drive = (ide_drive_t *)data;
+ int len = 0, i = 0;
+
+ if (!get_smart_values(drive, page)) {
+ unsigned short *val = (unsigned short *) page;
+ char *out = ((char *)val) + (SECTOR_WORDS * 4);
+ page = out;
+ do {
+ out += sprintf(out, "%04x%c", le16_to_cpu(*val), (++i & 7) ? ' ' : '\n');
+ val += 1;
+ } while (i < (SECTOR_WORDS * 2));
+ len = out - page;
+ }
+ PROC_IDE_READ_RETURN(page,start,off,count,eof,len);
+}
+
+static ide_proc_entry_t idedisk_proc[] = {
+ { "cache", S_IFREG|S_IRUGO, proc_idedisk_read_cache, NULL },
+ { "geometry", S_IFREG|S_IRUGO, proc_ide_read_geometry, NULL },
+ { "smart_values", S_IFREG|S_IRUSR, proc_idedisk_read_smart_values, NULL },
+ { "smart_thresholds", S_IFREG|S_IRUSR, proc_idedisk_read_smart_thresholds, NULL },
+ { NULL, 0, NULL, NULL }
+};
+
+#else
+
+#define idedisk_proc NULL
+
+#endif /* CONFIG_PROC_FS */
+
+static int set_multcount(ide_drive_t *drive, int arg)
+{
+#ifdef __TASKFILE__IO
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+
+ if (drive->special.b.set_multmode)
+ return -EBUSY;
+
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.sector_count = drive->mult_req;
+ taskfile.command = WIN_SETMULT;
+ drive->mult_req = arg;
+ drive->special.b.set_multmode = 1;
+ ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+#else /* !__TASKFILE__IO */
+ struct request rq;
+
+ if (drive->special.b.set_multmode)
+ return -EBUSY;
+ ide_init_drive_cmd (&rq);
+ rq.cmd = IDE_DRIVE_CMD;
+ drive->mult_req = arg;
+ drive->special.b.set_multmode = 1;
+ (void) ide_do_drive_cmd (drive, &rq, ide_wait);
+#endif /* __TASKFILE__IO */
+ return (drive->mult_count == arg) ? 0 : -EIO;
+}
+
+static int set_nowerr(ide_drive_t *drive, int arg)
+{
+ if (ide_spin_wait_hwgroup(drive))
+ return -EBUSY;
+ drive->nowerr = arg;
+ drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT;
+ spin_unlock_irq(&io_request_lock);
+ return 0;
+}
+
+static int write_cache (ide_drive_t *drive, int arg)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.feature = (arg) ? SETFEATURES_EN_WCACHE : SETFEATURES_DIS_WCACHE;
+ taskfile.command = WIN_SETFEATURES;
+
+ if (!(drive->id->cfs_enable_2 & 0x3000))
+ return 1;
+
+ (void) ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+ drive->wcache = arg;
+ return 0;
+}
+
+static int do_idedisk_standby (ide_drive_t *drive)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.command = WIN_STANDBYNOW1;
+ return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+}
+
+static int do_idedisk_flushcache (ide_drive_t *drive)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ if (drive->id->cfs_enable_2 & 0x2400) {
+ taskfile.command = WIN_FLUSH_CACHE_EXT;
+ } else {
+ taskfile.command = WIN_FLUSH_CACHE;
+ }
+ return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+}
+
+static int set_acoustic (ide_drive_t *drive, int arg)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+
+ taskfile.feature = (arg)?SETFEATURES_EN_AAM:SETFEATURES_DIS_AAM;
+ taskfile.sector_count = arg;
+
+ taskfile.command = WIN_SETFEATURES;
+ (void) ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+ drive->acoustic = arg;
+ return 0;
+}
+
+static int probe_lba_addressing (ide_drive_t *drive, int arg)
+{
+ drive->addressing = 0;
+
+ if (!(drive->id->cfs_enable_2 & 0x0400))
+ return -EIO;
+
+ drive->addressing = arg;
+ return 0;
+}
+
+static int set_lba_addressing (ide_drive_t *drive, int arg)
+{
+ return (probe_lba_addressing(drive, arg));
+}
+
+static void idedisk_add_settings(ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+#if 0
+ int major = HWIF(drive)->major;
+ int minor = drive->select.b.unit << PARTN_BITS;
+#endif
+
+ ide_add_setting(drive, "bios_cyl", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->bios_cyl, NULL);
+ ide_add_setting(drive, "bios_head", SETTING_RW, -1, -1, TYPE_BYTE, 0, 255, 1, 1, &drive->bios_head, NULL);
+ ide_add_setting(drive, "bios_sect", SETTING_RW, -1, -1, TYPE_BYTE, 0, 63, 1, 1, &drive->bios_sect, NULL);
+ ide_add_setting(drive, "address", SETTING_RW, HDIO_GET_ADDRESS, HDIO_SET_ADDRESS, TYPE_INTA, 0, 2, 1, 1, &drive->addressing, set_lba_addressing);
+ ide_add_setting(drive, "bswap", SETTING_READ, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->bswap, NULL);
+ ide_add_setting(drive, "multcount", id ? SETTING_RW : SETTING_READ, HDIO_GET_MULTCOUNT, HDIO_SET_MULTCOUNT, TYPE_BYTE, 0, id ? id->max_multsect : 0, 1, 1, &drive->mult_count, set_multcount);
+ ide_add_setting(drive, "nowerr", SETTING_RW, HDIO_GET_NOWERR, HDIO_SET_NOWERR, TYPE_BYTE, 0, 1, 1, 1, &drive->nowerr, set_nowerr);
+#if 0
+ ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 1, &read_ahead[major], NULL);
+ ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, 4096, PAGE_SIZE, 1024, &max_readahead[major][minor], NULL);
+ ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 1, &max_sectors[major][minor], NULL);
+#endif
+ ide_add_setting(drive, "lun", SETTING_RW, -1, -1, TYPE_INT, 0, 7, 1, 1, &drive->lun, NULL);
+ ide_add_setting(drive, "wcache", SETTING_RW, HDIO_GET_WCACHE, HDIO_SET_WCACHE, TYPE_BYTE, 0, 1, 1, 1, &drive->wcache, write_cache);
+ ide_add_setting(drive, "acoustic", SETTING_RW, HDIO_GET_ACOUSTIC, HDIO_SET_ACOUSTIC, TYPE_BYTE, 0, 254, 1, 1, &drive->acoustic, set_acoustic);
+ ide_add_setting(drive, "failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->failures, NULL);
+ ide_add_setting(drive, "max_failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->max_failures, NULL);
+}
+
+static void idedisk_setup (ide_drive_t *drive)
+{
+ int i;
+
+ struct hd_driveid *id = drive->id;
+ unsigned long capacity;
+
+ printk (KERN_ALERT
+ "ide-disk.c::idedisk_setup: chs %d %d %d\n",
+ drive->cyl, drive->head, drive->sect);
+
+ idedisk_add_settings(drive);
+
+ if (id == NULL)
+ return;
+
+ /*
+	 * CompactFlash cards and their brethren look just like hard drives
+ * to us, but they are removable and don't have a doorlock mechanism.
+ */
+ if (drive->removable && !drive_is_flashcard(drive)) {
+ /*
+		 * Removable disks (e.g. SYQUEST); ignore 'WD' drives
+ */
+ if (id->model[0] != 'W' || id->model[1] != 'D') {
+ drive->doorlocking = 1;
+ }
+ }
+ for (i = 0; i < MAX_DRIVES; ++i) {
+ ide_hwif_t *hwif = HWIF(drive);
+
+ if (drive != &hwif->drives[i]) continue;
+#ifdef DEVFS_MUST_DIE
+ hwif->gd->de_arr[i] = drive->de;
+#endif
+ if (drive->removable)
+ hwif->gd->flags[i] |= GENHD_FL_REMOVABLE;
+ break;
+ }
+
+ /* Extract geometry if we did not already have one for the drive */
+ if (!drive->cyl || !drive->head || !drive->sect) {
+ drive->cyl = drive->bios_cyl = id->cyls;
+ drive->head = drive->bios_head = id->heads;
+ drive->sect = drive->bios_sect = id->sectors;
+ }
+
+ /* Handle logical geometry translation by the drive */
+ if ((id->field_valid & 1) && id->cur_cyls &&
+ id->cur_heads && (id->cur_heads <= 16) && id->cur_sectors) {
+ drive->cyl = id->cur_cyls;
+ drive->head = id->cur_heads;
+ drive->sect = id->cur_sectors;
+ }
+
+ /* Use physical geometry if what we have still makes no sense */
+ if (drive->head > 16 && id->heads && id->heads <= 16) {
+ drive->cyl = id->cyls;
+ drive->head = id->heads;
+ drive->sect = id->sectors;
+ }
+
+ /* calculate drive capacity, and select LBA if possible */
+ init_idedisk_capacity (drive);
+
+ /*
+ * if possible, give fdisk access to more of the drive,
+ * by correcting bios_cyls:
+ */
+ capacity = idedisk_capacity (drive);
+ if ((capacity >= (drive->bios_cyl * drive->bios_sect * drive->bios_head)) &&
+ (!drive->forced_geom) && drive->bios_sect && drive->bios_head)
+ drive->bios_cyl = (capacity / drive->bios_sect) / drive->bios_head;
+ printk (KERN_INFO "[XEN] %s: %ld sectors", drive->name, capacity);
+
+ /* Give size in megabytes (MB), not mebibytes (MiB). */
+ /* We compute the exact rounded value, avoiding overflow. */
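+	/* capacity*512/10^6 == capacity*624/1218750 == (capacity - capacity/625)/1950
+	   exactly; the +974 (~1950/2) rounds to the nearest MB. */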
+ printk (" (%ld MB)", (capacity - capacity/625 + 974)/1950);
+
+ /* Only print cache size when it was specified */
+ if (id->buf_size)
+ printk (" w/%dKiB Cache", id->buf_size/2);
+
+ printk(", CHS=%d/%d/%d",
+ drive->bios_cyl, drive->bios_head, drive->bios_sect);
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (drive->using_dma)
+ (void) HWIF(drive)->dmaproc(ide_dma_verbose, drive);
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+ printk("\n");
+
+ drive->mult_count = 0;
+ if (id->max_multsect) {
+#ifdef CONFIG_IDEDISK_MULTI_MODE
+ id->multsect = ((id->max_multsect/2) > 1) ? id->max_multsect : 0;
+ id->multsect_valid = id->multsect ? 1 : 0;
+ drive->mult_req = id->multsect_valid ? id->max_multsect : INITIAL_MULT_COUNT;
+ drive->special.b.set_multmode = drive->mult_req ? 1 : 0;
+#else /* original, pre IDE-NFG, per request of AC */
+ drive->mult_req = INITIAL_MULT_COUNT;
+ if (drive->mult_req > id->max_multsect)
+ drive->mult_req = id->max_multsect;
+ if (drive->mult_req || ((id->multsect_valid & 1) && id->multsect))
+ drive->special.b.set_multmode = 1;
+#endif /* CONFIG_IDEDISK_MULTI_MODE */
+ }
+ drive->no_io_32bit = id->dword_io ? 1 : 0;
+ if (drive->id->cfs_enable_2 & 0x3000)
+ write_cache(drive, (id->cfs_enable_2 & 0x3000));
+ (void) probe_lba_addressing(drive, 1);
+}
+
+static int idedisk_cleanup (ide_drive_t *drive)
+{
+ if ((drive->id->cfs_enable_2 & 0x3000) && drive->wcache)
+ if (do_idedisk_flushcache(drive))
+ printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n",
+ drive->name);
+ return ide_unregister_subdriver(drive);
+}
+
+int idedisk_reinit(ide_drive_t *drive);
+
+/*
+ * IDE subdriver functions, registered with ide.c
+ */
+static ide_driver_t idedisk_driver = {
+ name: "ide-disk",
+ version: IDEDISK_VERSION,
+ media: ide_disk,
+ busy: 0,
+ supports_dma: 1,
+ supports_dsc_overlap: 0,
+ cleanup: idedisk_cleanup,
+ standby: do_idedisk_standby,
+ flushcache: do_idedisk_flushcache,
+ do_request: do_rw_disk,
+ end_request: NULL,
+ ioctl: NULL,
+ open: idedisk_open,
+ release: idedisk_release,
+ media_change: idedisk_media_change,
+ revalidate: idedisk_revalidate,
+ pre_reset: idedisk_pre_reset,
+ capacity: idedisk_capacity,
+ special: idedisk_special,
+ /*proc: idedisk_proc,*/
+ reinit: idedisk_reinit,
+ ata_prebuilder: NULL,
+ atapi_prebuilder: NULL,
+};
+
+int idedisk_init (void);
+static ide_module_t idedisk_module = {
+ IDE_DRIVER_MODULE,
+ idedisk_init,
+ &idedisk_driver,
+ NULL
+};
+
+MODULE_DESCRIPTION("ATA DISK Driver");
+
+int idedisk_reinit (ide_drive_t *drive)
+{
+ int failed = 0;
+
+ MOD_INC_USE_COUNT;
+
+ if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION)) {
+ printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name);
+ return 1;
+ }
+ DRIVER(drive)->busy++;
+ idedisk_setup(drive);
+ if ((!drive->head || drive->head > 16) && !drive->select.b.lba) {
+ printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head);
+ (void) idedisk_cleanup(drive);
+ DRIVER(drive)->busy--;
+ return 1;
+ }
+ DRIVER(drive)->busy--;
+ failed--;
+
+ ide_register_module(&idedisk_module);
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+
+static void __exit idedisk_exit (void)
+{
+ ide_drive_t *drive;
+ int failed = 0;
+
+ while ((drive = ide_scan_devices (ide_disk, idedisk_driver.name, &idedisk_driver, failed)) != NULL) {
+ if (idedisk_cleanup (drive)) {
+ printk (KERN_ERR "%s: cleanup_module() called while still busy\n", drive->name);
+ failed++;
+ }
+		/* We must remove the proc entries defined in this module.
+		   Otherwise we oops when accessing them. */
+#ifdef CONFIG_PROC_FS
+ if (drive->proc)
+ ide_remove_proc_entries(drive->proc, idedisk_proc);
+#endif
+ }
+ ide_unregister_module(&idedisk_module);
+}
+
+int idedisk_init (void)
+{
+ ide_drive_t *drive;
+ int failed = 0;
+
+ MOD_INC_USE_COUNT;
+ while ((drive = ide_scan_devices (ide_disk, idedisk_driver.name, NULL, failed++)) != NULL) {
+ if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION)) {
+ printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name);
+ continue;
+ }
+ DRIVER(drive)->busy++;
+ idedisk_setup(drive);
+ if ((!drive->head || drive->head > 16) && !drive->select.b.lba) {
+ printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head);
+ (void) idedisk_cleanup(drive);
+ DRIVER(drive)->busy--;
+ continue;
+ }
+ DRIVER(drive)->busy--;
+ failed--;
+ }
+ ide_register_module(&idedisk_module);
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+
+module_init(idedisk_init);
+module_exit(idedisk_exit);
+MODULE_LICENSE("GPL");
diff --git a/xen/drivers/ide/ide-dma.c b/xen/drivers/ide/ide-dma.c
new file mode 100644
index 0000000000..6ce5fd4b1f
--- /dev/null
+++ b/xen/drivers/ide/ide-dma.c
@@ -0,0 +1,913 @@
+/*
+ * linux/drivers/ide/ide-dma.c Version 4.10 June 9, 2000
+ *
+ * Copyright (c) 1999-2000 Andre Hedrick <andre@linux-ide.org>
+ * May be copied or modified under the terms of the GNU General Public License
+ */
+
+/*
+ * Special thanks to Mark for his six years of work.
+ *
+ * Copyright (c) 1995-1998 Mark Lord
+ * May be copied or modified under the terms of the GNU General Public License
+ */
+
+/*
+ * This module provides support for the bus-master IDE DMA functions
+ * of various PCI chipsets, including the Intel PIIX (i82371FB for
+ * the 430 FX chipset), the PIIX3 (i82371SB for the 430 HX/VX and
+ * 440 chipsets), and the PIIX4 (i82371AB for the 430 TX chipset)
+ * ("PIIX" stands for "PCI ISA IDE Xcellerator").
+ *
+ * Pretty much the same code works for other IDE PCI bus-mastering chipsets.
+ *
+ * DMA is supported for all IDE devices (disk drives, cdroms, tapes, floppies).
+ *
+ * By default, DMA support is prepared for use, but is currently enabled only
+ * for drives which already have DMA enabled (UltraDMA or mode 2 multi/single),
+ * or which are recognized as "good" (see table below). Drives with only mode0
+ * or mode1 (multi/single) DMA should also work with this chipset/driver
+ * (eg. MC2112A) but are not enabled by default.
+ *
+ * Use "hdparm -i" to view modes supported by a given drive.
+ *
+ * The hdparm-3.5 (or later) utility can be used for manually enabling/disabling
+ * DMA support, but must be (re-)compiled against this kernel version or later.
+ *
+ * To enable DMA, use "hdparm -d1 /dev/hd?" on a per-drive basis after booting.
+ * If problems arise, ide.c will disable DMA operation after a few retries.
+ * This error recovery mechanism works and has been extremely well exercised.
+ *
+ * IDE drives, depending on their vintage, may support several different modes
+ * of DMA operation. The boot-time modes are indicated with a "*" in
+ * the "hdparm -i" listing, and can be changed with *knowledgeable* use of
+ * the "hdparm -X" feature. There is seldom a need to do this, as drives
+ * normally power-up with their "best" PIO/DMA modes enabled.
+ *
+ * Testing has been done with a rather extensive number of drives,
+ * with Quantum & Western Digital models generally outperforming the pack,
+ * and Fujitsu & Conner (and some Seagate which are really Conner) drives
+ * showing more lackluster throughput.
+ *
+ * Keep an eye on /var/adm/messages for "DMA disabled" messages.
+ *
+ * Some people have reported trouble with Intel Zappa motherboards.
+ * This can be fixed by upgrading the AMI BIOS to version 1.00.04.BS0,
+ * available from ftp://ftp.intel.com/pub/bios/10004bs0.exe
+ * (thanks to Glen Morrell <glen@spin.Stanford.edu> for researching this).
+ *
+ * Thanks to "Christopher J. Reimer" <reimer@doe.carleton.ca> for
+ * fixing the problem with the BIOS on some Acer motherboards.
+ *
+ * Thanks to "Benoit Poulot-Cazajous" <poulot@chorus.fr> for testing
+ * "TX" chipset compatibility and for providing patches for the "TX" chipset.
+ *
+ * Thanks to Christian Brunner <chb@muc.de> for taking a good first crack
+ * at generic DMA -- his patches were referred to when preparing this code.
+ *
+ * Most importantly, thanks to Robert Bringman <rob@mars.trion.com>
+ * for supplying a Promise UDMA board & WD UDMA drive for this work!
+ *
+ * And, yes, Intel Zappa boards really *do* use both PIIX IDE ports.
+ *
+ * check_drive_lists(ide_drive_t *drive, int good_bad)
+ *
+ * ATA-66/100 and recovery functions, I forgot the rest......
+ * SELECT_READ_WRITE(hwif,drive,func) for active tuning based on IO direction.
+ *
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/pci.h>
+#include <xeno/init.h>
+#include <xeno/ide.h>
+
+#include <asm/byteorder.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+/*
+ * Long lost data from 2.0.34 that is now in 2.0.39
+ *
+ * This was used in ./drivers/block/triton.c to do DMA Base address setup
+ * when PnP failed. Oh the things we forget. I believe this was part
+ * of SFF-8038i that has been withdrawn from public access... :-((
+ */
+#define DEFAULT_BMIBA 0xe800 /* in case BIOS did not init it */
+#define DEFAULT_BMCRBA 0xcc00 /* VIA's default value */
+#define DEFAULT_BMALIBA 0xd400 /* ALI's default value */
+
+extern char *ide_dmafunc_verbose(ide_dma_action_t dmafunc);
+
+#ifdef CONFIG_IDEDMA_NEW_DRIVE_LISTINGS
+
+struct drive_list_entry {
+ char * id_model;
+ char * id_firmware;
+};
+
+struct drive_list_entry drive_whitelist [] = {
+
+ { "Micropolis 2112A" , "ALL" },
+ { "CONNER CTMA 4000" , "ALL" },
+ { "CONNER CTT8000-A" , "ALL" },
+ { "ST34342A" , "ALL" },
+ { 0 , 0 }
+};
+
+struct drive_list_entry drive_blacklist [] = {
+
+ { "WDC AC11000H" , "ALL" },
+ { "WDC AC22100H" , "ALL" },
+ { "WDC AC31000H" , "ALL" },
+ { "WDC AC32500H" , "ALL" },
+ { "WDC AC33100H" , "ALL" },
+ { "WDC AC31600H" , "ALL" },
+ { "WDC AC32100H" , "24.09P07" },
+ { "WDC AC23200L" , "21.10N21" },
+ { "Compaq CRD-8241B" , "ALL" },
+ { "CRD-8400B" , "ALL" },
+ { "CRD-8480B", "ALL" },
+ { "CRD-8480C", "ALL" },
+ { "CRD-8482B", "ALL" },
+ { "CRD-84" , "ALL" },
+ { "SanDisk SDP3B" , "ALL" },
+ { "SanDisk SDP3B-64" , "ALL" },
+ { "SANYO CD-ROM CRD" , "ALL" },
+ { "HITACHI CDR-8" , "ALL" },
+ { "HITACHI CDR-8335" , "ALL" },
+ { "HITACHI CDR-8435" , "ALL" },
+ { "Toshiba CD-ROM XM-6202B" , "ALL" },
+ { "CD-532E-A" , "ALL" },
+ { "E-IDE CD-ROM CR-840", "ALL" },
+ { "CD-ROM Drive/F5A", "ALL" },
+ { "RICOH CD-R/RW MP7083A", "ALL" },
+ { "WPI CDD-820", "ALL" },
+ { "SAMSUNG CD-ROM SC-148C", "ALL" },
+ { "SAMSUNG CD-ROM SC-148F", "ALL" },
+ { "SAMSUNG CD-ROM SC", "ALL" },
+ { "SanDisk SDP3B-64" , "ALL" },
+ { "SAMSUNG CD-ROM SN-124", "ALL" },
+ { "PLEXTOR CD-R PX-W8432T", "ALL" },
+ { "ATAPI CD-ROM DRIVE 40X MAXIMUM", "ALL" },
+ { "_NEC DV5800A", "ALL" },
+ { 0 , 0 }
+
+};
+
+int in_drive_list(struct hd_driveid *id, struct drive_list_entry * drive_table)
+{
+ for ( ; drive_table->id_model ; drive_table++)
+ if ((!strcmp(drive_table->id_model, id->model)) &&
+		    ((strstr(drive_table->id_firmware, id->fw_rev)) ||
+ (!strcmp(drive_table->id_firmware, "ALL"))))
+ return 1;
+ return 0;
+}
+
+#else /* !CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */
+
+/*
+ * good_dma_drives() lists the model names (from "hdparm -i")
+ * of drives which do not support mode2 DMA but which are
+ * known to work fine with this interface under Linux.
+ */
+const char *good_dma_drives[] = {"Micropolis 2112A",
+ "CONNER CTMA 4000",
+ "CONNER CTT8000-A",
+ "ST34342A", /* for Sun Ultra */
+ NULL};
+
+/*
+ * bad_dma_drives() lists the model names (from "hdparm -i")
+ * of drives which supposedly support (U)DMA but which are
+ * known to corrupt data with this interface under Linux.
+ *
+ * This is an empirical list, generated from bug reports. That means that
+ * while it reflects actual problem distributions, it doesn't tell us
+ * whether the drive, the controller, the cabling, the software, or some
+ * combination thereof is at fault. If you don't happen to agree with the
+ * kernel's opinion of your drive - use hdparm to turn DMA on.
+ */
+const char *bad_dma_drives[] = {"WDC AC11000H",
+ "WDC AC22100H",
+ "WDC AC32100H",
+ "WDC AC32500H",
+ "WDC AC33100H",
+ "WDC AC31600H",
+ NULL};
+
+#endif /* CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */
+
+/*
+ * Our Physical Region Descriptor (PRD) table should be large enough
+ * to handle the biggest I/O request we are likely to see. Since requests
+ * can have no more than 256 sectors, and since the typical blocksize is
+ * two or more sectors, we could get by with a limit of 128 entries here for
+ * the usual worst case. Most requests seem to include some contiguous blocks,
+ * further reducing the number of table entries required.
+ *
+ * The driver reverts to PIO mode for individual requests that exceed
+ * this limit (possible with 512 byte blocksizes, eg. MSDOS f/s), so handling
+ * 100% of all crazy scenarios here is not necessary.
+ *
+ * As it turns out though, we must allocate a full 4KB page for this,
+ * so the two PRD tables (ide0 & ide1) will each get half of that,
+ * allowing each to have about 256 entries (8 bytes each) from this.
+ */
+#define PRD_BYTES 8
+#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES))
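+
+/*
+ * With 4kB pages this gives PRD_ENTRIES = 4096 / (2 * 8) = 256 per interface.
+ * As used below, each 8-byte entry is two little-endian 32-bit words: the
+ * physical base address of a block, then the byte count in the low 16 bits
+ * (0x0000 meaning 64kB), with 0x80000000 marking the final entry.
+ */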
+
+/*
+ * dma_intr() is the handler for disk read/write DMA interrupts
+ */
+ide_startstop_t ide_dma_intr (ide_drive_t *drive)
+{
+ int i;
+ byte stat, dma_stat;
+
+ dma_stat = HWIF(drive)->dmaproc(ide_dma_end, drive);
+ stat = GET_STAT(); /* get drive status */
+ if (OK_STAT(stat,DRIVE_READY,drive->bad_wstat|DRQ_STAT)) {
+ if (!dma_stat) {
+			struct request *rq = HWGROUP(drive)->rq;
+ for (i = rq->nr_sectors; i > 0;) {
+ i -= rq->current_nr_sectors;
+ ide_end_request(1, HWGROUP(drive));
+ }
+ return ide_stopped;
+ }
+ printk("%s: dma_intr: bad DMA status (dma_stat=%x)\n",
+ drive->name, dma_stat);
+ }
+ return ide_error(drive, "dma_intr", stat);
+}
+
+static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq)
+{
+ struct buffer_head *bh;
+ struct scatterlist *sg = hwif->sg_table;
+ unsigned long lastdataend = ~0UL;
+ int nents = 0;
+
+ if (hwif->sg_dma_active)
+ BUG();
+
+ if (rq->cmd == READ)
+ hwif->sg_dma_direction = PCI_DMA_FROMDEVICE;
+ else
+ hwif->sg_dma_direction = PCI_DMA_TODEVICE;
+
+ bh = rq->bh;
+ do {
+ struct scatterlist *sge;
+
+ /*
+ * continue segment from before?
+ */
+ if (bh_phys(bh) == lastdataend) {
+ sg[nents - 1].length += bh->b_size;
+ lastdataend += bh->b_size;
+ continue;
+ }
+
+ /*
+ * start new segment
+ */
+ if (nents >= PRD_ENTRIES)
+ return 0;
+
+ sge = &sg[nents];
+ memset(sge, 0, sizeof(*sge));
+
+ if (bh->b_page) {
+ sge->page = bh->b_page;
+ sge->offset = bh_offset(bh);
+ } else {
+
+
+#if 0
+ /* below is wrong for xen since b_data is actually
+ a 'physical / virtual' thingy. Ask KAF. */
+ if (((unsigned long) bh->b_data) < PAGE_SIZE)
+ BUG();
+#endif
+
+ sge->address = bh->b_data;
+ }
+
+ sge->length = bh->b_size;
+ lastdataend = bh_phys(bh) + bh->b_size;
+ nents++;
+ } while ((bh = bh->b_reqnext) != NULL);
+
+ return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction);
+}
+
+static int ide_raw_build_sglist (ide_hwif_t *hwif, struct request *rq)
+{
+ struct scatterlist *sg = hwif->sg_table;
+ int nents = 0;
+ ide_task_t *args = rq->special;
+ unsigned char *virt_addr = rq->buffer;
+ int sector_count = rq->nr_sectors;
+
+// if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_WRITEDMA) ||
+// (args->tfRegister[IDE_COMMAND_OFFSET] == WIN_WRITEDMA_EXT))
+ if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE)
+ hwif->sg_dma_direction = PCI_DMA_TODEVICE;
+ else
+ hwif->sg_dma_direction = PCI_DMA_FROMDEVICE;
+
+ if (sector_count > 128) {
+ memset(&sg[nents], 0, sizeof(*sg));
+ sg[nents].address = virt_addr;
+ sg[nents].length = 128 * SECTOR_SIZE;
+ nents++;
+ virt_addr = virt_addr + (128 * SECTOR_SIZE);
+ sector_count -= 128;
+ }
+ memset(&sg[nents], 0, sizeof(*sg));
+ sg[nents].address = virt_addr;
+ sg[nents].length = sector_count * SECTOR_SIZE;
+ nents++;
+
+ return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction);
+}
+
+/*
+ * ide_build_dmatable() prepares a dma request.
+ * Returns 0 if all went okay, returns 1 otherwise.
+ * May also be invoked from trm290.c
+ */
+int ide_build_dmatable (ide_drive_t *drive, ide_dma_action_t func)
+{
+ unsigned int *table = HWIF(drive)->dmatable_cpu;
+#ifdef CONFIG_BLK_DEV_TRM290
+ unsigned int is_trm290_chipset = (HWIF(drive)->chipset == ide_trm290);
+#else
+ const int is_trm290_chipset = 0;
+#endif
+ unsigned int count = 0;
+ int i;
+ struct scatterlist *sg;
+
+ if (HWGROUP(drive)->rq->cmd == IDE_DRIVE_TASKFILE)
+ HWIF(drive)->sg_nents = i = ide_raw_build_sglist(HWIF(drive), HWGROUP(drive)->rq);
+ else
+ HWIF(drive)->sg_nents = i = ide_build_sglist(HWIF(drive), HWGROUP(drive)->rq);
+
+ if (!i)
+ return 0;
+
+ sg = HWIF(drive)->sg_table;
+ while (i) {
+ u32 cur_addr;
+ u32 cur_len;
+
+ cur_addr = sg_dma_address(sg);
+ cur_len = sg_dma_len(sg);
+
+ /*
+ * Fill in the dma table, without crossing any 64kB boundaries.
+ * Most hardware requires 16-bit alignment of all blocks,
+ * but the trm290 requires 32-bit alignment.
+ */
+
+ while (cur_len) {
+ u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff);
+
+ if (count++ >= PRD_ENTRIES)
+ BUG();
+
+ if (bcount > cur_len)
+ bcount = cur_len;
+ *table++ = cpu_to_le32(cur_addr);
+ xcount = bcount & 0xffff;
+ if (is_trm290_chipset)
+ xcount = ((xcount >> 2) - 1) << 16;
+ if (xcount == 0x0000) {
+ /*
+ * Most chipsets correctly interpret a length
+ * of 0x0000 as 64KB, but at least one
+ * (e.g. CS5530) misinterprets it as zero (!).
+ * So here we break the 64KB entry into two
+ * 32KB entries instead.
+ */
+ if (count++ >= PRD_ENTRIES)
+ goto use_pio_instead;
+
+ *table++ = cpu_to_le32(0x8000);
+ *table++ = cpu_to_le32(cur_addr + 0x8000);
+ xcount = 0x8000;
+ }
+ *table++ = cpu_to_le32(xcount);
+ cur_addr += bcount;
+ cur_len -= bcount;
+ }
+
+ sg++;
+ i--;
+ }
+
+ if (count) {
+ if (!is_trm290_chipset)
+ *--table |= cpu_to_le32(0x80000000);
+ return count;
+ }
+ printk("%s: empty DMA table?\n", drive->name);
+use_pio_instead:
+ pci_unmap_sg(HWIF(drive)->pci_dev,
+ HWIF(drive)->sg_table,
+ HWIF(drive)->sg_nents,
+ HWIF(drive)->sg_dma_direction);
+ HWIF(drive)->sg_dma_active = 0;
+ return 0; /* revert to PIO for this request */
+}
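+
+/*
+ * For reference, the table filled above follows the standard SFF-8038i
+ * physical-region-descriptor format (a sketch; each PRD is a pair of
+ * little-endian 32-bit words, and bit 31 of the second word marks the
+ * end of the table):
+ */
+#if 0
+struct prd_entry {
+	u32 addr;	/* physical base; 16-bit aligned, 32-bit on trm290 */
+	u32 flags_len;	/* bits 0-15: byte count (0 would mean 64kB);
+			   bit 31: end-of-table, set on the last entry */
+};
+#endif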
+
+/* Teardown mappings after DMA has completed. */
+void ide_destroy_dmatable (ide_drive_t *drive)
+{
+ struct pci_dev *dev = HWIF(drive)->pci_dev;
+ struct scatterlist *sg = HWIF(drive)->sg_table;
+ int nents = HWIF(drive)->sg_nents;
+
+ pci_unmap_sg(dev, sg, nents, HWIF(drive)->sg_dma_direction);
+ HWIF(drive)->sg_dma_active = 0;
+}
+
+/*
+ * For both blacklisted and whitelisted drives.
+ * This is set up to be called as an extern, for future use by
+ * other special driver code.
+ */
+int check_drive_lists (ide_drive_t *drive, int good_bad)
+{
+ struct hd_driveid *id = drive->id;
+
+#ifdef CONFIG_IDEDMA_NEW_DRIVE_LISTINGS
+ if (good_bad) {
+ return in_drive_list(id, drive_whitelist);
+ } else {
+ int blacklist = in_drive_list(id, drive_blacklist);
+ if (blacklist)
+ printk("%s: Disabling (U)DMA for %s\n", drive->name, id->model);
+ return(blacklist);
+ }
+#else /* !CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */
+ const char **list;
+
+ if (good_bad) {
+ /* Consult the list of known "good" drives */
+ list = good_dma_drives;
+ while (*list) {
+ if (!strcmp(*list++,id->model))
+ return 1;
+ }
+ } else {
+ /* Consult the list of known "bad" drives */
+ list = bad_dma_drives;
+ while (*list) {
+ if (!strcmp(*list++,id->model)) {
+ printk("%s: Disabling (U)DMA for %s\n",
+ drive->name, id->model);
+ return 1;
+ }
+ }
+ }
+#endif /* CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */
+ return 0;
+}
+
+int report_drive_dmaing (ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+
+ if ((id->field_valid & 4) && (eighty_ninty_three(drive)) &&
+ (id->dma_ultra & (id->dma_ultra >> 14) & 3)) {
+ if ((id->dma_ultra >> 15) & 1) {
+ printk(", UDMA(mode 7)"); /* UDMA BIOS-enabled! */
+ } else {
+ printk(", UDMA(133)"); /* UDMA BIOS-enabled! */
+ }
+ } else if ((id->field_valid & 4) && (eighty_ninty_three(drive)) &&
+ (id->dma_ultra & (id->dma_ultra >> 11) & 7)) {
+ if ((id->dma_ultra >> 13) & 1) {
+ printk(", UDMA(100)"); /* UDMA BIOS-enabled! */
+ } else if ((id->dma_ultra >> 12) & 1) {
+ printk(", UDMA(66)"); /* UDMA BIOS-enabled! */
+ } else {
+ printk(", UDMA(44)"); /* UDMA BIOS-enabled! */
+ }
+ } else if ((id->field_valid & 4) &&
+ (id->dma_ultra & (id->dma_ultra >> 8) & 7)) {
+ if ((id->dma_ultra >> 10) & 1) {
+ printk(", UDMA(33)"); /* UDMA BIOS-enabled! */
+ } else if ((id->dma_ultra >> 9) & 1) {
+ printk(", UDMA(25)"); /* UDMA BIOS-enabled! */
+ } else {
+ printk(", UDMA(16)"); /* UDMA BIOS-enabled! */
+ }
+ } else if (id->field_valid & 4) {
+ printk(", (U)DMA"); /* Can be BIOS-enabled! */
+ } else {
+ printk(", DMA");
+ }
+ return 1;
+}
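+
+/*
+ * The dma_ultra tests above rely on the IDENTIFY word 88 layout: the
+ * low byte reports which UDMA modes the drive supports, the high byte
+ * which one is currently enabled.  Hence "x & (x >> 8) & 7" is nonzero
+ * only when one of modes 0-2 is both supported and enabled, and the
+ * ">> 11" / ">> 14" variants match the enabled bits of the higher mode
+ * groups against the (always-set) low support bits.  For example,
+ * dma_ultra == 0x2020 means mode 5 supported and selected.
+ */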
+
+static int config_drive_for_dma (ide_drive_t *drive)
+{
+ int config_allows_dma = 1;
+ struct hd_driveid *id = drive->id;
+ ide_hwif_t *hwif = HWIF(drive);
+
+#ifdef CONFIG_IDEDMA_ONLYDISK
+ if (drive->media != ide_disk)
+ config_allows_dma = 0;
+#endif
+
+ if (id && (id->capability & 1) && hwif->autodma && config_allows_dma) {
+ /* Consult the list of known "bad" drives */
+ if (ide_dmaproc(ide_dma_bad_drive, drive))
+ return hwif->dmaproc(ide_dma_off, drive);
+
+ /* Enable DMA on any drive that has UltraDMA (mode 6/7/?) enabled */
+ if ((id->field_valid & 4) && (eighty_ninty_three(drive)))
+ if ((id->dma_ultra & (id->dma_ultra >> 14) & 2))
+ return hwif->dmaproc(ide_dma_on, drive);
+ /* Enable DMA on any drive that has UltraDMA (mode 3/4/5) enabled */
+ if ((id->field_valid & 4) && (eighty_ninty_three(drive)))
+ if ((id->dma_ultra & (id->dma_ultra >> 11) & 7))
+ return hwif->dmaproc(ide_dma_on, drive);
+ /* Enable DMA on any drive that has UltraDMA (mode 0/1/2) enabled */
+ if (id->field_valid & 4) /* UltraDMA */
+ if ((id->dma_ultra & (id->dma_ultra >> 8) & 7))
+ return hwif->dmaproc(ide_dma_on, drive);
+ /* Enable DMA on any drive that has mode2 DMA (multi or single) enabled */
+ if (id->field_valid & 2) /* regular DMA */
+ if ((id->dma_mword & 0x404) == 0x404 || (id->dma_1word & 0x404) == 0x404)
+ return hwif->dmaproc(ide_dma_on, drive);
+ /* Consult the list of known "good" drives */
+ if (ide_dmaproc(ide_dma_good_drive, drive))
+ return hwif->dmaproc(ide_dma_on, drive);
+ }
+ return hwif->dmaproc(ide_dma_off_quietly, drive);
+}
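+
+/*
+ * The "(id->dma_mword & 0x404) == 0x404" test above applies the same
+ * supported/selected split to multiword DMA (IDENTIFY word 63): bit 2
+ * means mode 2 is supported, bit 10 means mode 2 is the selected mode,
+ * and 0x404 requires both; likewise for single-word DMA in dma_1word.
+ */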
+
+#ifndef CONFIG_BLK_DEV_IDEDMA_TIMEOUT
+/*
+ * 1 dmaing, 2 error, 4 intr
+ */
+static int dma_timer_expiry (ide_drive_t *drive)
+{
+ byte dma_stat = inb(HWIF(drive)->dma_base+2);
+
+#ifdef DEBUG
+ printk("%s: dma_timer_expiry: dma status == 0x%02x\n", drive->name, dma_stat);
+#endif /* DEBUG */
+
+#if 0
+ HWGROUP(drive)->expiry = NULL; /* one free ride for now */
+#endif
+
+ if (dma_stat & 2) { /* ERROR */
+ byte stat = GET_STAT();
+ return ide_error(drive, "dma_timer_expiry", stat);
+ }
+ if (dma_stat & 1) /* DMAing */
+ return WAIT_CMD;
+ return 0;
+}
+#else /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+static ide_startstop_t ide_dma_timeout_recovery (ide_drive_t *drive)
+{
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ ide_hwif_t *hwif = HWIF(drive);
+ int enable_dma = drive->using_dma;
+ unsigned long flags;
+ ide_startstop_t startstop;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ hwgroup->handler = NULL;
+ del_timer(&hwgroup->timer);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ drive->waiting_for_dma = 0;
+
+ startstop = ide_do_reset(drive);
+
+ if ((enable_dma) && !(drive->using_dma))
+ (void) hwif->dmaproc(ide_dma_on, drive);
+
+ return startstop;
+}
+#endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+
+#if 0
+static inline void ide_toggle_bounce(ide_drive_t *drive, int on)
+{
+ dma64_addr_t addr = BLK_BOUNCE_HIGH;
+
+ if (HWIF(drive)->no_highio || HWIF(drive)->pci_dev == NULL)
+ return;
+
+ if (on && drive->media == ide_disk) {
+ if (!PCI_DMA_BUS_IS_PHYS)
+ addr = BLK_BOUNCE_ANY;
+ else
+ addr = HWIF(drive)->pci_dev->dma_mask;
+ }
+
+ blk_queue_bounce_limit(&drive->queue, addr);
+}
+#endif
+
+/*
+ * ide_dmaproc() initiates/aborts DMA read/write operations on a drive.
+ *
+ * The caller is assumed to have selected the drive and programmed the drive's
+ * sector address using CHS or LBA. All that remains is to prepare for DMA
+ * and then issue the actual read/write DMA/PIO command to the drive.
+ *
+ * For ATAPI devices, we just prepare for DMA and return. The caller should
+ * then issue the packet command to the drive and call us again with
+ * ide_dma_begin afterwards.
+ *
+ * Returns 0 if all went well.
+ * Returns 1 if DMA read/write could not be started, in which case
+ * the caller should revert to PIO for the current request.
+ * May also be invoked from trm290.c
+ */
+int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive)
+{
+// ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ ide_hwif_t *hwif = HWIF(drive);
+ unsigned long dma_base = hwif->dma_base;
+ byte unit = (drive->select.b.unit & 0x01);
+ unsigned int count, reading = 0/*, set_high = 1*/;
+ byte dma_stat;
+
+ switch (func) {
+ case ide_dma_off:
+ printk("%s: DMA disabled\n", drive->name);
+ case ide_dma_off_quietly:
+ /*set_high = 0;*/
+ outb(inb(dma_base+2) & ~(1<<(5+unit)), dma_base+2);
+ case ide_dma_on:
+ drive->using_dma = (func == ide_dma_on);
+ if (drive->using_dma)
+ outb(inb(dma_base+2)|(1<<(5+unit)), dma_base+2);
+ /*ide_toggle_bounce(drive, set_high);*/
+ return 0;
+ case ide_dma_check:
+ return config_drive_for_dma (drive);
+ case ide_dma_read:
+ reading = 1 << 3;
+ case ide_dma_write:
+ SELECT_READ_WRITE(hwif,drive,func);
+ if (!(count = ide_build_dmatable(drive, func)))
+ return 1; /* try PIO instead of DMA */
+ outl(hwif->dmatable_dma, dma_base + 4); /* PRD table */
+ outb(reading, dma_base); /* specify r/w */
+ outb(inb(dma_base+2)|6, dma_base+2); /* clear INTR & ERROR flags */
+ drive->waiting_for_dma = 1;
+ if (drive->media != ide_disk)
+ return 0;
+#ifdef CONFIG_BLK_DEV_IDEDMA_TIMEOUT
+ ide_set_handler(drive, &ide_dma_intr, 2*WAIT_CMD, NULL); /* issue cmd to drive */
+#else /* !CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+ ide_set_handler(drive, &ide_dma_intr, WAIT_CMD, dma_timer_expiry); /* issue cmd to drive */
+#endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+ if ((HWGROUP(drive)->rq->cmd == IDE_DRIVE_TASKFILE) &&
+ (drive->addressing == 1)) {
+ ide_task_t *args = HWGROUP(drive)->rq->special;
+ OUT_BYTE(args->tfRegister[IDE_COMMAND_OFFSET], IDE_COMMAND_REG);
+ } else if (drive->addressing) {
+ OUT_BYTE(reading ? WIN_READDMA_EXT : WIN_WRITEDMA_EXT, IDE_COMMAND_REG);
+ } else {
+ OUT_BYTE(reading ? WIN_READDMA : WIN_WRITEDMA, IDE_COMMAND_REG);
+ }
+ return HWIF(drive)->dmaproc(ide_dma_begin, drive);
+ case ide_dma_begin:
+ /* Note that this is done *after* the cmd has
+ * been issued to the drive, as per the BM-IDE spec.
+ * The Promise Ultra33 doesn't work correctly when
+ * we do this part before issuing the drive cmd.
+ */
+ outb(inb(dma_base)|1, dma_base); /* start DMA */
+ return 0;
+ case ide_dma_end: /* returns 1 on error, 0 otherwise */
+ drive->waiting_for_dma = 0;
+ outb(inb(dma_base)&~1, dma_base); /* stop DMA */
+ dma_stat = inb(dma_base+2); /* get DMA status */
+ outb(dma_stat|6, dma_base+2); /* clear the INTR & ERROR bits */
+ ide_destroy_dmatable(drive); /* purge DMA mappings */
+ return (dma_stat & 7) != 4 ? (0x10 | dma_stat) : 0; /* verify good DMA status */
+ case ide_dma_test_irq: /* returns 1 if dma irq issued, 0 otherwise */
+ dma_stat = inb(dma_base+2);
+#if 0 /* do not set unless you know what you are doing */
+ if (dma_stat & 4) {
+ byte stat = GET_STAT();
+			outb(dma_stat & 0xE4, dma_base+2); /* outb(value, port): ack IRQ, keep capability bits */
+ }
+#endif
+ return (dma_stat & 4) == 4; /* return 1 if INTR asserted */
+ case ide_dma_bad_drive:
+ case ide_dma_good_drive:
+ return check_drive_lists(drive, (func == ide_dma_good_drive));
+ case ide_dma_verbose:
+ return report_drive_dmaing(drive);
+ case ide_dma_timeout:
+ // FIXME: Many IDE chipsets do not permit command file register access
+ // FIXME: while the bus-master function is still active.
+ // FIXME: To prevent deadlock with those chipsets, we must be extremely
+ // FIXME: careful here (and in ide_intr() as well) to NOT access any
+ // FIXME: registers from the 0x1Fx/0x17x sets before terminating the
+ // FIXME: bus-master operation via the bus-master control reg.
+ // FIXME: Otherwise, chipset deadlock will occur, and some systems will
+ // FIXME: lock up completely!!
+#ifdef CONFIG_BLK_DEV_IDEDMA_TIMEOUT
+	/*
+	 * Have to issue an abort and requeue the request.
+	 * The DMA engine got turned off by a goofy ASIC, and
+	 * we have to clean up the mess; here is as good a
+	 * place as any.  Do it globally for all chipsets.
+	 */
+ outb(0x00, dma_base); /* stop DMA */
+ dma_stat = inb(dma_base+2); /* get DMA status */
+ outb(dma_stat|6, dma_base+2); /* clear the INTR & ERROR bits */
+		printk("%s: %s: Let's do it again! "
+			"stat = 0x%02x, dma_stat = 0x%02x\n",
+ drive->name, ide_dmafunc_verbose(func),
+ GET_STAT(), dma_stat);
+
+ if (dma_stat & 0xF0)
+			return ide_dma_timeout_recovery(drive);
+
+		printk("%s: %s: (restart_request) Let's do it again! "
+			"stat = 0x%02x, dma_stat = 0x%02x\n",
+ drive->name, ide_dmafunc_verbose(func),
+ GET_STAT(), dma_stat);
+
+ return restart_request(drive); // BUG: return types do not match!!
+//#else
+// return HWGROUP(drive)->handler(drive);
+#endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+ case ide_dma_retune:
+ case ide_dma_lostirq:
+ printk("ide_dmaproc: chipset supported %s func only: %d\n", ide_dmafunc_verbose(func), func);
+ return 1;
+ default:
+ printk("ide_dmaproc: unsupported %s func: %d\n", ide_dmafunc_verbose(func), func);
+ return 1;
+ }
+}
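+
+/*
+ * A reference sketch of the bus-master register block ide_dmaproc()
+ * programs, assuming the standard SFF-8038i layout at dma_base (one
+ * such block per channel):
+ */
+#if 0
+#define BM_COMMAND	0	/* bit 0: start/stop; bit 3: 1 = read (to memory) */
+#define BM_STATUS	2	/* bit 0: active, bit 1: error, bit 2: IRQ,
+				   bits 5/6: drive 0/1 DMA capable,
+				   bit 7: simplex-only controller */
+#define BM_PRD_PTR	4	/* 32-bit physical address of the PRD table */
+#endif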
+
+/*
+ * Needed to allow full modular support of the ide driver.
+ */
+int ide_release_dma (ide_hwif_t *hwif)
+{
+ if (hwif->dmatable_cpu) {
+ pci_free_consistent(hwif->pci_dev,
+ PRD_ENTRIES * PRD_BYTES,
+ hwif->dmatable_cpu,
+ hwif->dmatable_dma);
+ hwif->dmatable_cpu = NULL;
+ }
+ if (hwif->sg_table) {
+ kfree(hwif->sg_table);
+ hwif->sg_table = NULL;
+ }
+ if ((hwif->dma_extra) && (hwif->channel == 0))
+ release_region((hwif->dma_base + 16), hwif->dma_extra);
+ release_region(hwif->dma_base, 8);
+ return 1;
+}
+
+/*
+ * This can be called for a dynamically installed interface. Don't __init it
+ */
+
+void ide_setup_dma (ide_hwif_t *hwif, unsigned long dma_base, unsigned int num_ports)
+{
+ printk(" %s: BM-DMA at 0x%04lx-0x%04lx", hwif->name, dma_base, dma_base + num_ports - 1);
+ if (check_region(dma_base, num_ports)) {
+ printk(" -- ERROR, PORT ADDRESSES ALREADY IN USE\n");
+ return;
+ }
+ request_region(dma_base, num_ports, hwif->name);
+ hwif->dma_base = dma_base;
+ hwif->dmatable_cpu = pci_alloc_consistent(hwif->pci_dev,
+ PRD_ENTRIES * PRD_BYTES,
+ &hwif->dmatable_dma);
+ if (hwif->dmatable_cpu == NULL)
+ goto dma_alloc_failure;
+
+ hwif->sg_table = kmalloc(sizeof(struct scatterlist) * PRD_ENTRIES,
+ GFP_KERNEL);
+ if (hwif->sg_table == NULL) {
+ pci_free_consistent(hwif->pci_dev, PRD_ENTRIES * PRD_BYTES,
+ hwif->dmatable_cpu, hwif->dmatable_dma);
+ goto dma_alloc_failure;
+ }
+
+ hwif->dmaproc = &ide_dmaproc;
+
+ if (hwif->chipset != ide_trm290) {
+ byte dma_stat = inb(dma_base+2);
+ printk(", BIOS settings: %s:%s, %s:%s",
+ hwif->drives[0].name, (dma_stat & 0x20) ? "DMA" : "pio",
+ hwif->drives[1].name, (dma_stat & 0x40) ? "DMA" : "pio");
+ }
+ printk("\n");
+ return;
+
+dma_alloc_failure:
+ printk(" -- ERROR, UNABLE TO ALLOCATE DMA TABLES\n");
+}
+
+/*
+ * Fetch the DMA Bus-Master-I/O-Base-Address (BMIBA) from PCI space:
+ */
+unsigned long __init ide_get_or_set_dma_base (ide_hwif_t *hwif, int extra, const char *name)
+{
+ unsigned long dma_base = 0;
+ struct pci_dev *dev = hwif->pci_dev;
+
+#ifdef CONFIG_BLK_DEV_IDEDMA_FORCED
+ int second_chance = 0;
+
+second_chance_to_dma:
+#endif /* CONFIG_BLK_DEV_IDEDMA_FORCED */
+
+ if (hwif->mate && hwif->mate->dma_base) {
+ dma_base = hwif->mate->dma_base - (hwif->channel ? 0 : 8);
+ } else {
+ dma_base = pci_resource_start(dev, 4);
+ if (!dma_base) {
+ printk("%s: dma_base is invalid (0x%04lx)\n", name, dma_base);
+ dma_base = 0;
+ }
+ }
+
+#ifdef CONFIG_BLK_DEV_IDEDMA_FORCED
+ if ((!dma_base) && (!second_chance)) {
+ unsigned long set_bmiba = 0;
+ second_chance++;
+ switch(dev->vendor) {
+ case PCI_VENDOR_ID_AL:
+ set_bmiba = DEFAULT_BMALIBA; break;
+ case PCI_VENDOR_ID_VIA:
+ set_bmiba = DEFAULT_BMCRBA; break;
+ case PCI_VENDOR_ID_INTEL:
+ set_bmiba = DEFAULT_BMIBA; break;
+ default:
+ return dma_base;
+ }
+ pci_write_config_dword(dev, 0x20, set_bmiba|1);
+ goto second_chance_to_dma;
+ }
+#endif /* CONFIG_BLK_DEV_IDEDMA_FORCED */
+
+ if (dma_base) {
+ if (extra) /* PDC20246, PDC20262, HPT343, & HPT366 */
+ request_region(dma_base+16, extra, name);
+ dma_base += hwif->channel ? 8 : 0;
+ hwif->dma_extra = extra;
+
+ switch(dev->device) {
+ case PCI_DEVICE_ID_AL_M5219:
+ case PCI_DEVICE_ID_AMD_VIPER_7409:
+ case PCI_DEVICE_ID_CMD_643:
+ outb(inb(dma_base+2) & 0x60, dma_base+2);
+ if (inb(dma_base+2) & 0x80) {
+ printk("%s: simplex device: DMA forced\n", name);
+ }
+ break;
+ default:
+ /*
+ * If the device claims "simplex" DMA,
+ * this means only one of the two interfaces
+ * can be trusted with DMA at any point in time.
+ * So we should enable DMA only on one of the
+ * two interfaces.
+ */
+ if ((inb(dma_base+2) & 0x80)) { /* simplex device? */
+ if ((!hwif->drives[0].present && !hwif->drives[1].present) ||
+ (hwif->mate && hwif->mate->dma_base)) {
+ printk("%s: simplex device: DMA disabled\n", name);
+ dma_base = 0;
+ }
+ }
+ }
+ }
+ return dma_base;
+}
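+
+/*
+ * Note on the "hwif->channel ? 8 : 0" adjustment above: BAR4 exposes a
+ * single 16-byte bus-master block shared by both channels -- primary at
+ * dma_base+0..7, secondary at dma_base+8..15 -- which is also why a
+ * mate's dma_base can be recovered by subtracting 8.
+ */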
diff --git a/xen/drivers/ide/ide-features.c b/xen/drivers/ide/ide-features.c
new file mode 100644
index 0000000000..a60af2dad3
--- /dev/null
+++ b/xen/drivers/ide/ide-features.c
@@ -0,0 +1,384 @@
+/*
+ * linux/drivers/block/ide-features.c Version 0.04 June 9, 2000
+ *
+ * Copyright (C) 1999-2000 Linus Torvalds & authors (see below)
+ *
+ * Copyright (C) 1999-2000 Andre Hedrick <andre@linux-ide.org>
+ *
+ *  Extracted from ide.c to address the evolving transfer rate code for
+ * the SETFEATURES_XFER callouts. Various parts of any given function
+ * are credited to previous ATA-IDE maintainers.
+ *
+ * Auto-CRC downgrade for Ultra DMA(ing)
+ *
+ * May be copied or modified under the terms of the GNU General Public License
+ */
+
+#include <xeno/config.h>
+#define __NO_VERSION__
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/blkpg.h>
+#include <xeno/slab.h>
+#include <xeno/pci.h>
+#include <xeno/delay.h>
+#include <xeno/hdreg.h>
+#include <xeno/ide.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/bitops.h>
+
+/*
+ * A verbose noise maker for debugging the attempted transfer rates.
+ */
+char *ide_xfer_verbose (byte xfer_rate)
+{
+ switch(xfer_rate) {
+ case XFER_UDMA_7: return("UDMA 7");
+ case XFER_UDMA_6: return("UDMA 6");
+ case XFER_UDMA_5: return("UDMA 5");
+ case XFER_UDMA_4: return("UDMA 4");
+ case XFER_UDMA_3: return("UDMA 3");
+ case XFER_UDMA_2: return("UDMA 2");
+ case XFER_UDMA_1: return("UDMA 1");
+ case XFER_UDMA_0: return("UDMA 0");
+ case XFER_MW_DMA_2: return("MW DMA 2");
+ case XFER_MW_DMA_1: return("MW DMA 1");
+ case XFER_MW_DMA_0: return("MW DMA 0");
+ case XFER_SW_DMA_2: return("SW DMA 2");
+ case XFER_SW_DMA_1: return("SW DMA 1");
+ case XFER_SW_DMA_0: return("SW DMA 0");
+ case XFER_PIO_4: return("PIO 4");
+ case XFER_PIO_3: return("PIO 3");
+ case XFER_PIO_2: return("PIO 2");
+ case XFER_PIO_1: return("PIO 1");
+ case XFER_PIO_0: return("PIO 0");
+ case XFER_PIO_SLOW: return("PIO SLOW");
+ default: return("XFER ERROR");
+ }
+}
+
+/*
+ * Describe the drive's media type for log output.
+ */
+char *ide_media_verbose (ide_drive_t *drive)
+{
+ switch (drive->media) {
+ case ide_scsi: return("scsi ");
+ case ide_disk: return("disk ");
+ case ide_optical: return("optical");
+ case ide_cdrom: return("cdrom ");
+ case ide_tape: return("tape ");
+ case ide_floppy: return("floppy ");
+ default: return("???????");
+ }
+}
+
+/*
+ * A verbose noise maker for debugging the attempted DMA calls.
+ */
+char *ide_dmafunc_verbose (ide_dma_action_t dmafunc)
+{
+ switch (dmafunc) {
+ case ide_dma_read: return("ide_dma_read");
+ case ide_dma_write: return("ide_dma_write");
+ case ide_dma_begin: return("ide_dma_begin");
+		case ide_dma_end:	return("ide_dma_end");
+ case ide_dma_check: return("ide_dma_check");
+ case ide_dma_on: return("ide_dma_on");
+ case ide_dma_off: return("ide_dma_off");
+ case ide_dma_off_quietly: return("ide_dma_off_quietly");
+ case ide_dma_test_irq: return("ide_dma_test_irq");
+ case ide_dma_bad_drive: return("ide_dma_bad_drive");
+ case ide_dma_good_drive: return("ide_dma_good_drive");
+ case ide_dma_verbose: return("ide_dma_verbose");
+ case ide_dma_retune: return("ide_dma_retune");
+ case ide_dma_lostirq: return("ide_dma_lostirq");
+ case ide_dma_timeout: return("ide_dma_timeout");
+ default: return("unknown");
+ }
+}
+
+/*
+ * Drop to the next-lower UDMA mode after repeated CRC errors.
+ */
+byte ide_auto_reduce_xfer (ide_drive_t *drive)
+{
+ if (!drive->crc_count)
+ return drive->current_speed;
+ drive->crc_count = 0;
+
+ switch(drive->current_speed) {
+ case XFER_UDMA_7: return XFER_UDMA_6;
+ case XFER_UDMA_6: return XFER_UDMA_5;
+ case XFER_UDMA_5: return XFER_UDMA_4;
+ case XFER_UDMA_4: return XFER_UDMA_3;
+ case XFER_UDMA_3: return XFER_UDMA_2;
+ case XFER_UDMA_2: return XFER_UDMA_1;
+ case XFER_UDMA_1: return XFER_UDMA_0;
+		/*
+		 * We do not drop into non-Ultra DMA modes here:
+		 * without iCRC available we force the system to
+		 * PIO and make the user invoke the ATA-1/ATA-2
+		 * DMA modes explicitly.
+		 */
+ case XFER_UDMA_0:
+ default: return XFER_PIO_4;
+ }
+}
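+
+/*
+ * A minimal sketch of the auto-CRC downgrade path (hypothetical caller,
+ * not taken from this file): once CRC errors accumulate, pick the
+ * next-lower mode and program it via ide_config_drive_speed() below.
+ */
+#if 0
+static void crc_downgrade(ide_drive_t *drive)
+{
+	byte new_speed = ide_auto_reduce_xfer(drive);
+
+	if (new_speed != drive->current_speed &&
+	    !ide_config_drive_speed(drive, new_speed))
+		drive->current_speed = new_speed;
+}
+#endif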
+
+/*
+ * Re-read the drive's IDENTIFY data and update its DMA mode fields.
+ */
+int ide_driveid_update (ide_drive_t *drive)
+{
+ /*
+ * Re-read drive->id for possible DMA mode
+ * change (copied from ide-probe.c)
+ */
+ struct hd_driveid *id;
+ unsigned long timeout, flags;
+
+ SELECT_MASK(HWIF(drive), drive, 1);
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
+ ide_delay_50ms();
+ OUT_BYTE(WIN_IDENTIFY, IDE_COMMAND_REG);
+ timeout = jiffies + WAIT_WORSTCASE;
+ do {
+ if (0 < (signed long)(jiffies - timeout)) {
+ SELECT_MASK(HWIF(drive), drive, 0);
+ return 0; /* drive timed-out */
+ }
+ ide_delay_50ms(); /* give drive a breather */
+ } while (IN_BYTE(IDE_ALTSTATUS_REG) & BUSY_STAT);
+ ide_delay_50ms(); /* wait for IRQ and DRQ_STAT */
+ if (!OK_STAT(GET_STAT(),DRQ_STAT,BAD_R_STAT)) {
+ SELECT_MASK(HWIF(drive), drive, 0);
+ printk("%s: CHECK for good STATUS\n", drive->name);
+ return 0;
+ }
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only; some systems need this */
+ SELECT_MASK(HWIF(drive), drive, 0);
+ id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC);
+ if (!id) {
+ __restore_flags(flags); /* local CPU only */
+ return 0;
+ }
+ ide_input_data(drive, id, SECTOR_WORDS);
+ (void) GET_STAT(); /* clear drive IRQ */
+ ide__sti(); /* local CPU only */
+ __restore_flags(flags); /* local CPU only */
+ ide_fix_driveid(id);
+ if (id) {
+ drive->id->dma_ultra = id->dma_ultra;
+ drive->id->dma_mword = id->dma_mword;
+ drive->id->dma_1word = id->dma_1word;
+ /* anything more ? */
+ kfree(id);
+ }
+
+ return 1;
+}
+
+/*
+ * Verify that we are doing an approved SETFEATURES_XFER with respect
+ * to the hardware being able to support the request.  Since some
+ * hardware can improperly report capabilities, we check whether the
+ * host adapter in combination with the device (usually a disk)
+ * properly detects and acknowledges each end of the ribbon.
+ */
+int ide_ata66_check (ide_drive_t *drive, ide_task_t *args)
+{
+ if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_SETFEATURES) &&
+ (args->tfRegister[IDE_SECTOR_OFFSET] > XFER_UDMA_2) &&
+ (args->tfRegister[IDE_FEATURE_OFFSET] == SETFEATURES_XFER)) {
+ if (!HWIF(drive)->udma_four) {
+			printk("%s: Speed warning: UDMA 3/4/5 is not functional.\n", HWIF(drive)->name);
+ return 1;
+ }
+#ifndef CONFIG_IDEDMA_IVB
+ if ((drive->id->hw_config & 0x6000) == 0) {
+#else /* !CONFIG_IDEDMA_IVB */
+ if (((drive->id->hw_config & 0x2000) == 0) ||
+ ((drive->id->hw_config & 0x4000) == 0)) {
+#endif /* CONFIG_IDEDMA_IVB */
+		printk("%s: Speed warning: UDMA 3/4/5 is not functional.\n", drive->name);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Backside of HDIO_DRIVE_CMD call of SETFEATURES_XFER.
+ * 1 : Safe to update drive->id DMA registers.
+ * 0 : OOPs not allowed.
+ */
+int set_transfer (ide_drive_t *drive, ide_task_t *args)
+{
+ if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_SETFEATURES) &&
+ (args->tfRegister[IDE_SECTOR_OFFSET] >= XFER_SW_DMA_0) &&
+ (args->tfRegister[IDE_FEATURE_OFFSET] == SETFEATURES_XFER) &&
+ (drive->id->dma_ultra ||
+ drive->id->dma_mword ||
+ drive->id->dma_1word))
+ return 1;
+
+ return 0;
+}
+
+#ifdef CONFIG_BLK_DEV_IDEDMA
+/*
+ * All hosts that use the 80-conductor ribbon must use this check!
+ */
+byte eighty_ninty_three (ide_drive_t *drive)
+{
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ if (HWIF(drive)->pci_devid.vid==0x105a)
+ return(HWIF(drive)->udma_four);
+#endif
+	/* PDC202XX: trust udma_four above, since some drives return wrong cable info */
+ return ((byte) ((HWIF(drive)->udma_four) &&
+#ifndef CONFIG_IDEDMA_IVB
+ (drive->id->hw_config & 0x4000) &&
+#endif /* CONFIG_IDEDMA_IVB */
+ (drive->id->hw_config & 0x6000)) ? 1 : 0);
+}
+#endif // CONFIG_BLK_DEV_IDEDMA
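+
+/*
+ * The hw_config masks used in eighty_ninty_three() come from IDENTIFY
+ * word 93: bits 13 (0x2000) and 14 (0x4000) carry the device-side
+ * 80-wire cable detection (drives disagree on which bit is valid,
+ * hence CONFIG_IDEDMA_IVB), so 0x6000 tests both at once.
+ */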
+
+/*
+ * Similar to ide_wait_stat(), except it never calls ide_error internally.
+ * This is a kludge to handle the new ide_config_drive_speed() function,
+ * and should not otherwise be used anywhere. Eventually, the tuneproc's
+ * should be updated to return ide_startstop_t, in which case we can get
+ * rid of this abomination again. :) -ml
+ *
+ * It is gone..........
+ *
+ * const char *msg == consider adding for verbose errors.
+ */
+int ide_config_drive_speed (ide_drive_t *drive, byte speed)
+{
+ ide_hwif_t *hwif = HWIF(drive);
+ int i, error = 1;
+ byte stat;
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI)
+ byte unit = (drive->select.b.unit & 0x01);
+ outb(inb(hwif->dma_base+2) & ~(1<<(5+unit)), hwif->dma_base+2);
+#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */
+
+ /*
+ * Don't use ide_wait_cmd here - it will
+ * attempt to set_geometry and recalibrate,
+ * but for some reason these don't work at
+ * this point (lost interrupt).
+ */
+ /*
+ * Select the drive, and issue the SETFEATURES command
+ */
+ disable_irq(hwif->irq); /* disable_irq_nosync ?? */
+ udelay(1);
+ SELECT_DRIVE(HWIF(drive), drive);
+ SELECT_MASK(HWIF(drive), drive, 0);
+ udelay(1);
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl | 2, IDE_CONTROL_REG);
+ OUT_BYTE(speed, IDE_NSECTOR_REG);
+ OUT_BYTE(SETFEATURES_XFER, IDE_FEATURE_REG);
+ OUT_BYTE(WIN_SETFEATURES, IDE_COMMAND_REG);
+ if ((IDE_CONTROL_REG) && (drive->quirk_list == 2))
+ OUT_BYTE(drive->ctl, IDE_CONTROL_REG);
+ udelay(1);
+ /*
+ * Wait for drive to become non-BUSY
+ */
+ if ((stat = GET_STAT()) & BUSY_STAT) {
+ unsigned long flags, timeout;
+ __save_flags(flags); /* local CPU only */
+ ide__sti(); /* local CPU only -- for jiffies */
+ timeout = jiffies + WAIT_CMD;
+ while ((stat = GET_STAT()) & BUSY_STAT) {
+ if (0 < (signed long)(jiffies - timeout))
+ break;
+ }
+ __restore_flags(flags); /* local CPU only */
+ }
+
+ /*
+ * Allow status to settle, then read it again.
+ * A few rare drives vastly violate the 400ns spec here,
+ * so we'll wait up to 10usec for a "good" status
+ * rather than expensively fail things immediately.
+ * This fix courtesy of Matthew Faupel & Niccolo Rigacci.
+ */
+ for (i = 0; i < 10; i++) {
+ udelay(1);
+ if (OK_STAT((stat = GET_STAT()), DRIVE_READY, BUSY_STAT|DRQ_STAT|ERR_STAT)) {
+ error = 0;
+ break;
+ }
+ }
+
+ SELECT_MASK(HWIF(drive), drive, 0);
+
+ enable_irq(hwif->irq);
+
+ if (error) {
+ (void) ide_dump_status(drive, "set_drive_speed_status", stat);
+ return error;
+ }
+
+ drive->id->dma_ultra &= ~0xFF00;
+ drive->id->dma_mword &= ~0x0F00;
+ drive->id->dma_1word &= ~0x0F00;
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI)
+ if (speed > XFER_PIO_4) {
+ outb(inb(hwif->dma_base+2)|(1<<(5+unit)), hwif->dma_base+2);
+ } else {
+ outb(inb(hwif->dma_base+2) & ~(1<<(5+unit)), hwif->dma_base+2);
+ }
+#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */
+
+ switch(speed) {
+ case XFER_UDMA_7: drive->id->dma_ultra |= 0x8080; break;
+ case XFER_UDMA_6: drive->id->dma_ultra |= 0x4040; break;
+ case XFER_UDMA_5: drive->id->dma_ultra |= 0x2020; break;
+ case XFER_UDMA_4: drive->id->dma_ultra |= 0x1010; break;
+ case XFER_UDMA_3: drive->id->dma_ultra |= 0x0808; break;
+ case XFER_UDMA_2: drive->id->dma_ultra |= 0x0404; break;
+ case XFER_UDMA_1: drive->id->dma_ultra |= 0x0202; break;
+ case XFER_UDMA_0: drive->id->dma_ultra |= 0x0101; break;
+ case XFER_MW_DMA_2: drive->id->dma_mword |= 0x0404; break;
+ case XFER_MW_DMA_1: drive->id->dma_mword |= 0x0202; break;
+ case XFER_MW_DMA_0: drive->id->dma_mword |= 0x0101; break;
+ case XFER_SW_DMA_2: drive->id->dma_1word |= 0x0404; break;
+ case XFER_SW_DMA_1: drive->id->dma_1word |= 0x0202; break;
+ case XFER_SW_DMA_0: drive->id->dma_1word |= 0x0101; break;
+ default: break;
+ }
+ return error;
+}
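+
+/*
+ * The register protocol above is the standard ATA SET FEATURES
+ * transfer-mode sequence: Features = SETFEATURES_XFER (subcommand
+ * 0x03), Sector Count = the XFER_* mode value, Command =
+ * WIN_SETFEATURES (0xEF).  The dma_ultra/dma_mword/dma_1word fixups
+ * afterwards mirror what the drive would now report in a fresh
+ * IDENTIFY.
+ */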
+
+EXPORT_SYMBOL(ide_auto_reduce_xfer);
+EXPORT_SYMBOL(ide_driveid_update);
+EXPORT_SYMBOL(ide_ata66_check);
+EXPORT_SYMBOL(set_transfer);
+#ifdef CONFIG_BLK_DEV_IDEDMA
+EXPORT_SYMBOL(eighty_ninty_three);
+#endif // CONFIG_BLK_DEV_IDEDMA
+EXPORT_SYMBOL(ide_config_drive_speed);
+
diff --git a/xen/drivers/ide/ide-geometry.c b/xen/drivers/ide/ide-geometry.c
new file mode 100644
index 0000000000..22428288b6
--- /dev/null
+++ b/xen/drivers/ide/ide-geometry.c
@@ -0,0 +1,222 @@
+/*
+ * linux/drivers/ide/ide-geometry.c
+ */
+#include <xeno/config.h>
+#include <xeno/ide.h>
+#include <asm/mc146818rtc.h>
+#include <asm/io.h>
+
+#ifdef CONFIG_BLK_DEV_IDE
+
+/*
+ * We query CMOS about hard disks : it could be that we have a SCSI/ESDI/etc
+ * controller that is BIOS compatible with ST-506, and thus showing up in our
+ * BIOS table, but not register compatible, and therefore not present in CMOS.
+ *
+ * Furthermore, we will assume that our ST-506 drives <if any> are the primary
+ * drives in the system -- the ones reflected as drive 1 or 2. The first
+ * drive is stored in the high nibble of CMOS byte 0x12, the second in the low
+ * nibble. This will be either a 4 bit drive type or 0xf indicating use byte
+ * 0x19 for an 8 bit type, drive 1, 0x1a for drive 2 in CMOS. A non-zero value
+ * means we have an AT controller hard disk for that drive.
+ *
+ * Of course, there is no guarantee that either drive is actually on the
+ * "primary" IDE interface, but we don't bother trying to sort that out here.
+ * If a drive is not actually on the primary interface, then these parameters
+ * will be ignored. This results in the user having to supply the logical
+ * drive geometry as a boot parameter for each drive not on the primary i/f.
+ */
+/*
+ * The only "perfect" way to handle this would be to modify the setup.[cS] code
+ * to do BIOS calls Int13h/Fn08h and Int13h/Fn48h to get all of the drive info
+ * for us during initialization. I have the necessary docs -- any takers? -ml
+ */
+/*
+ * I did this, but it doesn't work - there is no reasonable way to find the
+ * correspondence between the BIOS numbering of the disks and the Linux
+ * numbering. -aeb
+ *
+ * The code below is bad. One of the problems is that drives 1 and 2
+ * may be SCSI disks (even when IDE disks are present), so that
+ * the geometry we read here from BIOS is attributed to the wrong disks.
+ * Consequently, also the former "drive->present = 1" below was a mistake.
+ *
+ * Eventually the entire routine below should be removed.
+ *
+ * 17-OCT-2000 rjohnson@analogic.com Added spin-locks for reading CMOS
+ * chip.
+ */
+
+void probe_cmos_for_drives (ide_hwif_t *hwif)
+{
+#ifdef __i386__
+ extern struct drive_info_struct drive_info;
+ byte cmos_disks, *BIOS = (byte *) &drive_info;
+ int unit;
+ unsigned long flags;
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (hwif->chipset == ide_pdc4030 && hwif->channel != 0)
+ return;
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+ spin_lock_irqsave(&rtc_lock, flags);
+ cmos_disks = CMOS_READ(0x12);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ /* Extract drive geometry from CMOS+BIOS if not already setup */
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ ide_drive_t *drive = &hwif->drives[unit];
+ if ((cmos_disks & (0xf0 >> (unit*4)))
+ && !drive->present && !drive->nobios) {
+ unsigned short cyl = *(unsigned short *)BIOS;
+ unsigned char head = *(BIOS+2);
+ unsigned char sect = *(BIOS+14);
+ if (cyl > 0 && head > 0 && sect > 0 && sect < 64) {
+ drive->cyl = drive->bios_cyl = cyl;
+ drive->head = drive->bios_head = head;
+ drive->sect = drive->bios_sect = sect;
+ drive->ctl = *(BIOS+8);
+ } else {
+ printk("hd%c: C/H/S=%d/%d/%d from BIOS ignored\n",
+ unit+'a', cyl, head, sect);
+ }
+ }
+ BIOS += 16;
+ }
+#endif
+}
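+
+/*
+ * Worked example for the CMOS decode above: CMOS_READ(0x12) == 0xf0
+ * means drive 0 carries type 0xf in the high nibble ("see byte 0x19
+ * for the 8-bit type") and drive 1 is absent, so only unit 0 passes
+ * the (cmos_disks & 0xf0) test and has its C/H/S taken from the BIOS
+ * table.
+ */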
+#endif /* CONFIG_BLK_DEV_IDE */
+
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+
+extern ide_drive_t * get_info_ptr(kdev_t);
+extern unsigned long current_capacity (ide_drive_t *);
+
+/*
+ * If heads is nonzero: find a translation with this many heads and S=63.
+ * Otherwise: find out how OnTrack Disk Manager would translate the disk.
+ */
+static void
+ontrack(ide_drive_t *drive, int heads, unsigned int *c, int *h, int *s) {
+ static const byte dm_head_vals[] = {4, 8, 16, 32, 64, 128, 255, 0};
+ const byte *headp = dm_head_vals;
+ unsigned long total;
+
+ /*
+ * The specs say: take geometry as obtained from Identify,
+ * compute total capacity C*H*S from that, and truncate to
+ * 1024*255*63. Now take S=63, H the first in the sequence
+ * 4, 8, 16, 32, 64, 128, 255 such that 63*H*1024 >= total.
+ * [Please tell aeb@cwi.nl in case this computes a
+ * geometry different from what OnTrack uses.]
+ */
+ total = DRIVER(drive)->capacity(drive);
+
+ *s = 63;
+
+ if (heads) {
+ *h = heads;
+ *c = total / (63 * heads);
+ return;
+ }
+
+ while (63 * headp[0] * 1024 < total && headp[1] != 0)
+ headp++;
+ *h = headp[0];
+ *c = total / (63 * headp[0]);
+}
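+
+/*
+ * Worked example for the head-selection loop above: for a disk of
+ * 4,900,000 sectors (~2.5 GB), 63*64*1024 = 4,128,768 is still below
+ * the total but 63*128*1024 = 8,257,536 is not, so H = 128, S = 63
+ * and C = 4900000 / (63 * 128) = 607 cylinders.
+ */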
+
+/*
+ * This routine is called from the partition-table code in pt/msdos.c.
+ * It has two tasks:
+ * (i) to handle Ontrack DiskManager by offsetting everything by 63 sectors,
+ * or to handle EZdrive by remapping sector 0 to sector 1.
+ * (ii) to invent a translated geometry.
+ * Part (i) is suppressed if the user specifies the "noremap" option
+ * on the command line.
+ * Part (ii) is suppressed if the user specifies an explicit geometry.
+ *
+ * The ptheads parameter is either 0 or tells about the number of
+ * heads shown by the end of the first nonempty partition.
+ * If this is either 16, 32, 64, 128, 240 or 255 we'll believe it.
+ *
+ * The xparm parameter has the following meaning:
+ * 0 = convert to CHS with fewer than 1024 cyls
+ * using the same method as Ontrack DiskManager.
+ * 1 = same as "0", plus offset everything by 63 sectors.
+ * -1 = similar to "0", plus redirect sector 0 to sector 1.
+ * 2 = convert to a CHS geometry with "ptheads" heads.
+ *
+ * Returns 0 if the translation was not possible, if the device was not
+ * an IDE disk drive, or if a geometry was "forced" on the commandline.
+ * Returns 1 if the geometry translation was successful.
+ */
+int ide_xlate_1024 (kdev_t i_rdev, int xparm, int ptheads, const char *msg)
+{
+ ide_drive_t *drive;
+ const char *msg1 = "";
+ int heads = 0;
+ int c, h, s;
+ int transl = 1; /* try translation */
+ int ret = 0;
+
+ drive = get_info_ptr(i_rdev);
+ if (!drive)
+ return 0;
+
+ /* remap? */
+ if (drive->remap_0_to_1 != 2) {
+ if (xparm == 1) { /* DM */
+ drive->sect0 = 63;
+ msg1 = " [remap +63]";
+ ret = 1;
+ } else if (xparm == -1) { /* EZ-Drive */
+ if (drive->remap_0_to_1 == 0) {
+ drive->remap_0_to_1 = 1;
+ msg1 = " [remap 0->1]";
+ ret = 1;
+ }
+ }
+ }
+
+ /* There used to be code here that assigned drive->id->CHS
+ to drive->CHS and that to drive->bios_CHS. However,
+ some disks have id->C/H/S = 4092/16/63 but are larger than 2.1 GB.
+ In such cases that code was wrong. Moreover,
+ there seems to be no reason to do any of these things. */
+
+ /* translate? */
+ if (drive->forced_geom)
+ transl = 0;
+
+ /* does ptheads look reasonable? */
+ if (ptheads == 32 || ptheads == 64 || ptheads == 128 ||
+ ptheads == 240 || ptheads == 255)
+ heads = ptheads;
+
+ if (xparm == 2) {
+ if (!heads ||
+ (drive->bios_head >= heads && drive->bios_sect == 63))
+ transl = 0;
+ }
+ if (xparm == -1) {
+ if (drive->bios_head > 16)
+ transl = 0; /* we already have a translation */
+ }
+
+ if (transl) {
+ ontrack(drive, heads, &c, &h, &s);
+ drive->bios_cyl = c;
+ drive->bios_head = h;
+ drive->bios_sect = s;
+ ret = 1;
+ }
+
+ drive->part[0].nr_sects = current_capacity(drive);
+
+ if (ret)
+ printk("%s%s [%d/%d/%d]", msg, msg1,
+ drive->bios_cyl, drive->bios_head, drive->bios_sect);
+ return ret;
+}
+#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */
diff --git a/xen/drivers/ide/ide-pci.c b/xen/drivers/ide/ide-pci.c
new file mode 100644
index 0000000000..c8784c6f3d
--- /dev/null
+++ b/xen/drivers/ide/ide-pci.c
@@ -0,0 +1,1016 @@
+/*
+ * linux/drivers/ide/ide-pci.c Version 1.05 June 9, 2000
+ *
+ * Copyright (c) 1998-2000 Andre Hedrick <andre@linux-ide.org>
+ *
+ * Copyright (c) 1995-1998 Mark Lord
+ * May be copied or modified under the terms of the GNU General Public License
+ */
+
+/*
+ * This module provides support for automatic detection and
+ * configuration of all PCI IDE interfaces present in a system.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ide.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#define DEVID_PIIXa ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371FB_0})
+#define DEVID_PIIXb ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371FB_1})
+#define DEVID_MPIIX ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371MX})
+#define DEVID_PIIX3 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_1})
+#define DEVID_PIIX4 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB})
+#define DEVID_ICH0 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AB_1})
+#define DEVID_PIIX4E2 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443MX_1})
+#define DEVID_ICH ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_1})
+#define DEVID_PIIX4U2 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82372FB_1})
+#define DEVID_PIIX4NX ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX})
+#define DEVID_ICH2 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_9})
+#define DEVID_ICH2M ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_8})
+#define DEVID_ICH3M ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10})
+#define DEVID_ICH3 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_11})
+#define DEVID_ICH4 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_11})
+#define DEVID_CICH ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801E_11})
+#define DEVID_VIA_IDE ((ide_pci_devid_t){PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C561})
+#define DEVID_MR_IDE ((ide_pci_devid_t){PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C576_1})
+#define DEVID_VP_IDE ((ide_pci_devid_t){PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1})
+#define DEVID_PDC20246 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20246})
+#define DEVID_PDC20262 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20262})
+#define DEVID_PDC20265 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20265})
+#define DEVID_PDC20267 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20267})
+#define DEVID_PDC20268 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20268})
+#define DEVID_PDC20270 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20270})
+#define DEVID_PDC20269 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20269})
+#define DEVID_PDC20275 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20275})
+#define DEVID_PDC20276 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20276})
+#define DEVID_RZ1000 ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_RZ1000})
+#define DEVID_RZ1001 ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_RZ1001})
+#define DEVID_SAMURAI ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_SAMURAI_IDE})
+#define DEVID_CMD640 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_640})
+#define DEVID_CMD643 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_643})
+#define DEVID_CMD646 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_646})
+#define DEVID_CMD648 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_648})
+#define DEVID_CMD649 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_649})
+#define DEVID_CMD680 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_680})
+#define DEVID_SIS5513 ((ide_pci_devid_t){PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5513})
+#define DEVID_OPTI621 ((ide_pci_devid_t){PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C621})
+#define DEVID_OPTI621V ((ide_pci_devid_t){PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C558})
+#define DEVID_OPTI621X ((ide_pci_devid_t){PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C825})
+#define DEVID_TRM290 ((ide_pci_devid_t){PCI_VENDOR_ID_TEKRAM, PCI_DEVICE_ID_TEKRAM_DC290})
+#define DEVID_NS87410 ((ide_pci_devid_t){PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_87410})
+#define DEVID_NS87415 ((ide_pci_devid_t){PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_87415})
+#define DEVID_HT6565 ((ide_pci_devid_t){PCI_VENDOR_ID_HOLTEK, PCI_DEVICE_ID_HOLTEK_6565})
+#define DEVID_AEC6210 ((ide_pci_devid_t){PCI_VENDOR_ID_ARTOP, PCI_DEVICE_ID_ARTOP_ATP850UF})
+#define DEVID_AEC6260 ((ide_pci_devid_t){PCI_VENDOR_ID_ARTOP, PCI_DEVICE_ID_ARTOP_ATP860})
+#define DEVID_AEC6260R ((ide_pci_devid_t){PCI_VENDOR_ID_ARTOP, PCI_DEVICE_ID_ARTOP_ATP860R})
+#define DEVID_W82C105 ((ide_pci_devid_t){PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105})
+#define DEVID_UM8673F ((ide_pci_devid_t){PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8673F})
+#define DEVID_UM8886A ((ide_pci_devid_t){PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886A})
+#define DEVID_UM8886BF ((ide_pci_devid_t){PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF})
+#define DEVID_HPT34X ((ide_pci_devid_t){PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT343})
+#define DEVID_HPT366 ((ide_pci_devid_t){PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT366})
+#define DEVID_ALI15X3 ((ide_pci_devid_t){PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M5229})
+#define DEVID_CY82C693 ((ide_pci_devid_t){PCI_VENDOR_ID_CONTAQ, PCI_DEVICE_ID_CONTAQ_82C693})
+#define DEVID_HINT ((ide_pci_devid_t){0x3388, 0x8013})
+#define DEVID_CS5530 ((ide_pci_devid_t){PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_IDE})
+#define DEVID_AMD7401 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_COBRA_7401})
+#define DEVID_AMD7409 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7409})
+#define DEVID_AMD7411 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7411})
+#define DEVID_AMD7441 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7441})
+#define DEVID_PDCADMA ((ide_pci_devid_t){PCI_VENDOR_ID_PDC, PCI_DEVICE_ID_PDC_1841})
+#define DEVID_SLC90E66 ((ide_pci_devid_t){PCI_VENDOR_ID_EFAR, PCI_DEVICE_ID_EFAR_SLC90E66_1})
+#define DEVID_OSB4 ((ide_pci_devid_t){PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4IDE})
+#define DEVID_CSB5 ((ide_pci_devid_t){PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB5IDE})
+#define DEVID_ITE8172G ((ide_pci_devid_t){PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_IT8172G})
+
+#define IDE_IGNORE ((void *)-1)
+#define IDE_NO_DRIVER ((void *)-2)
+
+#ifdef CONFIG_BLK_DEV_AEC62XX
+extern unsigned int pci_init_aec62xx(struct pci_dev *, const char *);
+extern unsigned int ata66_aec62xx(ide_hwif_t *);
+extern void ide_init_aec62xx(ide_hwif_t *);
+extern void ide_dmacapable_aec62xx(ide_hwif_t *, unsigned long);
+#define PCI_AEC62XX &pci_init_aec62xx
+#define ATA66_AEC62XX &ata66_aec62xx
+#define INIT_AEC62XX &ide_init_aec62xx
+#define DMA_AEC62XX &ide_dmacapable_aec62xx
+#else
+#define PCI_AEC62XX NULL
+#define ATA66_AEC62XX NULL
+#define INIT_AEC62XX IDE_NO_DRIVER
+#define DMA_AEC62XX NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_ALI15X3
+extern unsigned int pci_init_ali15x3(struct pci_dev *, const char *);
+extern unsigned int ata66_ali15x3(ide_hwif_t *);
+extern void ide_init_ali15x3(ide_hwif_t *);
+extern void ide_dmacapable_ali15x3(ide_hwif_t *, unsigned long);
+#define PCI_ALI15X3 &pci_init_ali15x3
+#define ATA66_ALI15X3 &ata66_ali15x3
+#define INIT_ALI15X3 &ide_init_ali15x3
+#define DMA_ALI15X3 &ide_dmacapable_ali15x3
+#else
+#define PCI_ALI15X3 NULL
+#define ATA66_ALI15X3 NULL
+#define INIT_ALI15X3 IDE_NO_DRIVER
+#define DMA_ALI15X3 NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_AMD74XX
+extern unsigned int pci_init_amd74xx(struct pci_dev *, const char *);
+extern unsigned int ata66_amd74xx(ide_hwif_t *);
+extern void ide_init_amd74xx(ide_hwif_t *);
+extern void ide_dmacapable_amd74xx(ide_hwif_t *, unsigned long);
+#define PCI_AMD74XX &pci_init_amd74xx
+#define ATA66_AMD74XX &ata66_amd74xx
+#define INIT_AMD74XX &ide_init_amd74xx
+#define DMA_AMD74XX &ide_dmacapable_amd74xx
+#else
+#define PCI_AMD74XX NULL
+#define ATA66_AMD74XX NULL
+#define INIT_AMD74XX IDE_NO_DRIVER
+#define DMA_AMD74XX NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_CMD64X
+extern unsigned int pci_init_cmd64x(struct pci_dev *, const char *);
+extern unsigned int ata66_cmd64x(ide_hwif_t *);
+extern void ide_init_cmd64x(ide_hwif_t *);
+extern void ide_dmacapable_cmd64x(ide_hwif_t *, unsigned long);
+#define PCI_CMD64X &pci_init_cmd64x
+#define ATA66_CMD64X &ata66_cmd64x
+#define INIT_CMD64X &ide_init_cmd64x
+#else
+#define PCI_CMD64X NULL
+#define ATA66_CMD64X NULL
+#ifdef __sparc_v9__
+#define INIT_CMD64X IDE_IGNORE
+#else
+#define INIT_CMD64X IDE_NO_DRIVER
+#endif
+#endif
+
+#ifdef CONFIG_BLK_DEV_CY82C693
+extern unsigned int pci_init_cy82c693(struct pci_dev *, const char *);
+extern void ide_init_cy82c693(ide_hwif_t *);
+#define PCI_CY82C693 &pci_init_cy82c693
+#define INIT_CY82C693 &ide_init_cy82c693
+#else
+#define PCI_CY82C693 NULL
+#define INIT_CY82C693 IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_CS5530
+extern unsigned int pci_init_cs5530(struct pci_dev *, const char *);
+extern void ide_init_cs5530(ide_hwif_t *);
+#define PCI_CS5530 &pci_init_cs5530
+#define INIT_CS5530 &ide_init_cs5530
+#else
+#define PCI_CS5530 NULL
+#define INIT_CS5530 IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_HPT34X
+extern unsigned int pci_init_hpt34x(struct pci_dev *, const char *);
+extern void ide_init_hpt34x(ide_hwif_t *);
+#define PCI_HPT34X &pci_init_hpt34x
+#define INIT_HPT34X &ide_init_hpt34x
+#else
+#define PCI_HPT34X NULL
+#define INIT_HPT34X IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_HPT366
+extern byte hpt363_shared_irq;
+extern byte hpt363_shared_pin;
+extern unsigned int pci_init_hpt366(struct pci_dev *, const char *);
+extern unsigned int ata66_hpt366(ide_hwif_t *);
+extern void ide_init_hpt366(ide_hwif_t *);
+extern void ide_dmacapable_hpt366(ide_hwif_t *, unsigned long);
+#define PCI_HPT366 &pci_init_hpt366
+#define ATA66_HPT366 &ata66_hpt366
+#define INIT_HPT366 &ide_init_hpt366
+#define DMA_HPT366 &ide_dmacapable_hpt366
+#else
+static byte hpt363_shared_irq;
+static byte hpt363_shared_pin;
+#define PCI_HPT366 NULL
+#define ATA66_HPT366 NULL
+#define INIT_HPT366 IDE_NO_DRIVER
+#define DMA_HPT366 NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_NS87415
+extern void ide_init_ns87415(ide_hwif_t *);
+#define INIT_NS87415 &ide_init_ns87415
+#else
+#define INIT_NS87415 IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_OPTI621
+extern void ide_init_opti621(ide_hwif_t *);
+#define INIT_OPTI621 &ide_init_opti621
+#else
+#define INIT_OPTI621 IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_PDC_ADMA
+extern unsigned int pci_init_pdcadma(struct pci_dev *, const char *);
+extern unsigned int ata66_pdcadma(ide_hwif_t *);
+extern void ide_init_pdcadma(ide_hwif_t *);
+extern void ide_dmacapable_pdcadma(ide_hwif_t *, unsigned long);
+#define PCI_PDCADMA &pci_init_pdcadma
+#define ATA66_PDCADMA &ata66_pdcadma
+#define INIT_PDCADMA &ide_init_pdcadma
+#define DMA_PDCADMA &ide_dmacapable_pdcadma
+#else
+#define PCI_PDCADMA IDE_IGNORE
+#define ATA66_PDCADMA IDE_IGNORE
+#define INIT_PDCADMA IDE_IGNORE
+#define DMA_PDCADMA IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_PDC202XX
+extern unsigned int pci_init_pdc202xx(struct pci_dev *, const char *);
+extern unsigned int ata66_pdc202xx(ide_hwif_t *);
+extern void ide_init_pdc202xx(ide_hwif_t *);
+#define PCI_PDC202XX &pci_init_pdc202xx
+#define ATA66_PDC202XX &ata66_pdc202xx
+#define INIT_PDC202XX &ide_init_pdc202xx
+#else
+#define PCI_PDC202XX NULL
+#define ATA66_PDC202XX NULL
+#define INIT_PDC202XX NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_PIIX
+extern unsigned int pci_init_piix(struct pci_dev *, const char *);
+extern unsigned int ata66_piix(ide_hwif_t *);
+extern void ide_init_piix(ide_hwif_t *);
+#define PCI_PIIX &pci_init_piix
+#define ATA66_PIIX &ata66_piix
+#define INIT_PIIX &ide_init_piix
+#else
+#define PCI_PIIX NULL
+#define ATA66_PIIX NULL
+#define INIT_PIIX IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_IT8172
+extern unsigned int pci_init_it8172(struct pci_dev *, const char *);
+extern unsigned int ata66_it8172(ide_hwif_t *);
+extern void ide_init_it8172(ide_hwif_t *);
+#define PCI_IT8172 &pci_init_it8172
+#define INIT_IT8172 &ide_init_it8172
+#else
+#define PCI_IT8172 NULL
+#define ATA66_IT8172 NULL
+#define INIT_IT8172 NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_RZ1000
+extern void ide_init_rz1000(ide_hwif_t *);
+#define INIT_RZ1000 &ide_init_rz1000
+#else
+#define INIT_RZ1000 IDE_IGNORE
+#endif
+
+#define INIT_SAMURAI NULL
+
+#ifdef CONFIG_BLK_DEV_SVWKS
+extern unsigned int pci_init_svwks(struct pci_dev *, const char *);
+extern unsigned int ata66_svwks(ide_hwif_t *);
+extern void ide_init_svwks(ide_hwif_t *);
+#define PCI_SVWKS &pci_init_svwks
+#define ATA66_SVWKS &ata66_svwks
+#define INIT_SVWKS &ide_init_svwks
+#else
+#define PCI_SVWKS NULL
+#define ATA66_SVWKS NULL
+#define INIT_SVWKS IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_SIS5513
+extern unsigned int pci_init_sis5513(struct pci_dev *, const char *);
+extern unsigned int ata66_sis5513(ide_hwif_t *);
+extern void ide_init_sis5513(ide_hwif_t *);
+#define PCI_SIS5513 &pci_init_sis5513
+#define ATA66_SIS5513 &ata66_sis5513
+#define INIT_SIS5513 &ide_init_sis5513
+#else
+#define PCI_SIS5513 NULL
+#define ATA66_SIS5513 NULL
+#define INIT_SIS5513 IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_SLC90E66
+extern unsigned int pci_init_slc90e66(struct pci_dev *, const char *);
+extern unsigned int ata66_slc90e66(ide_hwif_t *);
+extern void ide_init_slc90e66(ide_hwif_t *);
+#define PCI_SLC90E66 &pci_init_slc90e66
+#define ATA66_SLC90E66 &ata66_slc90e66
+#define INIT_SLC90E66 &ide_init_slc90e66
+#else
+#define PCI_SLC90E66 NULL
+#define ATA66_SLC90E66 NULL
+#define INIT_SLC90E66 IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_SL82C105
+extern unsigned int pci_init_sl82c105(struct pci_dev *, const char *);
+extern void dma_init_sl82c105(ide_hwif_t *, unsigned long);
+extern void ide_init_sl82c105(ide_hwif_t *);
+#define PCI_W82C105 &pci_init_sl82c105
+#define DMA_W82C105 &dma_init_sl82c105
+#define INIT_W82C105 &ide_init_sl82c105
+#else
+#define PCI_W82C105 NULL
+#define DMA_W82C105 NULL
+#define INIT_W82C105 IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_TRM290
+extern void ide_init_trm290(ide_hwif_t *);
+#define INIT_TRM290 &ide_init_trm290
+#else
+#define INIT_TRM290 IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_VIA82CXXX
+extern unsigned int pci_init_via82cxxx(struct pci_dev *, const char *);
+extern unsigned int ata66_via82cxxx(ide_hwif_t *);
+extern void ide_init_via82cxxx(ide_hwif_t *);
+extern void ide_dmacapable_via82cxxx(ide_hwif_t *, unsigned long);
+#define PCI_VIA82CXXX &pci_init_via82cxxx
+#define ATA66_VIA82CXXX &ata66_via82cxxx
+#define INIT_VIA82CXXX &ide_init_via82cxxx
+#define DMA_VIA82CXXX &ide_dmacapable_via82cxxx
+#else
+#define PCI_VIA82CXXX NULL
+#define ATA66_VIA82CXXX NULL
+#define INIT_VIA82CXXX IDE_NO_DRIVER
+#define DMA_VIA82CXXX NULL
+#endif
+
+typedef struct ide_pci_enablebit_s {
+ byte reg; /* byte pci reg holding the enable-bit */
+ byte mask; /* mask to isolate the enable-bit */
+ byte val; /* value of masked reg when "enabled" */
+} ide_pci_enablebit_t;
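+
+/*
+ * A sketch (hypothetical helper, not from this file) of how an
+ * enablebit triple is meant to be consumed when probing a channel:
+ */
+#if 0
+static int channel_enabled(struct pci_dev *dev, ide_pci_enablebit_t *e)
+{
+	byte tmp;
+
+	if (!e->reg)		/* all-zero entry: channel assumed enabled */
+		return 1;
+	pci_read_config_byte(dev, e->reg, &tmp);
+	return (tmp & e->mask) == e->val;
+}
+#endif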
+
+typedef struct ide_pci_device_s {
+ ide_pci_devid_t devid;
+ char *name;
+ unsigned int (*init_chipset)(struct pci_dev *dev, const char *name);
+ unsigned int (*ata66_check)(ide_hwif_t *hwif);
+ void (*init_hwif)(ide_hwif_t *hwif);
+ void (*dma_init)(ide_hwif_t *hwif, unsigned long dmabase);
+ ide_pci_enablebit_t enablebits[2];
+ byte bootable;
+ unsigned int extra;
+} ide_pci_device_t;
+
+static ide_pci_device_t ide_pci_chipsets[] __initdata = {
+ {DEVID_PIIXa, "PIIX", NULL, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_PIIXb, "PIIX", NULL, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_MPIIX, "MPIIX", NULL, NULL, INIT_PIIX, NULL, {{0x6D,0x80,0x80}, {0x6F,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_PIIX3, "PIIX3", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_PIIX4, "PIIX4", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_ICH0, "ICH0", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_PIIX4E2, "PIIX4", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_ICH, "ICH", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_PIIX4U2, "PIIX4", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_PIIX4NX, "PIIX4", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_ICH2, "ICH2", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_ICH2M, "ICH2M", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_ICH3M, "ICH3M", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_ICH3, "ICH3", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_ICH4, "ICH4", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_CICH, "C-ICH", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_VIA_IDE, "VIA_IDE", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_MR_IDE, "VP_IDE", PCI_VIA82CXXX, ATA66_VIA82CXXX,INIT_VIA82CXXX, DMA_VIA82CXXX, {{0x40,0x02,0x02}, {0x40,0x01,0x01}}, ON_BOARD, 0 },
+ {DEVID_VP_IDE, "VP_IDE", PCI_VIA82CXXX, ATA66_VIA82CXXX,INIT_VIA82CXXX, DMA_VIA82CXXX, {{0x40,0x02,0x02}, {0x40,0x01,0x01}}, ON_BOARD, 0 },
+#ifndef CONFIG_PDC202XX_FORCE
+ {DEVID_PDC20246,"PDC20246", PCI_PDC202XX, NULL, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 16 },
+ {DEVID_PDC20262,"PDC20262", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 48 },
+ {DEVID_PDC20265,"PDC20265", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 48 },
+ {DEVID_PDC20267,"PDC20267", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 48 },
+#else /* !CONFIG_PDC202XX_FORCE */
+ {DEVID_PDC20246,"PDC20246", PCI_PDC202XX, NULL, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 16 },
+ {DEVID_PDC20262,"PDC20262", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 48 },
+ {DEVID_PDC20265,"PDC20265", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 48 },
+ {DEVID_PDC20267,"PDC20267", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 48 },
+#endif
+ {DEVID_PDC20268,"PDC20268", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 },
+	/* Promise used a different PCI ident for the raid card, apparently to
+	   prevent Linux from detecting it and driving it with its own raid
+	   code.  We want to detect it for the ataraid drivers, so we have to
+	   list both here. */
+ {DEVID_PDC20270,"PDC20270", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 },
+ {DEVID_PDC20269,"PDC20269", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 },
+ {DEVID_PDC20275,"PDC20275", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 },
+ {DEVID_PDC20276,"PDC20276", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 },
+ {DEVID_RZ1000, "RZ1000", NULL, NULL, INIT_RZ1000, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_RZ1001, "RZ1001", NULL, NULL, INIT_RZ1000, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_SAMURAI, "SAMURAI", NULL, NULL, INIT_SAMURAI, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_CMD640, "CMD640", NULL, NULL, IDE_IGNORE, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_NS87410, "NS87410", NULL, NULL, NULL, NULL, {{0x43,0x08,0x08}, {0x47,0x08,0x08}}, ON_BOARD, 0 },
+ {DEVID_SIS5513, "SIS5513", PCI_SIS5513, ATA66_SIS5513, INIT_SIS5513, NULL, {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, ON_BOARD, 0 },
+ {DEVID_CMD643, "CMD643", PCI_CMD64X, NULL, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_CMD646, "CMD646", PCI_CMD64X, NULL, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x51,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_CMD648, "CMD648", PCI_CMD64X, ATA66_CMD64X, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_CMD649, "CMD649", PCI_CMD64X, ATA66_CMD64X, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+#ifndef CONFIG_BLK_DEV_CMD680
+ {DEVID_CMD680, "CMD680", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+#else /* CONFIG_BLK_DEV_CMD680 */
+ {DEVID_CMD680, "CMD680", PCI_CMD64X, ATA66_CMD64X, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+#endif /* !CONFIG_BLK_DEV_CMD680 */
+ {DEVID_HT6565, "HT6565", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_OPTI621, "OPTI621", NULL, NULL, INIT_OPTI621, NULL, {{0x45,0x80,0x00}, {0x40,0x08,0x00}}, ON_BOARD, 0 },
+ {DEVID_OPTI621X,"OPTI621X", NULL, NULL, INIT_OPTI621, NULL, {{0x45,0x80,0x00}, {0x40,0x08,0x00}}, ON_BOARD, 0 },
+ {DEVID_TRM290, "TRM290", NULL, NULL, INIT_TRM290, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_NS87415, "NS87415", NULL, NULL, INIT_NS87415, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_AEC6210, "AEC6210", PCI_AEC62XX, NULL, INIT_AEC62XX, DMA_AEC62XX, {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, OFF_BOARD, 0 },
+ {DEVID_AEC6260, "AEC6260", PCI_AEC62XX, ATA66_AEC62XX, INIT_AEC62XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, NEVER_BOARD, 0 },
+ {DEVID_AEC6260R,"AEC6260R", PCI_AEC62XX, ATA66_AEC62XX, INIT_AEC62XX, NULL, {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, OFF_BOARD, 0 },
+ {DEVID_W82C105, "W82C105", PCI_W82C105, NULL, INIT_W82C105, DMA_W82C105, {{0x40,0x01,0x01}, {0x40,0x10,0x10}}, ON_BOARD, 0 },
+ {DEVID_UM8673F, "UM8673F", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_UM8886A, "UM8886A", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_UM8886BF,"UM8886BF", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_HPT34X, "HPT34X", PCI_HPT34X, NULL, INIT_HPT34X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, NEVER_BOARD, 16 },
+ {DEVID_HPT366, "HPT366", PCI_HPT366, ATA66_HPT366, INIT_HPT366, DMA_HPT366, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 240 },
+ {DEVID_ALI15X3, "ALI15X3", PCI_ALI15X3, ATA66_ALI15X3, INIT_ALI15X3, DMA_ALI15X3, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_CY82C693,"CY82C693", PCI_CY82C693, NULL, INIT_CY82C693, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_HINT, "HINT_IDE", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_CS5530, "CS5530", PCI_CS5530, NULL, INIT_CS5530, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_AMD7401, "AMD7401", NULL, NULL, NULL, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 },
+ {DEVID_AMD7409, "AMD7409", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 },
+ {DEVID_AMD7411, "AMD7411", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 },
+ {DEVID_AMD7441, "AMD7441", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 },
+ {DEVID_PDCADMA, "PDCADMA", PCI_PDCADMA, ATA66_PDCADMA, INIT_PDCADMA, DMA_PDCADMA, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 },
+ {DEVID_SLC90E66,"SLC90E66", PCI_SLC90E66, ATA66_SLC90E66, INIT_SLC90E66, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 },
+ {DEVID_OSB4, "ServerWorks OSB4", PCI_SVWKS, ATA66_SVWKS, INIT_SVWKS, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_CSB5, "ServerWorks CSB5", PCI_SVWKS, ATA66_SVWKS, INIT_SVWKS, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 },
+ {DEVID_ITE8172G,"IT8172G", PCI_IT8172, NULL, INIT_IT8172, NULL, {{0x00,0x00,0x00}, {0x40,0x00,0x01}}, ON_BOARD, 0 },
+ {IDE_PCI_DEVID_NULL, "PCI_IDE", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }};
+
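+/*
+ * A note on the enablebits triplets in the table above ({reg,mask,val}
+ * per port): ide_setup_pci_device() reads config byte "reg", masks it
+ * with "mask" and compares the result against "val" to decide whether a
+ * port is enabled.  An all-zero triplet means "no enable bit known,
+ * assume the port is enabled".
+ */
+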
+/*
+ * This allows offboard ide-pci cards to enable a BIOS, verify interrupt
+ * settings of split-mirror pci-config space, place the chipset into
+ * init mode, and/or preserve an interrupt if the card lacks native IDE
+ * support.
+ */
+static unsigned int __init ide_special_settings (struct pci_dev *dev, const char *name)
+{
+ switch(dev->device) {
+ case PCI_DEVICE_ID_TTI_HPT366:
+ case PCI_DEVICE_ID_PROMISE_20246:
+ case PCI_DEVICE_ID_PROMISE_20262:
+ case PCI_DEVICE_ID_PROMISE_20265:
+ case PCI_DEVICE_ID_PROMISE_20267:
+ case PCI_DEVICE_ID_PROMISE_20268:
+ case PCI_DEVICE_ID_PROMISE_20270:
+ case PCI_DEVICE_ID_PROMISE_20269:
+ case PCI_DEVICE_ID_PROMISE_20275:
+ case PCI_DEVICE_ID_PROMISE_20276:
+ case PCI_DEVICE_ID_ARTOP_ATP850UF:
+ case PCI_DEVICE_ID_ARTOP_ATP860:
+ case PCI_DEVICE_ID_ARTOP_ATP860R:
+ return dev->irq;
+ default:
+ break;
+ }
+ return 0;
+}
+
+/*
+ * Match a PCI IDE port against an entry in ide_hwifs[],
+ * based on io_base port if possible.
+ */
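+/*
+ * In short, the search order is: (1) a slot whose io_base was forced via
+ * ide_setup() parameters, (2) a slot whose default io_base matches and is
+ * still unclaimed, (3) any unclaimed slot -- ide2 and up first, unless
+ * the card is flagged bootable, in which case ide0/ide1 may be taken
+ * straight away.
+ */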
+static ide_hwif_t __init *ide_match_hwif (unsigned long io_base, byte bootable, const char *name)
+{
+ int h;
+ ide_hwif_t *hwif;
+
+ /*
+ * Look for a hwif with matching io_base specified using
+ * parameters to ide_setup().
+ */
+ for (h = 0; h < MAX_HWIFS; ++h) {
+ hwif = &ide_hwifs[h];
+ if (hwif->io_ports[IDE_DATA_OFFSET] == io_base) {
+ if (hwif->chipset == ide_generic)
+ return hwif; /* a perfect match */
+ }
+ }
+ /*
+ * Look for a hwif with matching io_base default value.
+ * If chipset is "ide_unknown", then claim that hwif slot.
+ * Otherwise, some other chipset has already claimed it.. :(
+ */
+ for (h = 0; h < MAX_HWIFS; ++h) {
+ hwif = &ide_hwifs[h];
+ if (hwif->io_ports[IDE_DATA_OFFSET] == io_base) {
+ if (hwif->chipset == ide_unknown)
+ return hwif; /* match */
+ printk("%s: port 0x%04lx already claimed by %s\n", name, io_base, hwif->name);
+ return NULL; /* already claimed */
+ }
+ }
+	/*
+	 * Okay, there is no hwif matching our io_base,
+	 * so we'll just claim an unassigned slot.
+	 * Give preference to claiming other slots before claiming ide0/ide1,
+	 * just in case there's another interface yet-to-be-scanned
+	 * which uses ports 1f0/170 (the ide0/ide1 defaults).
+	 *
+	 * The exception is a card flagged (bootable): it is allowed to
+	 * claim ide0/ide1 first, even though it does not use the standard
+	 * ports.
+	 */
+ if (bootable) {
+ for (h = 0; h < MAX_HWIFS; ++h) {
+ hwif = &ide_hwifs[h];
+ if (hwif->chipset == ide_unknown)
+ return hwif; /* pick an unused entry */
+ }
+ } else {
+ for (h = 2; h < MAX_HWIFS; ++h) {
+ hwif = ide_hwifs + h;
+ if (hwif->chipset == ide_unknown)
+ return hwif; /* pick an unused entry */
+ }
+ }
+ for (h = 0; h < 2; ++h) {
+ hwif = ide_hwifs + h;
+ if (hwif->chipset == ide_unknown)
+ return hwif; /* pick an unused entry */
+ }
+ printk("%s: too many IDE interfaces, no room in table\n", name);
+ return NULL;
+}
+
+static int __init ide_setup_pci_baseregs (struct pci_dev *dev, const char *name)
+{
+ byte reg, progif = 0;
+
+ /*
+ * Place both IDE interfaces into PCI "native" mode:
+ */
+ if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) || (progif & 5) != 5) {
+ if ((progif & 0xa) != 0xa) {
+ printk("%s: device not capable of full native PCI mode\n", name);
+ return 1;
+ }
+ printk("%s: placing both ports into native PCI mode\n", name);
+ (void) pci_write_config_byte(dev, PCI_CLASS_PROG, progif|5);
+ if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) || (progif & 5) != 5) {
+ printk("%s: rewrite of PROGIF failed, wanted 0x%04x, got 0x%04x\n", name, progif|5, progif);
+ return 1;
+ }
+ }
+ /*
+	 * Set up base registers for IDE command/control spaces for each interface:
+ */
+ for (reg = 0; reg < 4; reg++) {
+ struct resource *res = dev->resource + reg;
+ if ((res->flags & IORESOURCE_IO) == 0)
+ continue;
+ if (!res->start) {
+ printk("%s: Missing I/O address #%d\n", name, reg);
+ return 1;
+ }
+ }
+ return 0;
+}
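+
+/*
+ * For reference: the PROGIF tests above rely on the standard PCI IDE
+ * programming-interface byte, where bits 0/2 mean the primary/secondary
+ * port is in native mode and bits 1/3 mean that mode is programmable.
+ * Hence 0x5 is "both ports native" and 0xa is "both ports switchable".
+ */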
+
+/*
+ * ide_setup_pci_device() looks at the primary/secondary interfaces
+ * on a PCI IDE device and, if they are enabled, prepares the IDE driver
+ * for use with them. This generic code works for most PCI chipsets.
+ *
+ * One thing that is not standardized is the location of the
+ * primary/secondary interface "enable/disable" bits. For chipsets that
+ * we "know" about, this information is in the ide_pci_device_t struct;
+ * for all other chipsets, we just assume both interfaces are enabled.
+ */
+static void __init ide_setup_pci_device (struct pci_dev *dev, ide_pci_device_t *d)
+{
+ unsigned int port, at_least_one_hwif_enabled = 0, autodma = 0, pciirq = 0;
+ unsigned short pcicmd = 0, tried_config = 0;
+ byte tmp = 0;
+ ide_hwif_t *hwif, *mate = NULL;
+ unsigned int class_rev;
+ static int secondpdc = 0;
+
+#ifdef CONFIG_IDEDMA_AUTO
+ if (!noautodma)
+ autodma = 1;
+#endif
+
+ if (d->init_hwif == IDE_NO_DRIVER) {
+ printk(KERN_WARNING "%s: detected chipset, but driver not compiled in!\n", d->name);
+ d->init_hwif = NULL;
+ }
+
+	if (pci_enable_device(dev)) {
+		if (pci_enable_device_bars(dev, 1<<4)) {
+			printk(KERN_WARNING "%s: (ide_setup_pci_device:) Could not enable device.\n", d->name);
+			return;
+		}
+		printk(KERN_INFO "%s: BIOS setup was incomplete.\n", d->name);
+	}
+
+check_if_enabled:
+ if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd)) {
+ printk("%s: error accessing PCI regs\n", d->name);
+ return;
+ }
+ if (!(pcicmd & PCI_COMMAND_IO)) { /* is device disabled? */
+ /*
+ * PnP BIOS was *supposed* to have set this device up for us,
+ * but we can do it ourselves, so long as the BIOS has assigned an IRQ
+ * (or possibly the device is using a "legacy header" for IRQs).
+ * Maybe the user deliberately *disabled* the device,
+ * but we'll eventually ignore it again if no drives respond.
+ */
+ if (tried_config++
+ || ide_setup_pci_baseregs(dev, d->name)
+ || pci_write_config_word(dev, PCI_COMMAND, pcicmd | PCI_COMMAND_IO)) {
+ printk("%s: device disabled (BIOS)\n", d->name);
+ return;
+ }
+ autodma = 0; /* default DMA off if we had to configure it here */
+ goto check_if_enabled;
+ }
+ if (tried_config)
+ printk("%s: device enabled (Linux)\n", d->name);
+
+ pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
+ class_rev &= 0xff;
+
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT34X)) {
+		/* see comments in hpt34x.c for why */
+ char *chipset_names[] = {"HPT343", "HPT345"};
+ strcpy(d->name, chipset_names[(pcicmd & PCI_COMMAND_MEMORY) ? 1 : 0]);
+ d->bootable = (pcicmd & PCI_COMMAND_MEMORY) ? OFF_BOARD : NEVER_BOARD;
+ }
+
+ printk("%s: chipset revision %d\n", d->name, class_rev);
+
+ /*
+ * Can we trust the reported IRQ?
+ */
+ pciirq = dev->irq;
+
+	if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) {
+		/* By rights we would ignore these, but the Promise FastTrak
+		 * people have some strange ideas about what is proprietary,
+		 * so we have to act otherwise on those.  The SuperTrak,
+		 * however, we do need to skip.
+		 */
+		if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20265)) {
+			printk(KERN_INFO "ide: Found Promise 20265 in RAID mode.\n");
+			if (dev->bus->self &&
+			    dev->bus->self->vendor == PCI_VENDOR_ID_INTEL &&
+			    dev->bus->self->device == PCI_DEVICE_ID_INTEL_I960) {
+				printk(KERN_INFO "ide: Skipping Promise PDC20265 attached to I2O RAID controller.\n");
+				return;
+			}
+		}
+		/* It's attached to something else, just a random bridge.
+		   Suspect a FastTrak and fall through. */
+	}
+
+ if ((dev->class & ~(0xfa)) != ((PCI_CLASS_STORAGE_IDE << 8) | 5)) {
+ printk("%s: not 100%% native mode: will probe irqs later\n", d->name);
+		/*
+		 * This allows offboard ide-pci cards to enable a BIOS,
+		 * verify interrupt settings of split-mirror pci-config
+		 * space, place the chipset into init mode, and/or preserve
+		 * an interrupt if the card lacks native IDE support.
+		 */
+ pciirq = (d->init_chipset) ? d->init_chipset(dev, d->name) : ide_special_settings(dev, d->name);
+ } else if (tried_config) {
+ printk("%s: will probe irqs later\n", d->name);
+ pciirq = 0;
+ } else if (!pciirq) {
+ printk("%s: bad irq (%d): will probe later\n", d->name, pciirq);
+ pciirq = 0;
+ } else {
+ if (d->init_chipset)
+ (void) d->init_chipset(dev, d->name);
+#ifdef __sparc__
+ printk("%s: 100%% native mode on irq %s\n",
+ d->name, __irq_itoa(pciirq));
+#else
+ printk("%s: 100%% native mode on irq %d\n", d->name, pciirq);
+#endif
+ }
+
+ /*
+ * Set up the IDE ports
+ */
+ for (port = 0; port <= 1; ++port) {
+ unsigned long base = 0, ctl = 0;
+ ide_pci_enablebit_t *e = &(d->enablebits[port]);
+
+		/*
+		 * If this is a Promise FakeRaid controller, the 2nd controller will
+		 * be marked as disabled while it is actually there and enabled by
+		 * the BIOS for RAID purposes.  Skip the normal "is it enabled" test
+		 * for those.
+		 */
+ if ((IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20265)) && (secondpdc++==1) && (port==1) )
+ goto controller_ok;
+ if ((IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20262)) && (secondpdc++==1) && (port==1) )
+ goto controller_ok;
+
+ if (e->reg && (pci_read_config_byte(dev, e->reg, &tmp) || (tmp & e->mask) != e->val))
+ continue; /* port not enabled */
+controller_ok:
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT366) && (port) && (class_rev < 0x03))
+ return;
+ if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE || (dev->class & (port ? 4 : 1)) != 0) {
+ ctl = dev->resource[(2*port)+1].start;
+ base = dev->resource[2*port].start;
+ if (!(ctl & PCI_BASE_ADDRESS_IO_MASK) ||
+ !(base & PCI_BASE_ADDRESS_IO_MASK)) {
+ printk("%s: IO baseregs (BIOS) are reported as MEM, report to <andre@linux-ide.org>.\n", d->name);
+#if 0
+ /* FIXME! This really should check that it really gets the IO/MEM part right! */
+ continue;
+#endif
+ }
+ }
+ if ((ctl && !base) || (base && !ctl)) {
+ printk("%s: inconsistent baseregs (BIOS) for port %d, skipping\n", d->name, port);
+ continue;
+ }
+ if (!ctl)
+ ctl = port ? 0x374 : 0x3f4; /* use default value */
+ if (!base)
+ base = port ? 0x170 : 0x1f0; /* use default value */
+ if ((hwif = ide_match_hwif(base, d->bootable, d->name)) == NULL)
+ continue; /* no room in ide_hwifs[] */
+ if (hwif->io_ports[IDE_DATA_OFFSET] != base) {
+ ide_init_hwif_ports(&hwif->hw, base, (ctl | 2), NULL);
+ memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
+ hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET];
+ }
+ hwif->chipset = ide_pci;
+ hwif->pci_dev = dev;
+ hwif->pci_devid = d->devid;
+ hwif->channel = port;
+ if (!hwif->irq)
+ hwif->irq = pciirq;
+ if (mate) {
+ hwif->mate = mate;
+ mate->mate = hwif;
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6210)) {
+ hwif->serialized = 1;
+ mate->serialized = 1;
+ }
+ }
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8886A) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8886BF) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8673F)) {
+ hwif->irq = hwif->channel ? 15 : 14;
+ goto bypass_umc_dma;
+ }
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_MPIIX))
+ goto bypass_piix_dma;
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDCADMA))
+ goto bypass_legacy_dma;
+ if (hwif->udma_four) {
+ printk("%s: ATA-66/100 forced bit set (WARNING)!!\n", d->name);
+ } else {
+ hwif->udma_four = (d->ata66_check) ? d->ata66_check(hwif) : 0;
+ }
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_SIS5513) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6260) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PIIX4NX) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT34X) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_VIA_IDE) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_MR_IDE) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_VP_IDE))
+ autodma = 0;
+ if (autodma)
+ hwif->autodma = 1;
+
+ if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20246) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20262) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20265) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20267) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20268) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20270) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20269) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20275) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20276) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6210) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6260) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6260R) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT34X) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT366) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_CS5530) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_CY82C693) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD646) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD648) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD649) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD680) ||
+ IDE_PCI_DEVID_EQ(d->devid, DEVID_OSB4) ||
+ ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && (dev->class & 0x80))) {
+ unsigned long dma_base = ide_get_or_set_dma_base(hwif, (!mate && d->extra) ? d->extra : 0, d->name);
+ if (dma_base && !(pcicmd & PCI_COMMAND_MASTER)) {
+ /*
+ * Set up BM-DMA capability (PnP BIOS should have done this)
+ */
+ if (!IDE_PCI_DEVID_EQ(d->devid, DEVID_CS5530))
+ hwif->autodma = 0; /* default DMA off if we had to configure it here */
+ (void) pci_write_config_word(dev, PCI_COMMAND, pcicmd | PCI_COMMAND_MASTER);
+ if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd) || !(pcicmd & PCI_COMMAND_MASTER)) {
+ printk("%s: %s error updating PCICMD\n", hwif->name, d->name);
+ dma_base = 0;
+ }
+ }
+ if (dma_base) {
+ if (d->dma_init) {
+ d->dma_init(hwif, dma_base);
+ } else {
+ ide_setup_dma(hwif, dma_base, 8);
+ }
+ } else {
+ printk("%s: %s Bus-Master DMA disabled (BIOS)\n", hwif->name, d->name);
+ }
+ }
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+bypass_legacy_dma:
+bypass_piix_dma:
+bypass_umc_dma:
+ if (d->init_hwif) /* Call chipset-specific routine for each enabled hwif */
+ d->init_hwif(hwif);
+ mate = hwif;
+ at_least_one_hwif_enabled = 1;
+ }
+ if (!at_least_one_hwif_enabled)
+ printk("%s: neither IDE port enabled (BIOS)\n", d->name);
+}
+
+static void __init pdc20270_device_order_fixup (struct pci_dev *dev, ide_pci_device_t *d)
+{
+ struct pci_dev *dev2 = NULL, *findev;
+ ide_pci_device_t *d2;
+
+ if ((dev->bus->self &&
+ dev->bus->self->vendor == PCI_VENDOR_ID_DEC) &&
+ (dev->bus->self->device == PCI_DEVICE_ID_DEC_21150)) {
+ if (PCI_SLOT(dev->devfn) & 2) {
+ return;
+ }
+ d->extra = 0;
+ pci_for_each_dev(findev) {
+ if ((findev->vendor == dev->vendor) &&
+ (findev->device == dev->device) &&
+ (PCI_SLOT(findev->devfn) & 2)) {
+ byte irq = 0, irq2 = 0;
+ dev2 = findev;
+ pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
+ pci_read_config_byte(dev2, PCI_INTERRUPT_LINE, &irq2);
+ if (irq != irq2) {
+ dev2->irq = dev->irq;
+ pci_write_config_byte(dev2, PCI_INTERRUPT_LINE, irq);
+ }
+
+ }
+ }
+ }
+
+ printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn);
+ ide_setup_pci_device(dev, d);
+ if (!dev2)
+ return;
+ d2 = d;
+ printk("%s: IDE controller on PCI bus %02x dev %02x\n", d2->name, dev2->bus->number, dev2->devfn);
+ ide_setup_pci_device(dev2, d2);
+}
+
+static void __init hpt366_device_order_fixup (struct pci_dev *dev, ide_pci_device_t *d)
+{
+ struct pci_dev *dev2 = NULL, *findev;
+ ide_pci_device_t *d2;
+ unsigned char pin1 = 0, pin2 = 0;
+ unsigned int class_rev;
+ char *chipset_names[] = {"HPT366", "HPT366", "HPT368", "HPT370", "HPT370A", "HPT372"};
+
+ if (PCI_FUNC(dev->devfn) & 1)
+ return;
+
+ pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
+ class_rev &= 0xff;
+ if (class_rev > 5)
+ class_rev = 5;
+
+ strcpy(d->name, chipset_names[class_rev]);
+
+	switch(class_rev) {
+		case 4:
+		case 3:
+			printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn);
+			ide_setup_pci_device(dev, d);
+			return;
+		default:
+			break;
+	}
+
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin1);
+ pci_for_each_dev(findev) {
+ if ((findev->vendor == dev->vendor) &&
+ (findev->device == dev->device) &&
+ ((findev->devfn - dev->devfn) == 1) &&
+ (PCI_FUNC(findev->devfn) & 1)) {
+ dev2 = findev;
+ pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin2);
+ hpt363_shared_pin = (pin1 != pin2) ? 1 : 0;
+ hpt363_shared_irq = (dev->irq == dev2->irq) ? 1 : 0;
+ if (hpt363_shared_pin && hpt363_shared_irq) {
+ d->bootable = ON_BOARD;
+ printk("%s: onboard version of chipset, pin1=%d pin2=%d\n", d->name, pin1, pin2);
+#if 0
+ /* I forgot why I did this once, but it fixed something. */
+ pci_write_config_byte(dev2, PCI_INTERRUPT_PIN, dev->irq);
+ printk("PCI: %s: Fixing interrupt %d pin %d to ZERO \n", d->name, dev2->irq, pin2);
+ pci_write_config_byte(dev2, PCI_INTERRUPT_LINE, 0);
+#endif
+ }
+ break;
+ }
+ }
+ printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn);
+ ide_setup_pci_device(dev, d);
+ if (!dev2)
+ return;
+ d2 = d;
+ printk("%s: IDE controller on PCI bus %02x dev %02x\n", d2->name, dev2->bus->number, dev2->devfn);
+ ide_setup_pci_device(dev2, d2);
+}
+
+/*
+ * ide_scan_pcibus() gets invoked at boot time from ide.c.
+ * It walks the PCI device list, calling ide_scan_pcidev() on each device;
+ * recognized IDE controllers are then handed to ide_setup_pci_device().
+ */
+void __init ide_scan_pcidev (struct pci_dev *dev)
+{
+ ide_pci_devid_t devid;
+ ide_pci_device_t *d;
+
+ devid.vid = dev->vendor;
+ devid.did = dev->device;
+ for (d = ide_pci_chipsets; d->devid.vid && !IDE_PCI_DEVID_EQ(d->devid, devid); ++d);
+ if (d->init_hwif == IDE_IGNORE)
+		printk("%s: ignored by ide_scan_pcidev() (uses own driver)\n", d->name);
+ else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_OPTI621V) && !(PCI_FUNC(dev->devfn) & 1))
+ return;
+ else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_CY82C693) && (!(PCI_FUNC(dev->devfn) & 1) || !((dev->class >> 8) == PCI_CLASS_STORAGE_IDE)))
+		return; /* CY82C693 is more than just an IDE controller */
+ else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_ITE8172G) && (!(PCI_FUNC(dev->devfn) & 1) || !((dev->class >> 8) == PCI_CLASS_STORAGE_IDE)))
+		return; /* IT8172G is likewise more than just an IDE controller */
+ else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8886A) && !(PCI_FUNC(dev->devfn) & 1))
+ return; /* UM8886A/BF pair */
+ else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT366))
+ hpt366_device_order_fixup(dev, d);
+ else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20270))
+ pdc20270_device_order_fixup(dev, d);
+ else if (!IDE_PCI_DEVID_EQ(d->devid, IDE_PCI_DEVID_NULL) || (dev->class >> 8) == PCI_CLASS_STORAGE_IDE) {
+ if (IDE_PCI_DEVID_EQ(d->devid, IDE_PCI_DEVID_NULL))
+ printk("%s: unknown IDE controller on PCI bus %02x device %02x, VID=%04x, DID=%04x\n",
+ d->name, dev->bus->number, dev->devfn, devid.vid, devid.did);
+ else
+ printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn);
+ ide_setup_pci_device(dev, d);
+ }
+}
+
+void __init ide_scan_pcibus (int scan_direction)
+{
+ struct pci_dev *dev;
+
+ if (!scan_direction) {
+ pci_for_each_dev(dev) {
+ ide_scan_pcidev(dev);
+ }
+ } else {
+ pci_for_each_dev_reverse(dev) {
+ ide_scan_pcidev(dev);
+ }
+ }
+}
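+
+/*
+ * A minimal sketch of the boot-time entry point, assuming the caller in
+ * ide.c passes the scan direction chosen by the "ide=reverse" option
+ * (illustrative only, hence the #if 0):
+ */
+#if 0
+extern int ide_scan_direction;		/* assumed to be kept in ide.c */
+
+static void __init example_pci_scan (void)
+{
+	/* walk the PCI bus forwards or backwards, setting up each IDE chip */
+	ide_scan_pcibus(ide_scan_direction);
+}
+#endif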
diff --git a/xen/drivers/ide/ide-probe.c b/xen/drivers/ide/ide-probe.c
new file mode 100644
index 0000000000..99f38dfcb8
--- /dev/null
+++ b/xen/drivers/ide/ide-probe.c
@@ -0,0 +1,1066 @@
+/*
+ * linux/drivers/ide/ide-probe.c Version 1.07 March 18, 2001
+ *
+ * Copyright (C) 1994-1998 Linus Torvalds & authors (see below)
+ */
+
+/*
+ * Mostly written by Mark Lord <mlord@pobox.com>
+ * and Gadi Oxman <gadio@netvision.net.il>
+ * and Andre Hedrick <andre@linux-ide.org>
+ *
+ * See linux/MAINTAINERS for address of current maintainer.
+ *
+ * This is the IDE probe module, as evolved from hd.c and ide.c.
+ *
+ * Version 1.00 move drive probing code from ide.c to ide-probe.c
+ * Version 1.01 fix compilation problem for m68k
+ * Version 1.02 increase WAIT_PIDENTIFY to avoid CD-ROM locking at boot
+ * by Andrea Arcangeli
+ * Version 1.03 fix for (hwif->chipset == ide_4drives)
+ * Version 1.04 fixed buggy treatments of known flash memory cards
+ *
+ * Version 1.05 fix for (hwif->chipset == ide_pdc4030)
+ * added ide6/7/8/9
+ * allowed for secondary flash card to be detectable
+ * with new flag : drive->ata_flash : 1;
+ * Version 1.06 stream line request queue and prep for cascade project.
+ * Version 1.07 max_sect <= 255; slower disks would get behind and
+ * then fall over when they get to 256. Paul G.
+ */
+
+#undef REALLY_SLOW_IO /* most systems can safely undef this */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/slab.h>
+#include <xeno/delay.h>
+#include <xeno/ide.h>
+#include <xeno/spinlock.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#define IDE_PROBE_TRACE 0
+
+static inline void do_identify (ide_drive_t *drive, byte cmd)
+{
+ int bswap = 1;
+ struct hd_driveid *id;
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::do_identify\n");
+ }
+
+ id = drive->id = kmalloc (SECTOR_WORDS*4, GFP_ATOMIC); /* called with interrupts disabled! */
+ if (!id) {
+ printk(KERN_WARNING "(ide-probe::do_identify) Out of memory.\n");
+ goto err_kmalloc;
+ }
+
+ ide_input_data(drive, id, SECTOR_WORDS); /* read 512 bytes of id info */
+ ide__sti(); /* local CPU only */
+ ide_fix_driveid(id);
+
+ if (id->word156 == 0x4d42) {
+ printk("%s: drive->id->word156 == 0x%04x \n", drive->name, drive->id->word156);
+ }
+
+ if (!drive->forced_lun)
+ drive->last_lun = id->last_lun & 0x7;
+#if defined (CONFIG_SCSI_EATA_DMA) || defined (CONFIG_SCSI_EATA_PIO) || defined (CONFIG_SCSI_EATA)
+ /*
+ * EATA SCSI controllers do a hardware ATA emulation:
+ * Ignore them if there is a driver for them available.
+ */
+ if ((id->model[0] == 'P' && id->model[1] == 'M')
+ || (id->model[0] == 'S' && id->model[1] == 'K')) {
+ printk("%s: EATA SCSI HBA %.10s\n", drive->name, id->model);
+ goto err_misc;
+ }
+#endif /* CONFIG_SCSI_EATA_DMA || CONFIG_SCSI_EATA_PIO || CONFIG_SCSI_EATA */
+
+ /*
+ * WIN_IDENTIFY returns little-endian info,
+ * WIN_PIDENTIFY *usually* returns little-endian info.
+ */
+ if (cmd == WIN_PIDENTIFY) {
+ if ((id->model[0] == 'N' && id->model[1] == 'E') /* NEC */
+ || (id->model[0] == 'F' && id->model[1] == 'X') /* Mitsumi */
+ || (id->model[0] == 'P' && id->model[1] == 'i'))/* Pioneer */
+ bswap ^= 1; /* Vertos drives may still be weird */
+ }
+ ide_fixstring (id->model, sizeof(id->model), bswap);
+ ide_fixstring (id->fw_rev, sizeof(id->fw_rev), bswap);
+ ide_fixstring (id->serial_no, sizeof(id->serial_no), bswap);
+
+ if (strstr(id->model, "E X A B Y T E N E S T"))
+ goto err_misc;
+
+ id->model[sizeof(id->model)-1] = '\0'; /* we depend on this a lot! */
+ printk("%s: %s, ", drive->name, id->model);
+ drive->present = 1;
+
+ /*
+ * Check for an ATAPI device
+ */
+ if (cmd == WIN_PIDENTIFY) {
+ byte type = (id->config >> 8) & 0x1f;
+ printk("ATAPI ");
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (HWIF(drive)->channel == 1 && HWIF(drive)->chipset == ide_pdc4030) {
+ printk(" -- not supported on 2nd Promise port\n");
+ goto err_misc;
+ }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+ switch (type) {
+ case ide_floppy:
+ if (!strstr(id->model, "CD-ROM")) {
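+				/* "poyp" is "oppy" with each 16-bit word byte-swapped */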
+ if (!strstr(id->model, "oppy") && !strstr(id->model, "poyp") && !strstr(id->model, "ZIP"))
+					printk("cdrom or floppy? assuming ");
+ if (drive->media != ide_cdrom) {
+ printk ("FLOPPY");
+ break;
+ }
+ }
+ type = ide_cdrom; /* Early cdrom models used zero */
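+			/* fall through */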
+ case ide_cdrom:
+ drive->removable = 1;
+#ifdef CONFIG_PPC
+ /* kludge for Apple PowerBook internal zip */
+ if (!strstr(id->model, "CD-ROM") && strstr(id->model, "ZIP")) {
+ printk ("FLOPPY");
+ type = ide_floppy;
+ break;
+ }
+#endif
+ printk ("CD/DVD-ROM");
+ break;
+ case ide_tape:
+ printk ("TAPE");
+ break;
+ case ide_optical:
+ printk ("OPTICAL");
+ drive->removable = 1;
+ break;
+ default:
+ printk("UNKNOWN (type %d)", type);
+ break;
+ }
+ printk (" drive\n");
+ drive->media = type;
+ return;
+ }
+
+ /*
+ * Not an ATAPI device: looks like a "regular" hard disk
+ */
+ if (id->config & (1<<7))
+ drive->removable = 1;
+ /*
+	 * Prevent long system lockup probing later for non-existent
+ * slave drive if the hwif is actually a flash memory card of some variety:
+ */
+ if (drive_is_flashcard(drive)) {
+ ide_drive_t *mate = &HWIF(drive)->drives[1^drive->select.b.unit];
+ if (!mate->ata_flash) {
+ mate->present = 0;
+ mate->noprobe = 1;
+ }
+ }
+ drive->media = ide_disk;
+ printk("ATA DISK drive\n");
+ QUIRK_LIST(HWIF(drive),drive);
+ return;
+
+err_misc:
+ kfree(id);
+err_kmalloc:
+ drive->present = 0;
+ return;
+}
+
+/*
+ * try_to_identify() sends an ATA(PI) IDENTIFY request to a drive
+ * and waits for a response. It also monitors irqs while this is
+ * happening, in the hope of automatically determining which one is
+ * being used by the interface.
+ *
+ * Returns: 0 device was identified
+ * 1 device timed-out (no response to identify request)
+ * 2 device aborted the command (refused to identify itself)
+ */
+static int actual_try_to_identify (ide_drive_t *drive, byte cmd)
+{
+ int rc;
+ ide_ioreg_t hd_status;
+ unsigned long timeout;
+ byte s, a;
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::actual_try_to_identify\n");
+ }
+
+ if (IDE_CONTROL_REG) {
+ /* take a deep breath */
+ ide_delay_50ms();
+ a = IN_BYTE(IDE_ALTSTATUS_REG);
+ s = IN_BYTE(IDE_STATUS_REG);
+ if ((a ^ s) & ~INDEX_STAT) {
+ printk("%s: probing with STATUS(0x%02x) instead of ALTSTATUS(0x%02x)\n", drive->name, s, a);
+ hd_status = IDE_STATUS_REG; /* ancient Seagate drives, broken interfaces */
+ } else {
+ hd_status = IDE_ALTSTATUS_REG; /* use non-intrusive polling */
+ }
+ } else {
+ ide_delay_50ms();
+ hd_status = IDE_STATUS_REG;
+ }
+
+ /* set features register for atapi identify command to be sure of reply */
+	if (cmd == WIN_PIDENTIFY)
+ OUT_BYTE(0,IDE_FEATURE_REG); /* disable dma & overlap */
+
+#if CONFIG_BLK_DEV_PDC4030
+ if (HWIF(drive)->chipset == ide_pdc4030) {
+ /* DC4030 hosted drives need their own identify... */
+ extern int pdc4030_identify(ide_drive_t *);
+ if (pdc4030_identify(drive)) {
+ return 1;
+ }
+ } else
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+ OUT_BYTE(cmd,IDE_COMMAND_REG); /* ask drive for ID */
+ timeout = ((cmd == WIN_IDENTIFY) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2;
+ timeout += jiffies;
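+	/* the signed comparison below stays correct across a jiffies wraparound */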
+ do {
+ if (0 < (signed long)(jiffies - timeout)) {
+ return 1; /* drive timed-out */
+ }
+ ide_delay_50ms(); /* give drive a breather */
+ } while (IN_BYTE(hd_status) & BUSY_STAT);
+
+ ide_delay_50ms(); /* wait for IRQ and DRQ_STAT */
+ if (OK_STAT(GET_STAT(),DRQ_STAT,BAD_R_STAT)) {
+ unsigned long flags;
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only; some systems need this */
+ do_identify(drive, cmd); /* drive returned ID */
+ rc = 0; /* drive responded with ID */
+ (void) GET_STAT(); /* clear drive IRQ */
+ __restore_flags(flags); /* local CPU only */
+ } else
+ rc = 2; /* drive refused ID */
+ return rc;
+}
+
+static int try_to_identify (ide_drive_t *drive, byte cmd)
+{
+ int retval;
+ int autoprobe = 0;
+ unsigned long cookie = 0;
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::try_to_identify\n");
+ }
+
+ if (IDE_CONTROL_REG && !HWIF(drive)->irq) {
+ autoprobe = 1;
+ cookie = probe_irq_on();
+ OUT_BYTE(drive->ctl,IDE_CONTROL_REG); /* enable device irq */
+ }
+
+ retval = actual_try_to_identify(drive, cmd);
+
+ if (autoprobe) {
+ int irq;
+ OUT_BYTE(drive->ctl|2,IDE_CONTROL_REG); /* mask device irq */
+ (void) GET_STAT(); /* clear drive IRQ */
+ udelay(5);
+ irq = probe_irq_off(cookie);
+ if (!HWIF(drive)->irq) {
+ if (irq > 0) {
+ HWIF(drive)->irq = irq;
+ } else { /* Mmmm.. multiple IRQs.. don't know which was ours */
+ printk("%s: IRQ probe failed (0x%lx)\n", drive->name, cookie);
+#ifdef CONFIG_BLK_DEV_CMD640
+#ifdef CMD640_DUMP_REGS
+ if (HWIF(drive)->chipset == ide_cmd640) {
+ printk("%s: Hmmm.. probably a driver problem.\n", drive->name);
+ CMD640_DUMP_REGS;
+ }
+#endif /* CMD640_DUMP_REGS */
+#endif /* CONFIG_BLK_DEV_CMD640 */
+ }
+ }
+ }
+ return retval;
+}
+
+
+/*
+ * do_probe() has the difficult job of finding a drive if it exists,
+ * without getting hung up if it doesn't exist, without trampling on
+ * ethernet cards, and without leaving any IRQs dangling to haunt us later.
+ *
+ * If a drive is "known" to exist (from CMOS or kernel parameters),
+ * but does not respond right away, the probe will "hang in there"
+ * for the maximum wait time (about 30 seconds), otherwise it will
+ * exit much more quickly.
+ *
+ * Returns: 0 device was identified
+ * 1 device timed-out (no response to identify request)
+ * 2 device aborted the command (refused to identify itself)
+ * 3 bad status from device (possible for ATAPI drives)
+ * 4 probe was not attempted because failure was obvious
+ */
+static int do_probe (ide_drive_t *drive, byte cmd)
+{
+ int rc;
+ ide_hwif_t *hwif = HWIF(drive);
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::do_probe\n");
+ }
+
+ if (drive->present) { /* avoid waiting for inappropriate probes */
+ if ((drive->media != ide_disk) && (cmd == WIN_IDENTIFY))
+ return 4;
+ }
+#ifdef DEBUG
+ printk("probing for %s: present=%d, media=%d, probetype=%s\n",
+ drive->name, drive->present, drive->media,
+ (cmd == WIN_IDENTIFY) ? "ATA" : "ATAPI");
+#endif
+ ide_delay_50ms(); /* needed for some systems (e.g. crw9624 as drive0 with disk as slave) */
+ SELECT_DRIVE(hwif,drive);
+ ide_delay_50ms();
+ if (IN_BYTE(IDE_SELECT_REG) != drive->select.all && !drive->present) {
+ if (drive->select.b.unit != 0) {
+ SELECT_DRIVE(hwif,&hwif->drives[0]); /* exit with drive0 selected */
+ ide_delay_50ms(); /* allow BUSY_STAT to assert & clear */
+ }
+ return 3; /* no i/f present: mmm.. this should be a 4 -ml */
+ }
+
+ if (OK_STAT(GET_STAT(),READY_STAT,BUSY_STAT)
+ || drive->present || cmd == WIN_PIDENTIFY)
+ {
+ if ((rc = try_to_identify(drive,cmd))) /* send cmd and wait */
+ rc = try_to_identify(drive,cmd); /* failed: try again */
+ if (rc == 1 && cmd == WIN_PIDENTIFY && drive->autotune != 2) {
+ unsigned long timeout;
+ printk("%s: no response (status = 0x%02x), resetting drive\n", drive->name, GET_STAT());
+ ide_delay_50ms();
+ OUT_BYTE (drive->select.all, IDE_SELECT_REG);
+ ide_delay_50ms();
+ OUT_BYTE(WIN_SRST, IDE_COMMAND_REG);
+ timeout = jiffies;
+ while ((GET_STAT() & BUSY_STAT) && time_before(jiffies, timeout + WAIT_WORSTCASE))
+ ide_delay_50ms();
+ rc = try_to_identify(drive, cmd);
+ }
+ if (rc == 1)
+ printk("%s: no response (status = 0x%02x)\n", drive->name, GET_STAT());
+ (void) GET_STAT(); /* ensure drive irq is clear */
+ } else {
+ rc = 3; /* not present or maybe ATAPI */
+ }
+ if (drive->select.b.unit != 0) {
+ SELECT_DRIVE(hwif,&hwif->drives[0]); /* exit with drive0 selected */
+ ide_delay_50ms();
+ (void) GET_STAT(); /* ensure drive irq is clear */
+ }
+ return rc;
+}
+
+/*
+ * enable_nest() sends the EXABYTE_ENABLE_NEST command to wake up a drive
+ * hosted behind an Exabyte Nest, then re-probes it as a normal device.
+ */
+static void enable_nest (ide_drive_t *drive)
+{
+ unsigned long timeout;
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::enable_nest\n");
+ }
+
+ printk("%s: enabling %s -- ", HWIF(drive)->name, drive->id->model);
+ SELECT_DRIVE(HWIF(drive), drive);
+ ide_delay_50ms();
+ OUT_BYTE(EXABYTE_ENABLE_NEST, IDE_COMMAND_REG);
+ timeout = jiffies + WAIT_WORSTCASE;
+ do {
+ if (time_after(jiffies, timeout)) {
+ printk("failed (timeout)\n");
+ return;
+ }
+ ide_delay_50ms();
+ } while (GET_STAT() & BUSY_STAT);
+ ide_delay_50ms();
+ if (!OK_STAT(GET_STAT(), 0, BAD_STAT))
+ printk("failed (status = 0x%02x)\n", GET_STAT());
+ else
+ printk("success\n");
+ if (do_probe(drive, WIN_IDENTIFY) >= 2) { /* if !(success||timed-out) */
+ (void) do_probe(drive, WIN_PIDENTIFY); /* look for ATAPI device */
+ }
+}
+
+/*
+ * probe_for_drive() tests for existence of a given drive using do_probe().
+ *
+ * Returns: 0 no device was found
+ * 1 device was found (note: drive->present might still be 0)
+ */
+static inline byte probe_for_drive (ide_drive_t *drive)
+{
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::probe_for_drive\n");
+ }
+
+ if (drive->noprobe) /* skip probing? */
+ return drive->present;
+ if (do_probe(drive, WIN_IDENTIFY) >= 2) { /* if !(success||timed-out) */
+ (void) do_probe(drive, WIN_PIDENTIFY); /* look for ATAPI device */
+ }
+ if (drive->id && strstr(drive->id->model, "E X A B Y T E N E S T"))
+ enable_nest(drive);
+ if (!drive->present)
+ return 0; /* drive not found */
+ if (drive->id == NULL) { /* identification failed? */
+ if (drive->media == ide_disk) {
+ printk ("%s: non-IDE drive, CHS=%d/%d/%d\n",
+ drive->name, drive->cyl, drive->head, drive->sect);
+ } else if (drive->media == ide_cdrom) {
+ printk("%s: ATAPI cdrom (?)\n", drive->name);
+ } else {
+ drive->present = 0; /* nuke it */
+ }
+ }
+ return 1; /* drive was found */
+}
+
+/*
+ * Calculate the region that this interface occupies,
+ * handling interfaces where the registers may not be
+ * ordered sanely. We deal with the CONTROL register
+ * separately.
+ */
+static int hwif_check_regions (ide_hwif_t *hwif)
+{
+ int region_errors = 0;
+
+ hwif->straight8 = 0;
+ region_errors = ide_check_region(hwif->io_ports[IDE_DATA_OFFSET], 1);
+ region_errors += ide_check_region(hwif->io_ports[IDE_ERROR_OFFSET], 1);
+ region_errors += ide_check_region(hwif->io_ports[IDE_NSECTOR_OFFSET], 1);
+ region_errors += ide_check_region(hwif->io_ports[IDE_SECTOR_OFFSET], 1);
+ region_errors += ide_check_region(hwif->io_ports[IDE_LCYL_OFFSET], 1);
+ region_errors += ide_check_region(hwif->io_ports[IDE_HCYL_OFFSET], 1);
+ region_errors += ide_check_region(hwif->io_ports[IDE_SELECT_OFFSET], 1);
+ region_errors += ide_check_region(hwif->io_ports[IDE_STATUS_OFFSET], 1);
+
+ if (hwif->io_ports[IDE_CONTROL_OFFSET])
+ region_errors += ide_check_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1);
+#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC)
+ if (hwif->io_ports[IDE_IRQ_OFFSET])
+ region_errors += ide_check_region(hwif->io_ports[IDE_IRQ_OFFSET], 1);
+#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */
+ /*
+	 * If any errors are returned, we drop the hwif interface.
+ */
+ return(region_errors);
+}
+
+static void hwif_register (ide_hwif_t *hwif)
+{
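+	/*
+	 * If the data..status ports form one contiguous 8-port block, claim
+	 * it as a single region instead of eight 1-port regions.
+	 */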
+ if (((unsigned long)hwif->io_ports[IDE_DATA_OFFSET] | 7) ==
+ ((unsigned long)hwif->io_ports[IDE_STATUS_OFFSET])) {
+ ide_request_region(hwif->io_ports[IDE_DATA_OFFSET], 8, hwif->name);
+ hwif->straight8 = 1;
+ goto jump_straight8;
+ }
+
+ if (hwif->io_ports[IDE_DATA_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_DATA_OFFSET], 1, hwif->name);
+ if (hwif->io_ports[IDE_ERROR_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_ERROR_OFFSET], 1, hwif->name);
+ if (hwif->io_ports[IDE_NSECTOR_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_NSECTOR_OFFSET], 1, hwif->name);
+ if (hwif->io_ports[IDE_SECTOR_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_SECTOR_OFFSET], 1, hwif->name);
+ if (hwif->io_ports[IDE_LCYL_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_LCYL_OFFSET], 1, hwif->name);
+ if (hwif->io_ports[IDE_HCYL_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_HCYL_OFFSET], 1, hwif->name);
+ if (hwif->io_ports[IDE_SELECT_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_SELECT_OFFSET], 1, hwif->name);
+ if (hwif->io_ports[IDE_STATUS_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_STATUS_OFFSET], 1, hwif->name);
+
+jump_straight8:
+ if (hwif->io_ports[IDE_CONTROL_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1, hwif->name);
+#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC)
+ if (hwif->io_ports[IDE_IRQ_OFFSET])
+ ide_request_region(hwif->io_ports[IDE_IRQ_OFFSET], 1, hwif->name);
+#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */
+}
+
+/*
+ * This routine only knows how to look for drive units 0 and 1
+ * on an interface, so any setting of MAX_DRIVES > 2 won't work here.
+ */
+static void probe_hwif (ide_hwif_t *hwif)
+{
+ unsigned int unit;
+ unsigned long flags;
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::probe_hwif\n");
+ }
+
+ if (hwif->noprobe)
+ return;
+#ifdef CONFIG_BLK_DEV_IDE
+ if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA) {
+ extern void probe_cmos_for_drives(ide_hwif_t *);
+
+ probe_cmos_for_drives (hwif);
+ }
+#endif
+
+ if ((hwif->chipset != ide_4drives || !hwif->mate->present) &&
+#if CONFIG_BLK_DEV_PDC4030
+ (hwif->chipset != ide_pdc4030 || hwif->channel == 0) &&
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+ (hwif_check_regions(hwif))) {
+ int msgout = 0;
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ ide_drive_t *drive = &hwif->drives[unit];
+ if (drive->present) {
+ drive->present = 0;
+ printk("%s: ERROR, PORTS ALREADY IN USE\n", drive->name);
+ msgout = 1;
+ }
+ }
+ if (!msgout)
+ printk("%s: ports already in use, skipping probe\n", hwif->name);
+ return;
+ }
+
+ __save_flags(flags); /* local CPU only */
+ __sti(); /* local CPU only; needed for jiffies and irq probing */
+ /*
+ * Second drive should only exist if first drive was found,
+ * but a lot of cdrom drives are configured as single slaves.
+ */
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ ide_drive_t *drive = &hwif->drives[unit];
+ (void) probe_for_drive (drive);
+ if (drive->present && !hwif->present) {
+ hwif->present = 1;
+ if (hwif->chipset != ide_4drives || !hwif->mate->present) {
+ hwif_register(hwif);
+ }
+ }
+ }
+ if (hwif->io_ports[IDE_CONTROL_OFFSET] && hwif->reset) {
+ unsigned long timeout = jiffies + WAIT_WORSTCASE;
+ byte stat;
+
+ printk("%s: reset\n", hwif->name);
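+		/* pulse SRST in the device control register, then poll for BUSY to clear */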
+ OUT_BYTE(12, hwif->io_ports[IDE_CONTROL_OFFSET]);
+ udelay(10);
+ OUT_BYTE(8, hwif->io_ports[IDE_CONTROL_OFFSET]);
+ do {
+ ide_delay_50ms();
+ stat = IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]);
+ } while ((stat & BUSY_STAT) && 0 < (signed long)(timeout - jiffies));
+
+ }
+ __restore_flags(flags); /* local CPU only */
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ ide_drive_t *drive = &hwif->drives[unit];
+ if (drive->present) {
+ ide_tuneproc_t *tuneproc = HWIF(drive)->tuneproc;
+ if (tuneproc != NULL && drive->autotune == 1)
+ tuneproc(drive, 255); /* auto-tune PIO mode */
+ }
+ }
+}
+
+#if MAX_HWIFS > 1
+/*
+ * save_match() is used to simplify logic in init_irq() below.
+ *
+ * hwif's irq until after that hwif is actually probed/initialized.
+ * This could be a problem for the case where an hwif is on a
+ * dual interface that requires serialization (e.g. cmd640) and another
+ * dual interface that requires serialization (eg. cmd640) and another
+ * hwif using one of the same irqs is initialized beforehand.
+ *
+ * This routine detects and reports such situations, but does not fix them.
+ */
+static void save_match (ide_hwif_t *hwif, ide_hwif_t *new, ide_hwif_t **match)
+{
+ ide_hwif_t *m = *match;
+
+ if (m && m->hwgroup && m->hwgroup != new->hwgroup) {
+ if (!new->hwgroup)
+ return;
+ printk("%s: potential irq problem with %s and %s\n", hwif->name, new->name, m->name);
+ }
+ if (!m || m->irq != hwif->irq) /* don't undo a prior perfect match */
+ *match = new;
+}
+#endif /* MAX_HWIFS > 1 */
+
+/*
+ * init request queue
+ */
+static void ide_init_queue(ide_drive_t *drive)
+{
+ request_queue_t *q = &drive->queue;
+
+ q->queuedata = HWGROUP(drive);
+ blk_init_queue(q, do_ide_request);
+
+ if (drive->media == ide_disk) {
+#ifdef CONFIG_BLK_DEV_ELEVATOR_NOOP
+ elevator_init(&q->elevator, ELEVATOR_NOOP);
+#endif
+ }
+}
+
+/*
+ * This routine sets up the irq for an ide interface, and creates a new
+ * hwgroup for the irq/hwif if none was previously assigned.
+ *
+ * Much of the code is for correctly detecting/handling irq sharing
+ * and irq serialization situations. This is somewhat complex because
+ * it handles static as well as dynamic (PCMCIA) IDE interfaces.
+ *
+ * The SA_INTERRUPT in sa_flags means ide_intr() is always entered with
+ * interrupts completely disabled. This can be bad for interrupt latency,
+ * but anything else has led to problems on some machines. We re-enable
+ * interrupts as much as we can safely do in most places.
+ */
+static int init_irq (ide_hwif_t *hwif)
+{
+ unsigned long flags;
+ unsigned int index;
+ ide_hwgroup_t *hwgroup, *new_hwgroup;
+ ide_hwif_t *match = NULL;
+
+
+	/* Allocate the hwgroup buffer first: GFP_KERNEL may sleep, and we
+	   must not sleep once interrupts are disabled below. */
+
+ new_hwgroup = kmalloc(sizeof(ide_hwgroup_t),GFP_KERNEL);
+
+ save_flags(flags); /* all CPUs */
+ cli(); /* all CPUs */
+
+ hwif->hwgroup = NULL;
+#if MAX_HWIFS > 1
+ /*
+ * Group up with any other hwifs that share our irq(s).
+ */
+ for (index = 0; index < MAX_HWIFS; index++) {
+ ide_hwif_t *h = &ide_hwifs[index];
+ if (h->hwgroup) { /* scan only initialized hwif's */
+ if (hwif->irq == h->irq) {
+ hwif->sharing_irq = h->sharing_irq = 1;
+ if (hwif->chipset != ide_pci || h->chipset != ide_pci) {
+ save_match(hwif, h, &match);
+ }
+ }
+ if (hwif->serialized) {
+ if (hwif->mate && hwif->mate->irq == h->irq)
+ save_match(hwif, h, &match);
+ }
+ if (h->serialized) {
+ if (h->mate && hwif->irq == h->mate->irq)
+ save_match(hwif, h, &match);
+ }
+ }
+ }
+#endif /* MAX_HWIFS > 1 */
+	/*
+	 * If we matched an existing hwgroup, join it; otherwise form a new one
+	 */
+ if (match) {
+ hwgroup = match->hwgroup;
+ if(new_hwgroup)
+ kfree(new_hwgroup);
+ } else {
+ hwgroup = new_hwgroup;
+ if (!hwgroup) {
+ restore_flags(flags); /* all CPUs */
+ return 1;
+ }
+ memset(hwgroup, 0, sizeof(ide_hwgroup_t));
+ hwgroup->hwif = hwif->next = hwif;
+ hwgroup->rq = NULL;
+ hwgroup->handler = NULL;
+ hwgroup->drive = NULL;
+ hwgroup->busy = 0;
+ init_timer(&hwgroup->timer);
+ hwgroup->timer.function = &ide_timer_expiry;
+ hwgroup->timer.data = (unsigned long) hwgroup;
+ }
+
+ /*
+ * Allocate the irq, if not already obtained for another hwif
+ */
+ if (!match || match->irq != hwif->irq) {
+#ifdef CONFIG_IDEPCI_SHARE_IRQ
+ int sa = IDE_CHIPSET_IS_PCI(hwif->chipset) ? SA_SHIRQ : SA_INTERRUPT;
+#else /* !CONFIG_IDEPCI_SHARE_IRQ */
+ int sa = IDE_CHIPSET_IS_PCI(hwif->chipset) ? SA_INTERRUPT|SA_SHIRQ : SA_INTERRUPT;
+#endif /* CONFIG_IDEPCI_SHARE_IRQ */
+
+ if (hwif->io_ports[IDE_CONTROL_OFFSET])
+ OUT_BYTE(0x08, hwif->io_ports[IDE_CONTROL_OFFSET]); /* clear nIEN */
+
+ if (ide_request_irq(hwif->irq, &ide_intr, sa, hwif->name, hwgroup)) {
+ if (!match)
+ kfree(hwgroup);
+ restore_flags(flags); /* all CPUs */
+ return 1;
+ }
+ }
+
+ /*
+ * Everything is okay, so link us into the hwgroup
+ */
+ hwif->hwgroup = hwgroup;
+ hwif->next = hwgroup->hwif->next;
+ hwgroup->hwif->next = hwif;
+
+ for (index = 0; index < MAX_DRIVES; ++index) {
+ ide_drive_t *drive = &hwif->drives[index];
+ if (!drive->present)
+ continue;
+ if (!hwgroup->drive)
+ hwgroup->drive = drive;
+ drive->next = hwgroup->drive->next;
+ hwgroup->drive->next = drive;
+ ide_init_queue(drive);
+ }
+ if (!hwgroup->hwif) {
+ hwgroup->hwif = HWIF(hwgroup->drive);
+#ifdef DEBUG
+ printk("%s : Adding missed hwif to hwgroup!!\n", hwif->name);
+#endif
+ }
+ restore_flags(flags); /* all CPUs; safe now that hwif->hwgroup is set up */
+
+#if !defined(__mc68000__) && !defined(CONFIG_APUS) && !defined(__sparc__)
+ printk("%s at 0x%03x-0x%03x,0x%03x on irq %d", hwif->name,
+ hwif->io_ports[IDE_DATA_OFFSET],
+ hwif->io_ports[IDE_DATA_OFFSET]+7,
+ hwif->io_ports[IDE_CONTROL_OFFSET], hwif->irq);
+#elif defined(__sparc__)
+ printk("%s at 0x%03lx-0x%03lx,0x%03lx on irq %s", hwif->name,
+ hwif->io_ports[IDE_DATA_OFFSET],
+ hwif->io_ports[IDE_DATA_OFFSET]+7,
+ hwif->io_ports[IDE_CONTROL_OFFSET], __irq_itoa(hwif->irq));
+#else
+ printk("%s at %p on irq 0x%08x", hwif->name,
+ hwif->io_ports[IDE_DATA_OFFSET], hwif->irq);
+#endif /* !__mc68000__ && !CONFIG_APUS && !__sparc__ */
+ if (match)
+ printk(" (%sed with %s)",
+ hwif->sharing_irq ? "shar" : "serializ", match->name);
+ printk("\n");
+ return 0;
+}
+
+/*
+ * init_gendisk() (as opposed to ide_geninit) is called for each major device,
+ * after probing for drives, to allocate partition tables and other data
+ * structures needed for the routines in genhd.c. ide_geninit() gets called
+ * somewhat later, during the partition check.
+ */
+static void init_gendisk (ide_hwif_t *hwif)
+{
+ struct gendisk *gd;
+ unsigned int unit, units, minors;
+ int *bs, *max_sect; /* , *max_ra; */
+#ifdef DEVFS_MUST_DIE
+ extern devfs_handle_t ide_devfs_handle;
+#endif
+
+#if 1
+ units = MAX_DRIVES;
+#else
+ /* figure out maximum drive number on the interface */
+ for (units = MAX_DRIVES; units > 0; --units) {
+ if (hwif->drives[units-1].present)
+ break;
+ }
+#endif
+
+ minors = units * (1<<PARTN_BITS);
+ gd = kmalloc (sizeof(struct gendisk), GFP_KERNEL);
+ if (!gd)
+ goto err_kmalloc_gd;
+ gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL);
+ if (!gd->sizes)
+ goto err_kmalloc_gd_sizes;
+ gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL);
+ if (!gd->part)
+ goto err_kmalloc_gd_part;
+ bs = kmalloc (minors*sizeof(int), GFP_KERNEL);
+ if (!bs)
+ goto err_kmalloc_bs;
+ max_sect = kmalloc (minors*sizeof(int), GFP_KERNEL);
+ if (!max_sect)
+ goto err_kmalloc_max_sect;
+#if 0
+ max_ra = kmalloc (minors*sizeof(int), GFP_KERNEL);
+ if (!max_ra)
+ goto err_kmalloc_max_ra;
+#endif
+
+ memset(gd->part, 0, minors * sizeof(struct hd_struct));
+
+ /* cdroms and msdos f/s are examples of non-1024 blocksizes */
+ blksize_size[hwif->major] = bs;
+ max_sectors[hwif->major] = max_sect;
+ /*max_readahead[hwif->major] = max_ra;*/
+ for (unit = 0; unit < minors; ++unit) {
+ *bs++ = BLOCK_SIZE;
+		/*
+		 * IDE can do up to 128K (256 sectors) per request; we cap it
+		 * at 128 sectors here (127 for the pdc4030)
+		 */
+ *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 128);
+ /* *max_ra++ = vm_max_readahead; */
+ }
+
+ for (unit = 0; unit < units; ++unit)
+ hwif->drives[unit].part = &gd->part[unit << PARTN_BITS];
+
+ gd->major = hwif->major; /* our major device number */
+ gd->major_name = IDE_MAJOR_NAME; /* treated special in genhd.c */
+ gd->minor_shift = PARTN_BITS; /* num bits for partitions */
+ gd->max_p = 1<<PARTN_BITS; /* 1 + max partitions / drive */
+ gd->nr_real = units; /* current num real drives */
+ gd->real_devices= hwif; /* ptr to internal data */
+ gd->next = NULL; /* linked list of major devs */
+ gd->fops = ide_fops; /* file operations */
+ gd->flags = kmalloc (sizeof *gd->flags * units, GFP_KERNEL);
+ if (gd->flags)
+ memset (gd->flags, 0, sizeof *gd->flags * units);
+#ifdef DEVFS_MUST_DIE
+ gd->de_arr = kmalloc (sizeof *gd->de_arr * units, GFP_KERNEL);
+ if (gd->de_arr)
+ memset (gd->de_arr, 0, sizeof *gd->de_arr * units);
+#endif
+
+ hwif->gd = gd;
+ add_gendisk(gd);
+
+ for (unit = 0; unit < units; ++unit) {
+#if 1
+ char name[64];
+ ide_add_generic_settings(hwif->drives + unit);
+ hwif->drives[unit].dn = ((hwif->channel ? 2 : 0) + unit);
+ sprintf (name, "host%d/bus%d/target%d/lun%d",
+ (hwif->channel && hwif->mate) ?
+ hwif->mate->index : hwif->index,
+ hwif->channel, unit, hwif->drives[unit].lun);
+#ifdef DEVFS_MUST_DIE
+ if (hwif->drives[unit].present)
+ hwif->drives[unit].de = devfs_mk_dir(ide_devfs_handle, name, NULL);
+#endif
+#else
+ if (hwif->drives[unit].present) {
+ char name[64];
+
+ ide_add_generic_settings(hwif->drives + unit);
+ hwif->drives[unit].dn = ((hwif->channel ? 2 : 0) + unit);
+ sprintf (name, "host%d/bus%d/target%d/lun%d",
+ (hwif->channel && hwif->mate) ? hwif->mate->index : hwif->index,
+ hwif->channel, unit, hwif->drives[unit].lun);
+ hwif->drives[unit].de =
+ devfs_mk_dir (ide_devfs_handle, name, NULL);
+ }
+#endif
+ }
+ return;
+
+#if 0
+err_kmalloc_max_ra:
+ kfree(max_sect);
+#endif
+err_kmalloc_max_sect:
+ kfree(bs);
+err_kmalloc_bs:
+ kfree(gd->part);
+err_kmalloc_gd_part:
+ kfree(gd->sizes);
+err_kmalloc_gd_sizes:
+ kfree(gd);
+err_kmalloc_gd:
+ printk(KERN_WARNING "(ide::init_gendisk) Out of memory\n");
+ return;
+}
+
+static int hwif_init (ide_hwif_t *hwif)
+{
+ if (!hwif->present)
+ return 0;
+ if (!hwif->irq) {
+ if (!(hwif->irq = ide_default_irq(hwif->io_ports[IDE_DATA_OFFSET])))
+ {
+ printk("%s: DISABLED, NO IRQ\n", hwif->name);
+ return (hwif->present = 0);
+ }
+ }
+#ifdef CONFIG_BLK_DEV_HD
+ if (hwif->irq == HD_IRQ && hwif->io_ports[IDE_DATA_OFFSET] != HD_DATA) {
+ printk("%s: CANNOT SHARE IRQ WITH OLD HARDDISK DRIVER (hd.c)\n", hwif->name);
+ return (hwif->present = 0);
+ }
+#endif /* CONFIG_BLK_DEV_HD */
+
+ hwif->present = 0; /* we set it back to 1 if all is ok below */
+
+#ifdef DEVFS_MUST_DIE
+ if (devfs_register_blkdev (hwif->major, hwif->name, ide_fops)) {
+ printk("%s: UNABLE TO GET MAJOR NUMBER %d\n", hwif->name, hwif->major);
+ return (hwif->present = 0);
+ }
+#endif
+
+ if (init_irq(hwif)) {
+ int i = hwif->irq;
+ /*
+ * It failed to initialise. Find the default IRQ for
+ * this port and try that.
+ */
+ if (!(hwif->irq = ide_default_irq(hwif->io_ports[IDE_DATA_OFFSET]))) {
+			printk("%s: Disabled, unable to get IRQ %d.\n", hwif->name, i);
+ (void) unregister_blkdev (hwif->major, hwif->name);
+ return (hwif->present = 0);
+ }
+ if (init_irq(hwif)) {
+ printk("%s: probed IRQ %d and default IRQ %d failed.\n",
+ hwif->name, i, hwif->irq);
+ (void) unregister_blkdev (hwif->major, hwif->name);
+ return (hwif->present = 0);
+ }
+		printk("%s: probed IRQ %d failed, using default IRQ %d.\n",
+			hwif->name, i, hwif->irq);
+ }
+
+ init_gendisk(hwif);
+ blk_dev[hwif->major].data = hwif;
+ blk_dev[hwif->major].queue = ide_get_queue;
+#if 0
+ read_ahead[hwif->major] = 8; /* (4kB) */
+#endif
+ hwif->present = 1; /* success */
+
+#if (DEBUG_SPINLOCK > 0)
+{
+ static int done = 0;
+ if (!done++)
+ printk("io_request_lock is %p\n", &io_request_lock); /* FIXME */
+}
+#endif
+ return hwif->present;
+}
+
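+/*
+ * Wrappers so that the static helpers above can be called from the rest
+ * of the (Xen) build.
+ */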
+void export_ide_init_queue (ide_drive_t *drive)
+{
+ ide_init_queue(drive);
+}
+
+byte export_probe_for_drive (ide_drive_t *drive)
+{
+ return probe_for_drive(drive);
+}
+
+EXPORT_SYMBOL(export_ide_init_queue);
+EXPORT_SYMBOL(export_probe_for_drive);
+
+int ideprobe_init (void);
+static ide_module_t ideprobe_module = {
+ IDE_PROBE_MODULE,
+ ideprobe_init,
+ NULL
+};
+
+int ideprobe_init (void)
+{
+ unsigned int index;
+ int probe[MAX_HWIFS];
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::ideprobe_init\n");
+ }
+
+ MOD_INC_USE_COUNT;
+ memset(probe, 0, MAX_HWIFS * sizeof(int));
+ for (index = 0; index < MAX_HWIFS; ++index)
+ probe[index] = !ide_hwifs[index].present;
+
+ /*
+	 * Probe for drives in the usual way: CMOS/BIOS, then poke at the ports
+ */
+ for (index = 0; index < MAX_HWIFS; ++index)
+ if (probe[index])
+ probe_hwif(&ide_hwifs[index]);
+ for (index = 0; index < MAX_HWIFS; ++index)
+ if (probe[index])
+ hwif_init(&ide_hwifs[index]);
+ if (!ide_probe)
+ ide_probe = &ideprobe_module;
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+
+#ifdef MODULE
+extern int (*ide_xlate_1024_hook)(kdev_t, int, int, const char *);
+
+int init_module (void)
+{
+ unsigned int index;
+
+ for (index = 0; index < MAX_HWIFS; ++index)
+ ide_unregister(index);
+ ideprobe_init();
+ create_proc_ide_interfaces();
+ ide_xlate_1024_hook = ide_xlate_1024;
+ return 0;
+}
+
+void cleanup_module (void)
+{
+ ide_probe = NULL;
+ ide_xlate_1024_hook = 0;
+}
+MODULE_LICENSE("GPL");
+#endif /* MODULE */
diff --git a/xen/drivers/ide/ide-taskfile.c b/xen/drivers/ide/ide-taskfile.c
new file mode 100644
index 0000000000..6e1286165f
--- /dev/null
+++ b/xen/drivers/ide/ide-taskfile.c
@@ -0,0 +1,1733 @@
+/*
+ * linux/drivers/ide/ide-taskfile.c Version 0.20 Oct 11, 2000
+ *
+ * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
+ * Copyright (C) 2000 Andre Hedrick <andre@linux-ide.org>
+ *
+ * May be copied or modified under the terms of the GNU General Public License
+ *
+ * IDE_DEBUG(__LINE__);
+ */
+
+#include <xeno/config.h>
+#define __NO_VERSION__
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/blkpg.h>
+#include <xeno/slab.h>
+#include <xeno/pci.h>
+#include <xeno/delay.h>
+#include <xeno/hdreg.h>
+#include <xeno/ide.h>
+
+#include <asm/domain_page.h>
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/bitops.h>
+
+#ifdef CONFIG_IDE_TASKFILE_IO
+# define __TASKFILE__IO
+#else /* CONFIG_IDE_TASKFILE_IO */
+# undef __TASKFILE__IO
+#endif /* CONFIG_IDE_TASKFILE_IO */
+
+#define DEBUG_TASKFILE 0 /* unset when fixed */
+
+#if DEBUG_TASKFILE
+#define DTF(x...) printk(x)
+#else
+#define DTF(x...)
+#endif
+
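+/*
+ * Read back the 24-bit sector address from the taskfile registers
+ * (high cylinder, low cylinder, sector number).
+ */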
+inline u32 task_read_24 (ide_drive_t *drive)
+{
+ return (IN_BYTE(IDE_HCYL_REG)<<16) |
+ (IN_BYTE(IDE_LCYL_REG)<<8) |
+ IN_BYTE(IDE_SECTOR_REG);
+}
+
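+/*
+ * Byte-swap the 16-bit words of a buffer in place.  wcount counts 32-bit
+ * words, matching the PIO helpers below, hence two swaps per iteration.
+ */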
+static void ata_bswap_data (void *buffer, int wcount)
+{
+ u16 *p = buffer;
+
+ while (wcount--) {
+ *p = *p << 8 | *p >> 8; p++;
+ *p = *p << 8 | *p >> 8; p++;
+ }
+}
+
+#if SUPPORT_VLB_SYNC
+/*
+ * Some localbus EIDE interfaces require a special access sequence
+ * when using 32-bit I/O instructions to transfer data. We call this
+ * the "vlb_sync" sequence, which consists of three successive reads
+ * of the sector count register location, with interrupts disabled
+ * to ensure that the reads all happen together.
+ */
+static inline void task_vlb_sync (ide_ioreg_t port) {
+ (void) inb (port);
+ (void) inb (port);
+ (void) inb (port);
+}
+#endif /* SUPPORT_VLB_SYNC */
+
+/*
+ * This is used for most PIO data transfers *from* the IDE interface
+ */
+void ata_input_data (ide_drive_t *drive, void *vbuffer, unsigned int wcount)
+{
+ byte io_32bit = drive->io_32bit;
+
+ void *buffer = map_domain_mem(virt_to_phys(vbuffer));
+
+ if (io_32bit) {
+#if SUPPORT_VLB_SYNC
+ if (io_32bit & 2) {
+ unsigned long flags;
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only */
+ task_vlb_sync(IDE_NSECTOR_REG);
+ insl(IDE_DATA_REG, buffer, wcount);
+ __restore_flags(flags); /* local CPU only */
+ } else
+#endif /* SUPPORT_VLB_SYNC */
+ insl(IDE_DATA_REG, buffer, wcount);
+ } else {
+#if SUPPORT_SLOW_DATA_PORTS
+ if (drive->slow) {
+ unsigned short *ptr = (unsigned short *) buffer;
+ while (wcount--) {
+ *ptr++ = inw_p(IDE_DATA_REG);
+ *ptr++ = inw_p(IDE_DATA_REG);
+ }
+ } else
+#endif /* SUPPORT_SLOW_DATA_PORTS */
+ insw(IDE_DATA_REG, buffer, wcount<<1);
+ }
+
+ unmap_domain_mem(buffer);
+}
+
+/*
+ * This is used for most PIO data transfers *to* the IDE interface
+ */
+void ata_output_data (ide_drive_t *drive, void *vbuffer, unsigned int wcount)
+{
+ byte io_32bit = drive->io_32bit;
+
+ void *buffer = map_domain_mem(virt_to_phys(vbuffer));
+
+ if (io_32bit) {
+#if SUPPORT_VLB_SYNC
+ if (io_32bit & 2) {
+ unsigned long flags;
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only */
+ task_vlb_sync(IDE_NSECTOR_REG);
+ outsl(IDE_DATA_REG, buffer, wcount);
+ __restore_flags(flags); /* local CPU only */
+ } else
+#endif /* SUPPORT_VLB_SYNC */
+ outsl(IDE_DATA_REG, buffer, wcount);
+ } else {
+#if SUPPORT_SLOW_DATA_PORTS
+ if (drive->slow) {
+ unsigned short *ptr = (unsigned short *) buffer;
+ while (wcount--) {
+ outw_p(*ptr++, IDE_DATA_REG);
+ outw_p(*ptr++, IDE_DATA_REG);
+ }
+ } else
+#endif /* SUPPORT_SLOW_DATA_PORTS */
+ outsw(IDE_DATA_REG, buffer, wcount<<1);
+ }
+
+ unmap_domain_mem(buffer);
+}
+
+
+static inline void taskfile_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+ ata_input_data(drive, buffer, wcount);
+ if (drive->bswap)
+ ata_bswap_data(buffer, wcount);
+}
+
+static inline void taskfile_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+ if (drive->bswap) {
+ ata_bswap_data(buffer, wcount);
+ ata_output_data(drive, buffer, wcount);
+ ata_bswap_data(buffer, wcount);
+ } else {
+ ata_output_data(drive, buffer, wcount);
+ }
+}
+
+ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task)
+{
+ task_struct_t *taskfile = (task_struct_t *) task->tfRegister;
+ hob_struct_t *hobfile = (hob_struct_t *) task->hobRegister;
+ struct hd_driveid *id = drive->id;
+ byte HIHI = (drive->addressing) ? 0xE0 : 0xEF;
+
+ printk(KERN_ALERT "do_rw_taskfile\n");
+
+ /* (ks/hs): Moved to start, do not use for multiple out commands */
+ if (task->handler != task_mulout_intr) {
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl, IDE_CONTROL_REG); /* clear nIEN */
+ SELECT_MASK(HWIF(drive), drive, 0);
+ }
+
+ if ((id->command_set_2 & 0x0400) &&
+ (id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+ OUT_BYTE(hobfile->feature, IDE_FEATURE_REG);
+ OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG);
+ OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG);
+ OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG);
+ OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG);
+ }
+
+ OUT_BYTE(taskfile->feature, IDE_FEATURE_REG);
+ OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG);
+ /* refers to number of sectors to transfer */
+ OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG);
+ /* refers to sector offset or start sector */
+ OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG);
+ OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG);
+
+ OUT_BYTE((taskfile->device_head & HIHI) | drive->select.all, IDE_SELECT_REG);
+ if (task->handler != NULL) {
+#if 0
+ ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+ OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+ /*
+ * warning check for race between handler and prehandler for
+ * writing first block of data. however since we are well
+ * inside the boundaries of the seek, we should be okay.
+ */
+ if (task->prehandler != NULL) {
+ return task->prehandler(drive, task->rq);
+ }
+#else
+ ide_startstop_t startstop;
+
+ ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+ OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+
+ if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+ printk(KERN_ERR "%s: no DRQ after issuing %s\n",
+ drive->name,
+ drive->mult_count ? "MULTWRITE" : "WRITE");
+ return startstop;
+ }
+ /* (ks/hs): Fixed Multi Write */
+ if ((taskfile->command != WIN_MULTWRITE) &&
+ (taskfile->command != WIN_MULTWRITE_EXT)) {
+ struct request *rq = HWGROUP(drive)->rq;
+ /* For Write_sectors we need to stuff the first sector */
+ taskfile_output_data(drive, rq->buffer, SECTOR_WORDS);
+ rq->current_nr_sectors--;
+ } else {
+ /* Stuff first sector(s) by implicitly calling the handler */
+ if (!(drive_is_ready(drive))) {
+ /* FIXME: Replace hard-coded 100, error handling? */
+ int i;
+ for (i=0; i<100; i++) {
+ if (drive_is_ready(drive))
+ break;
+ }
+ }
+ return task->handler(drive);
+ }
+#endif
+ } else {
+		/* for DMA commands we don't set the handler; dmaproc starts the transfer */
+		if (drive->using_dma &&
+		    !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) ||
+					    (taskfile->command == WIN_WRITEDMA_EXT))
+					   ? ide_dma_write : ide_dma_read, drive)))
+			;	/* empty body: completion arrives via the DMA interrupt */
+ }
+
+ return ide_started;
+}
+
+void do_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, ide_handler_t *handler)
+{
+ struct hd_driveid *id = drive->id;
+ byte HIHI = (drive->addressing) ? 0xE0 : 0xEF;
+
+ /* (ks/hs): Moved to start, do not use for multiple out commands */
+	if (handler != task_mulout_intr) {
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl, IDE_CONTROL_REG); /* clear nIEN */
+ SELECT_MASK(HWIF(drive), drive, 0);
+ }
+
+ if ((id->command_set_2 & 0x0400) &&
+ (id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+ OUT_BYTE(hobfile->feature, IDE_FEATURE_REG);
+ OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG);
+ OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG);
+ OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG);
+ OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG);
+ }
+
+ OUT_BYTE(taskfile->feature, IDE_FEATURE_REG);
+ OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG);
+ /* refers to number of sectors to transfer */
+ OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG);
+ /* refers to sector offset or start sector */
+ OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG);
+ OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG);
+
+ OUT_BYTE((taskfile->device_head & HIHI) | drive->select.all, IDE_SELECT_REG);
+ if (handler != NULL) {
+ ide_set_handler (drive, handler, WAIT_CMD, NULL);
+ OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+ } else {
+		/* for DMA commands we don't set the handler; dmaproc starts the transfer */
+		if (drive->using_dma &&
+		    !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) ||
+					    (taskfile->command == WIN_WRITEDMA_EXT))
+					   ? ide_dma_write : ide_dma_read, drive)))
+			;	/* empty body: completion arrives via the DMA interrupt */
+ }
+}
+
+#if 0
+ide_startstop_t flagged_taskfile (ide_drive_t *drive, ide_task_t *task)
+{
+ task_struct_t *taskfile = (task_struct_t *) task->tfRegister;
+ hob_struct_t *hobfile = (hob_struct_t *) task->hobRegister;
+ struct hd_driveid *id = drive->id;
+
+ /*
+ * (KS) Check taskfile in/out flags.
+ * If set, then execute as it is defined.
+ * If not set, then define default settings.
+ * The default values are:
+ * write and read all taskfile registers (except data)
+ * write and read the hob registers (sector,nsector,lcyl,hcyl)
+ */
+ if (task->tf_out_flags.all == 0) {
+ task->tf_out_flags.all = IDE_TASKFILE_STD_OUT_FLAGS;
+ if ((id->command_set_2 & 0x0400) &&
+ (id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+			task->tf_out_flags.all |= (IDE_HOB_STD_OUT_FLAGS << 8);
+ }
+ }
+
+ if (task->tf_in_flags.all == 0) {
+ task->tf_in_flags.all = IDE_TASKFILE_STD_IN_FLAGS;
+ if ((id->command_set_2 & 0x0400) &&
+ (id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+			task->tf_in_flags.all |= (IDE_HOB_STD_IN_FLAGS << 8);
+ }
+ }
+
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl, IDE_CONTROL_REG); /* clear nIEN */
+ SELECT_MASK(HWIF(drive), drive, 0);
+
+ if (task->tf_out_flags.b.data) {
+ unsigned short data = taskfile->data + (hobfile->data << 8);
+ OUT_WORD (data, IDE_DATA_REG);
+ }
+
+ /* (KS) send hob registers first */
+ if (task->tf_out_flags.b.nsector_hob)
+ OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG);
+ if (task->tf_out_flags.b.sector_hob)
+ OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG);
+ if (task->tf_out_flags.b.lcyl_hob)
+ OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG);
+ if (task->tf_out_flags.b.hcyl_hob)
+ OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG);
+
+
+ /* (KS) Send now the standard registers */
+ if (task->tf_out_flags.b.error_feature)
+ OUT_BYTE(taskfile->feature, IDE_FEATURE_REG);
+ /* refers to number of sectors to transfer */
+ if (task->tf_out_flags.b.nsector)
+ OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG);
+ /* refers to sector offset or start sector */
+ if (task->tf_out_flags.b.sector)
+ OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG);
+ if (task->tf_out_flags.b.lcyl)
+ OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG);
+ if (task->tf_out_flags.b.hcyl)
+ OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG);
+
+ /*
+ * (KS) Do not modify the specified taskfile. We want to have a
+ * universal pass through, so we must execute ALL specified values.
+ *
+ * (KS) The drive head register is mandatory.
+ * Don't care about the out flags !
+ */
+ OUT_BYTE(taskfile->device_head | drive->select.all, IDE_SELECT_REG);
+ if (task->handler != NULL) {
+#if 0
+ ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+ OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+ /*
+ * warning check for race between handler and prehandler for
+ * writing first block of data. however since we are well
+ * inside the boundaries of the seek, we should be okay.
+ */
+ if (task->prehandler != NULL) {
+ return task->prehandler(drive, task->rq);
+ }
+#else
+ ide_startstop_t startstop;
+
+ ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+
+ /*
+ * (KS) The drive command register is also mandatory.
+ * Don't care about the out flags !
+ */
+ OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+
+ if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+ printk(KERN_ERR "%s: no DRQ after issuing %s\n",
+ drive->name,
+ drive->mult_count ? "MULTWRITE" : "WRITE");
+ return startstop;
+ }
+ /* (ks/hs): Fixed Multi Write */
+ if ((taskfile->command != WIN_MULTWRITE) &&
+ (taskfile->command != WIN_MULTWRITE_EXT)) {
+ struct request *rq = HWGROUP(drive)->rq;
+ /* For Write_sectors we need to stuff the first sector */
+ taskfile_output_data(drive, rq->buffer, SECTOR_WORDS);
+ rq->current_nr_sectors--;
+ } else {
+ /* Stuff first sector(s) by implicitly calling the handler */
+ if (!(drive_is_ready(drive))) {
+ /* FIXME: Replace hard-coded 100, error handling? */
+ int i;
+ for (i=0; i<100; i++) {
+ if (drive_is_ready(drive))
+ break;
+ }
+ }
+ return task->handler(drive);
+ }
+#endif
+ } else {
+		/* for DMA commands we don't set the handler; dmaproc starts the transfer */
+		if (drive->using_dma &&
+		    !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) ||
+					    (taskfile->command == WIN_WRITEDMA_EXT))
+					   ? ide_dma_write : ide_dma_read, drive)))
+			;	/* empty body: completion arrives via the DMA interrupt */
+ }
+
+ return ide_started;
+}
+#endif
+
+#if 0
+/*
+ * Error reporting, in human readable form (luxurious, but a memory hog).
+ */
+byte taskfile_dump_status (ide_drive_t *drive, const char *msg, byte stat)
+{
+ unsigned long flags;
+ byte err = 0;
+
+ __save_flags (flags); /* local CPU only */
+ ide__sti(); /* local CPU only */
+ printk("%s: %s: status=0x%02x", drive->name, msg, stat);
+#if FANCY_STATUS_DUMPS
+ printk(" { ");
+ if (stat & BUSY_STAT)
+ printk("Busy ");
+ else {
+ if (stat & READY_STAT) printk("DriveReady ");
+ if (stat & WRERR_STAT) printk("DeviceFault ");
+ if (stat & SEEK_STAT) printk("SeekComplete ");
+ if (stat & DRQ_STAT) printk("DataRequest ");
+ if (stat & ECC_STAT) printk("CorrectedError ");
+ if (stat & INDEX_STAT) printk("Index ");
+ if (stat & ERR_STAT) printk("Error ");
+ }
+ printk("}");
+#endif /* FANCY_STATUS_DUMPS */
+ printk("\n");
+ if ((stat & (BUSY_STAT|ERR_STAT)) == ERR_STAT) {
+ err = GET_ERR();
+ printk("%s: %s: error=0x%02x", drive->name, msg, err);
+#if FANCY_STATUS_DUMPS
+ if (drive->media == ide_disk) {
+ printk(" { ");
+ if (err & ABRT_ERR) printk("DriveStatusError ");
+ if (err & ICRC_ERR) printk("%s", (err & ABRT_ERR) ? "BadCRC " : "BadSector ");
+ if (err & ECC_ERR) printk("UncorrectableError ");
+ if (err & ID_ERR) printk("SectorIdNotFound ");
+ if (err & TRK0_ERR) printk("TrackZeroNotFound ");
+ if (err & MARK_ERR) printk("AddrMarkNotFound ");
+ printk("}");
+ if ((err & (BBD_ERR | ABRT_ERR)) == BBD_ERR || (err & (ECC_ERR|ID_ERR|MARK_ERR))) {
+ if ((drive->id->command_set_2 & 0x0400) &&
+ (drive->id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+ __u64 sectors = 0;
+ u32 low = 0, high = 0;
+ low = task_read_24(drive);
+ OUT_BYTE(0x80, IDE_CONTROL_REG);
+ high = task_read_24(drive);
+ sectors = ((__u64)high << 24) | low;
+ printk(", LBAsect=%lld", sectors);
+ } else {
+ byte cur = IN_BYTE(IDE_SELECT_REG);
+ if (cur & 0x40) { /* using LBA? */
+ printk(", LBAsect=%ld", (unsigned long)
+ ((cur&0xf)<<24)
+ |(IN_BYTE(IDE_HCYL_REG)<<16)
+ |(IN_BYTE(IDE_LCYL_REG)<<8)
+ | IN_BYTE(IDE_SECTOR_REG));
+ } else {
+ printk(", CHS=%d/%d/%d",
+ (IN_BYTE(IDE_HCYL_REG)<<8) +
+ IN_BYTE(IDE_LCYL_REG),
+ cur & 0xf,
+ IN_BYTE(IDE_SECTOR_REG));
+ }
+ }
+ if (HWGROUP(drive)->rq)
+ printk(", sector=%llu", (__u64) HWGROUP(drive)->rq->sector);
+ }
+ }
+#endif /* FANCY_STATUS_DUMPS */
+ printk("\n");
+ }
+ __restore_flags (flags); /* local CPU only */
+ return err;
+}
+
+/*
+ * Clean up after success/failure of an explicit taskfile operation.
+ */
+void ide_end_taskfile (ide_drive_t *drive, byte stat, byte err)
+{
+ unsigned long flags;
+ struct request *rq;
+ ide_task_t *args;
+ task_ioreg_t command;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ rq = HWGROUP(drive)->rq;
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ args = (ide_task_t *) rq->special;
+
+ command = args->tfRegister[IDE_COMMAND_OFFSET];
+
+ rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
+
+ args->tfRegister[IDE_ERROR_OFFSET] = err;
+ args->tfRegister[IDE_NSECTOR_OFFSET] = IN_BYTE(IDE_NSECTOR_REG);
+ args->tfRegister[IDE_SECTOR_OFFSET] = IN_BYTE(IDE_SECTOR_REG);
+ args->tfRegister[IDE_LCYL_OFFSET] = IN_BYTE(IDE_LCYL_REG);
+ args->tfRegister[IDE_HCYL_OFFSET] = IN_BYTE(IDE_HCYL_REG);
+ args->tfRegister[IDE_SELECT_OFFSET] = IN_BYTE(IDE_SELECT_REG);
+ args->tfRegister[IDE_STATUS_OFFSET] = stat;
+ if ((drive->id->command_set_2 & 0x0400) &&
+ (drive->id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+ OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG_HOB);
+ args->hobRegister[IDE_FEATURE_OFFSET_HOB] = IN_BYTE(IDE_FEATURE_REG);
+ args->hobRegister[IDE_NSECTOR_OFFSET_HOB] = IN_BYTE(IDE_NSECTOR_REG);
+ args->hobRegister[IDE_SECTOR_OFFSET_HOB] = IN_BYTE(IDE_SECTOR_REG);
+ args->hobRegister[IDE_LCYL_OFFSET_HOB] = IN_BYTE(IDE_LCYL_REG);
+ args->hobRegister[IDE_HCYL_OFFSET_HOB] = IN_BYTE(IDE_HCYL_REG);
+ }
+
+/* taskfile_settings_update(drive, args, command); */
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ blkdev_dequeue_request(rq);
+ HWGROUP(drive)->rq = NULL;
+ end_that_request_last(rq);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * task_try_to_flush_leftover_data() is invoked in response to a drive
+ * unexpectedly having its DRQ_STAT bit set. As an alternative to
+ * resetting the drive, this routine tries to clear the condition
+ * by reading a sector's worth of data from the drive. Of course,
+ * this may not help if the drive is *waiting* for data from *us*.
+ */
+void task_try_to_flush_leftover_data (ide_drive_t *drive)
+{
+ int i = (drive->mult_count ? drive->mult_count : 1) * SECTOR_WORDS;
+
+ if (drive->media != ide_disk)
+ return;
+ while (i > 0) {
+ u32 buffer[16];
+ unsigned int wcount = (i > 16) ? 16 : i;
+ i -= wcount;
+ taskfile_input_data (drive, buffer, wcount);
+ }
+}
+
+/*
+ * taskfile_error() takes action based on the error returned by the drive.
+ */
+ide_startstop_t taskfile_error (ide_drive_t *drive, const char *msg, byte stat)
+{
+ struct request *rq;
+ byte err;
+
+ err = taskfile_dump_status(drive, msg, stat);
+ if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL)
+ return ide_stopped;
+ /* retry only "normal" I/O: */
+ if (rq->cmd == IDE_DRIVE_TASKFILE) {
+ rq->errors = 1;
+ ide_end_taskfile(drive, stat, err);
+ return ide_stopped;
+ }
+ if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { /* other bits are useless when BUSY */
+ rq->errors |= ERROR_RESET;
+ } else {
+ if (drive->media == ide_disk && (stat & ERR_STAT)) {
+ /* err has different meaning on cdrom and tape */
+ if (err == ABRT_ERR) {
+ if (drive->select.b.lba && IN_BYTE(IDE_COMMAND_REG) == WIN_SPECIFY)
+ return ide_stopped; /* some newer drives don't support WIN_SPECIFY */
+ } else if ((err & (ABRT_ERR | ICRC_ERR)) == (ABRT_ERR | ICRC_ERR)) {
+ drive->crc_count++; /* UDMA crc error -- just retry the operation */
+ } else if (err & (BBD_ERR | ECC_ERR)) /* retries won't help these */
+ rq->errors = ERROR_MAX;
+ else if (err & TRK0_ERR) /* help it find track zero */
+ rq->errors |= ERROR_RECAL;
+ }
+ if ((stat & DRQ_STAT) && rq->cmd != WRITE)
+ task_try_to_flush_leftover_data(drive);
+ }
+ if (GET_STAT() & (BUSY_STAT|DRQ_STAT))
+ OUT_BYTE(WIN_IDLEIMMEDIATE,IDE_COMMAND_REG); /* force an abort */
+
+ if (rq->errors >= ERROR_MAX) {
+ if (drive->driver != NULL)
+ DRIVER(drive)->end_request(0, HWGROUP(drive));
+ else
+ ide_end_request(0, HWGROUP(drive));
+ } else {
+ if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
+ ++rq->errors;
+ return ide_do_reset(drive);
+ }
+ if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
+ drive->special.b.recalibrate = 1;
+ ++rq->errors;
+ }
+ return ide_stopped;
+}
+#endif
+
+/*
+ * Handlers for special commands without a data phase, from ide-disk
+ */
+
+/*
+ * set_multmode_intr() is invoked on completion of a WIN_SETMULT cmd.
+ */
+ide_startstop_t set_multmode_intr (ide_drive_t *drive)
+{
+ byte stat;
+
+ if (OK_STAT(stat=GET_STAT(),READY_STAT,BAD_STAT)) {
+ drive->mult_count = drive->mult_req;
+ } else {
+ drive->mult_req = drive->mult_count = 0;
+ drive->special.b.recalibrate = 1;
+ (void) ide_dump_status(drive, "set_multmode", stat);
+ }
+ return ide_stopped;
+}
+
+/*
+ * set_geometry_intr() is invoked on completion of a WIN_SPECIFY cmd.
+ */
+ide_startstop_t set_geometry_intr (ide_drive_t *drive)
+{
+ byte stat;
+
+ if (OK_STAT(stat=GET_STAT(),READY_STAT,BAD_STAT))
+ return ide_stopped;
+
+ if (stat & (ERR_STAT|DRQ_STAT))
+ return ide_error(drive, "set_geometry_intr", stat);
+
+ ide_set_handler(drive, &set_geometry_intr, WAIT_CMD, NULL);
+ return ide_started;
+}
+
+/*
+ * recal_intr() is invoked on completion of a WIN_RESTORE (recalibrate) cmd.
+ */
+ide_startstop_t recal_intr (ide_drive_t *drive)
+{
+ byte stat = GET_STAT();
+
+ if (!OK_STAT(stat,READY_STAT,BAD_STAT))
+ return ide_error(drive, "recal_intr", stat);
+ return ide_stopped;
+}
+
+/*
+ * Handler for commands without a data phase
+ */
+ide_startstop_t task_no_data_intr (ide_drive_t *drive)
+{
+ ide_task_t *args = HWGROUP(drive)->rq->special;
+ byte stat = GET_STAT();
+
+ ide__sti(); /* local CPU only */
+
+ if (!OK_STAT(stat, READY_STAT, BAD_STAT))
+ return ide_error(drive, "task_no_data_intr", stat); /* calls ide_end_drive_cmd */
+
+ if (args)
+ ide_end_drive_cmd (drive, stat, GET_ERR());
+
+ return ide_stopped;
+}
+
+/*
+ * Handler for command with PIO data-in phase
+ */
+ide_startstop_t task_in_intr (ide_drive_t *drive)
+{
+ byte stat = GET_STAT();
+ byte io_32bit = drive->io_32bit;
+ struct request *rq = HWGROUP(drive)->rq;
+ char *pBuf = NULL;
+
+ if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) {
+ if (stat & (ERR_STAT|DRQ_STAT)) {
+ return ide_error(drive, "task_in_intr", stat);
+ }
+ if (!(stat & BUSY_STAT)) {
+ DTF("task_in_intr to Soon wait for next interrupt\n");
+ ide_set_handler(drive, &task_in_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ }
+ DTF("stat: %02x\n", stat);
+ pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+ DTF("Read: %p, rq->current_nr_sectors: %d\n", pBuf, (int) rq->current_nr_sectors);
+
+ drive->io_32bit = 0;
+ taskfile_input_data(drive, pBuf, SECTOR_WORDS);
+ drive->io_32bit = io_32bit;
+
+ if (--rq->current_nr_sectors <= 0) {
+ /* (hs): swapped next 2 lines */
+ DTF("Request Ended stat: %02x\n", GET_STAT());
+ ide_end_request(1, HWGROUP(drive));
+ } else {
+ ide_set_handler(drive, &task_in_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ return ide_stopped;
+}
+
+#undef ALTSTAT_SCREW_UP
+
+#ifdef ALTSTAT_SCREW_UP
+/*
+ * (ks/hs): Poll Alternate Status Register to ensure
+ * that drive is not busy.
+ */
+byte altstat_multi_busy (ide_drive_t *drive, byte stat, const char *msg)
+{
+ int i;
+
+ DTF("multi%s: ASR = %x\n", msg, stat);
+ if (stat & BUSY_STAT) {
+ /* (ks/hs): FIXME: Replace hard-coded 100, error handling? */
+ for (i=0; i<100; i++) {
+ stat = GET_ALTSTAT();
+ if ((stat & BUSY_STAT) == 0)
+ break;
+ }
+ }
+ /*
+ * (ks/hs): Read Status AFTER Alternate Status Register
+ */
+ return(GET_STAT());
+}
+
+/*
+ * (ks/hs): Poll Alternate status register to wait for drive
+ * to become ready for next transfer
+ */
+byte altstat_multi_poll (ide_drive_t *drive, byte stat, const char *msg)
+{
+ /* (ks/hs): FIXME: Error handling, time-out? */
+ while (stat & BUSY_STAT)
+ stat = GET_ALTSTAT();
+ DTF("multi%s: nsect=1, ASR = %x\n", msg, stat);
+ return(GET_STAT()); /* (ks/hs): Clear pending IRQ */
+}
+#endif /* ALTSTAT_SCREW_UP */
+
+/*
+ * Handler for the Read Multiple command
+ */
+ide_startstop_t task_mulin_intr (ide_drive_t *drive)
+{
+ unsigned int msect, nsect;
+
+#ifdef ALTSTAT_SCREW_UP
+ byte stat = altstat_multi_busy(drive, GET_ALTSTAT(), "read");
+#else
+ byte stat = GET_STAT();
+#endif /* ALTSTAT_SCREW_UP */
+
+ byte io_32bit = drive->io_32bit;
+ struct request *rq = HWGROUP(drive)->rq;
+ char *pBuf = NULL;
+
+ if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) {
+ if (stat & (ERR_STAT|DRQ_STAT)) {
+ return ide_error(drive, "task_mulin_intr", stat);
+ }
+ /* no data yet, so wait for another interrupt */
+ ide_set_handler(drive, &task_mulin_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+
+ /* (ks/hs): Fixed Multi-Sector transfer */
+ msect = drive->mult_count;
+
+#ifdef ALTSTAT_SCREW_UP
+ /*
+	 * Screw the request, we do not support bad data-phase setups!
+ * Either read and learn the ATA standard or crash yourself!
+ */
+ if (!msect) {
+ /*
+ * (ks/hs): Drive supports multi-sector transfer,
+ * drive->mult_count was not set
+ */
+ nsect = 1;
+ while (rq->current_nr_sectors) {
+ pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+ DTF("Multiread: %p, nsect: %d, rq->current_nr_sectors: %ld\n", pBuf, nsect, rq->current_nr_sectors);
+ drive->io_32bit = 0;
+ taskfile_input_data(drive, pBuf, nsect * SECTOR_WORDS);
+ drive->io_32bit = io_32bit;
+ rq->errors = 0;
+ rq->current_nr_sectors -= nsect;
+ stat = altstat_multi_poll(drive, GET_ALTSTAT(), "read");
+ }
+ ide_end_request(1, HWGROUP(drive));
+ return ide_stopped;
+ }
+#endif /* ALTSTAT_SCREW_UP */
+
+ nsect = (rq->current_nr_sectors > msect) ? msect : rq->current_nr_sectors;
+ pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+
+ DTF("Multiread: %p, nsect: %d , rq->current_nr_sectors: %ld\n",
+ pBuf, nsect, rq->current_nr_sectors);
+ drive->io_32bit = 0;
+ taskfile_input_data(drive, pBuf, nsect * SECTOR_WORDS);
+ drive->io_32bit = io_32bit;
+ rq->errors = 0;
+ rq->current_nr_sectors -= nsect;
+ if (rq->current_nr_sectors != 0) {
+ ide_set_handler(drive, &task_mulin_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ ide_end_request(1, HWGROUP(drive));
+ return ide_stopped;
+}
+
+ide_startstop_t pre_task_out_intr (ide_drive_t *drive, struct request *rq)
+{
+ ide_task_t *args = rq->special;
+ ide_startstop_t startstop;
+
+ if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+ printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name, drive->mult_count ? "MULTWRITE" : "WRITE");
+ return startstop;
+ }
+
+ /* (ks/hs): Fixed Multi Write */
+ if ((args->tfRegister[IDE_COMMAND_OFFSET] != WIN_MULTWRITE) &&
+ (args->tfRegister[IDE_COMMAND_OFFSET] != WIN_MULTWRITE_EXT)) {
+ /* For Write_sectors we need to stuff the first sector */
+ taskfile_output_data(drive, rq->buffer, SECTOR_WORDS);
+ rq->current_nr_sectors--;
+ return ide_started;
+ } else {
+ /*
+ * (ks/hs): Stuff the first sector(s)
+ * by implicitly calling the handler
+ */
+ if (!(drive_is_ready(drive))) {
+ int i;
+ /*
+ * (ks/hs): FIXME: Replace hard-coded
+ * 100, error handling?
+ */
+ for (i=0; i<100; i++) {
+ if (drive_is_ready(drive))
+ break;
+ }
+ }
+ return args->handler(drive);
+ }
+ return ide_started;
+}
+
+/*
+ * Handler for command with PIO data-out phase
+ */
+ide_startstop_t task_out_intr (ide_drive_t *drive)
+{
+ byte stat = GET_STAT();
+ byte io_32bit = drive->io_32bit;
+ struct request *rq = HWGROUP(drive)->rq;
+ char *pBuf = NULL;
+
+ if (!rq->current_nr_sectors) {
+ ide_end_request(1, HWGROUP(drive));
+ return ide_stopped;
+ }
+
+ if (!OK_STAT(stat,DRIVE_READY,drive->bad_wstat)) {
+ return ide_error(drive, "task_out_intr", stat);
+ }
+ if ((rq->current_nr_sectors==1) ^ (stat & DRQ_STAT)) {
+ rq = HWGROUP(drive)->rq;
+ pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+ DTF("write: %p, rq->current_nr_sectors: %d\n", pBuf, (int) rq->current_nr_sectors);
+ drive->io_32bit = 0;
+ taskfile_output_data(drive, pBuf, SECTOR_WORDS);
+ drive->io_32bit = io_32bit;
+ rq->errors = 0;
+ rq->current_nr_sectors--;
+ }
+
+ if (rq->current_nr_sectors <= 0) {
+ ide_end_request(1, HWGROUP(drive));
+ } else {
+ ide_set_handler(drive, &task_out_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ return ide_stopped;
+}
+
+/*
+ * Handler for the Write Multiple command.
+ * Called directly from execute_drive_cmd for the first bunch of sectors,
+ * afterwards only by the ISR
+ */
+ide_startstop_t task_mulout_intr (ide_drive_t *drive)
+{
+ unsigned int msect, nsect;
+
+#ifdef ALTSTAT_SCREW_UP
+ byte stat = altstat_multi_busy(drive, GET_ALTSTAT(), "write");
+#else
+ byte stat = GET_STAT();
+#endif /* ALTSTAT_SCREW_UP */
+
+ byte io_32bit = drive->io_32bit;
+ struct request *rq = HWGROUP(drive)->rq;
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ char *pBuf = NULL;
+
+ /*
+	 * (ks/hs): Handle the last IRQ of a multi-sector transfer; it
+	 * occurs after all data has been sent
+ */
+ if (rq->current_nr_sectors == 0) {
+ if (stat & (ERR_STAT|DRQ_STAT))
+ return ide_error(drive, "task_mulout_intr", stat);
+ ide_end_request(1, HWGROUP(drive));
+ return ide_stopped;
+ }
+
+ if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) {
+ if (stat & (ERR_STAT|DRQ_STAT)) {
+ return ide_error(drive, "task_mulout_intr", stat);
+ }
+ /* no data yet, so wait for another interrupt */
+ if (hwgroup->handler == NULL)
+ ide_set_handler(drive, &task_mulout_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+
+ /* (ks/hs): See task_mulin_intr */
+ msect = drive->mult_count;
+
+#ifdef ALTSTAT_SCREW_UP
+ /*
+	 * Screw the request, we do not support bad data-phase setups!
+ * Either read and learn the ATA standard or crash yourself!
+ */
+ if (!msect) {
+ nsect = 1;
+ while (rq->current_nr_sectors) {
+ pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+ DTF("Multiwrite: %p, nsect: %d, rq->current_nr_sectors: %ld\n", pBuf, nsect, rq->current_nr_sectors);
+ drive->io_32bit = 0;
+ taskfile_output_data(drive, pBuf, nsect * SECTOR_WORDS);
+ drive->io_32bit = io_32bit;
+ rq->errors = 0;
+ rq->current_nr_sectors -= nsect;
+ stat = altstat_multi_poll(drive, GET_ALTSTAT(), "write");
+ }
+ ide_end_request(1, HWGROUP(drive));
+ return ide_stopped;
+ }
+#endif /* ALTSTAT_SCREW_UP */
+
+ nsect = (rq->current_nr_sectors > msect) ? msect : rq->current_nr_sectors;
+ pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+ DTF("Multiwrite: %p, nsect: %d , rq->current_nr_sectors: %ld\n",
+ pBuf, nsect, rq->current_nr_sectors);
+ drive->io_32bit = 0;
+ taskfile_output_data(drive, pBuf, nsect * SECTOR_WORDS);
+ drive->io_32bit = io_32bit;
+ rq->errors = 0;
+ rq->current_nr_sectors -= nsect;
+ if (hwgroup->handler == NULL)
+ ide_set_handler(drive, &task_mulout_intr, WAIT_CMD, NULL);
+ return ide_started;
+}
+
+/* Called internally to figure out the type of command being issued */
+ide_pre_handler_t * ide_pre_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile)
+{
+ switch(taskfile->command) {
+ /* IDE_DRIVE_TASK_RAW_WRITE */
+ case CFA_WRITE_MULTI_WO_ERASE:
+ case WIN_MULTWRITE:
+ case WIN_MULTWRITE_EXT:
+// case WIN_WRITEDMA:
+// case WIN_WRITEDMA_QUEUED:
+// case WIN_WRITEDMA_EXT:
+// case WIN_WRITEDMA_QUEUED_EXT:
+ /* IDE_DRIVE_TASK_OUT */
+ case WIN_WRITE:
+ case WIN_WRITE_VERIFY:
+ case WIN_WRITE_BUFFER:
+ case CFA_WRITE_SECT_WO_ERASE:
+ case WIN_DOWNLOAD_MICROCODE:
+ return &pre_task_out_intr;
+ /* IDE_DRIVE_TASK_OUT */
+ case WIN_SMART:
+ if (taskfile->feature == SMART_WRITE_LOG_SECTOR)
+ return &pre_task_out_intr;
+ default:
+ break;
+ }
+ return(NULL);
+}
+
+/* Called internally to figure out the type of command being issued */
+ide_handler_t * ide_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile)
+{
+ switch(taskfile->command) {
+ case WIN_IDENTIFY:
+ case WIN_PIDENTIFY:
+ case CFA_TRANSLATE_SECTOR:
+ case WIN_READ_BUFFER:
+ case WIN_READ:
+ case WIN_READ_EXT:
+ return &task_in_intr;
+ case WIN_SECURITY_DISABLE:
+ case WIN_SECURITY_ERASE_UNIT:
+ case WIN_SECURITY_SET_PASS:
+ case WIN_SECURITY_UNLOCK:
+ case WIN_DOWNLOAD_MICROCODE:
+ case CFA_WRITE_SECT_WO_ERASE:
+ case WIN_WRITE_BUFFER:
+ case WIN_WRITE_VERIFY:
+ case WIN_WRITE:
+ case WIN_WRITE_EXT:
+ return &task_out_intr;
+ case WIN_MULTREAD:
+ case WIN_MULTREAD_EXT:
+ return &task_mulin_intr;
+ case CFA_WRITE_MULTI_WO_ERASE:
+ case WIN_MULTWRITE:
+ case WIN_MULTWRITE_EXT:
+ return &task_mulout_intr;
+ case WIN_SMART:
+ switch(taskfile->feature) {
+ case SMART_READ_VALUES:
+ case SMART_READ_THRESHOLDS:
+ case SMART_READ_LOG_SECTOR:
+ return &task_in_intr;
+ case SMART_WRITE_LOG_SECTOR:
+ return &task_out_intr;
+ default:
+ return &task_no_data_intr;
+ }
+ case CFA_REQ_EXT_ERROR_CODE:
+ case CFA_ERASE_SECTORS:
+ case WIN_VERIFY:
+ case WIN_VERIFY_EXT:
+ case WIN_SEEK:
+ return &task_no_data_intr;
+ case WIN_SPECIFY:
+ return &set_geometry_intr;
+ case WIN_RESTORE:
+ return &recal_intr;
+ case WIN_DIAGNOSE:
+ case WIN_FLUSH_CACHE:
+ case WIN_FLUSH_CACHE_EXT:
+ case WIN_STANDBYNOW1:
+ case WIN_STANDBYNOW2:
+ case WIN_SLEEPNOW1:
+ case WIN_SLEEPNOW2:
+ case WIN_SETIDLE1:
+ case WIN_CHECKPOWERMODE1:
+ case WIN_CHECKPOWERMODE2:
+ case WIN_GETMEDIASTATUS:
+ case WIN_MEDIAEJECT:
+ return &task_no_data_intr;
+ case WIN_SETMULT:
+ return &set_multmode_intr;
+ case WIN_READ_NATIVE_MAX:
+ case WIN_SET_MAX:
+ case WIN_READ_NATIVE_MAX_EXT:
+ case WIN_SET_MAX_EXT:
+ case WIN_SECURITY_ERASE_PREPARE:
+ case WIN_SECURITY_FREEZE_LOCK:
+ case WIN_DOORLOCK:
+ case WIN_DOORUNLOCK:
+ case WIN_SETFEATURES:
+ return &task_no_data_intr;
+ case DISABLE_SEAGATE:
+ case EXABYTE_ENABLE_NEST:
+ return &task_no_data_intr;
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ case WIN_READDMA:
+ case WIN_IDENTIFY_DMA:
+ case WIN_READDMA_QUEUED:
+ case WIN_READDMA_EXT:
+ case WIN_READDMA_QUEUED_EXT:
+ case WIN_WRITEDMA:
+ case WIN_WRITEDMA_QUEUED:
+ case WIN_WRITEDMA_EXT:
+ case WIN_WRITEDMA_QUEUED_EXT:
+#endif
+ case WIN_FORMAT:
+ case WIN_INIT:
+ case WIN_DEVICE_RESET:
+ case WIN_QUEUED_SERVICE:
+ case WIN_PACKETCMD:
+ default:
+ return(NULL);
+ }
+}
+
+/* Called by the ioctl path to figure out the type of command being issued */
+int ide_cmd_type_parser (ide_task_t *args)
+{
+ struct hd_drive_task_hdr *taskfile = (struct hd_drive_task_hdr *) args->tfRegister;
+ struct hd_drive_hob_hdr *hobfile = (struct hd_drive_hob_hdr *) args->hobRegister;
+
+ args->prehandler = ide_pre_handler_parser(taskfile, hobfile);
+ args->handler = ide_handler_parser(taskfile, hobfile);
+
+ switch(args->tfRegister[IDE_COMMAND_OFFSET]) {
+ case WIN_IDENTIFY:
+ case WIN_PIDENTIFY:
+ return IDE_DRIVE_TASK_IN;
+ case CFA_TRANSLATE_SECTOR:
+ case WIN_READ:
+ case WIN_READ_BUFFER:
+ return IDE_DRIVE_TASK_IN;
+ case WIN_WRITE:
+ case WIN_WRITE_VERIFY:
+ case WIN_WRITE_BUFFER:
+ case CFA_WRITE_SECT_WO_ERASE:
+ case WIN_DOWNLOAD_MICROCODE:
+ return IDE_DRIVE_TASK_RAW_WRITE;
+ case WIN_MULTREAD:
+ return IDE_DRIVE_TASK_IN;
+ case CFA_WRITE_MULTI_WO_ERASE:
+ case WIN_MULTWRITE:
+ return IDE_DRIVE_TASK_RAW_WRITE;
+ case WIN_SECURITY_DISABLE:
+ case WIN_SECURITY_ERASE_UNIT:
+ case WIN_SECURITY_SET_PASS:
+ case WIN_SECURITY_UNLOCK:
+ return IDE_DRIVE_TASK_OUT;
+ case WIN_SMART:
+ args->tfRegister[IDE_LCYL_OFFSET] = SMART_LCYL_PASS;
+ args->tfRegister[IDE_HCYL_OFFSET] = SMART_HCYL_PASS;
+ switch(args->tfRegister[IDE_FEATURE_OFFSET]) {
+ case SMART_READ_VALUES:
+ case SMART_READ_THRESHOLDS:
+ case SMART_READ_LOG_SECTOR:
+ return IDE_DRIVE_TASK_IN;
+ case SMART_WRITE_LOG_SECTOR:
+ return IDE_DRIVE_TASK_OUT;
+ default:
+ return IDE_DRIVE_TASK_NO_DATA;
+ }
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ case WIN_READDMA:
+ case WIN_IDENTIFY_DMA:
+ case WIN_READDMA_QUEUED:
+ case WIN_READDMA_EXT:
+ case WIN_READDMA_QUEUED_EXT:
+ return IDE_DRIVE_TASK_IN;
+ case WIN_WRITEDMA:
+ case WIN_WRITEDMA_QUEUED:
+ case WIN_WRITEDMA_EXT:
+ case WIN_WRITEDMA_QUEUED_EXT:
+ return IDE_DRIVE_TASK_RAW_WRITE;
+#endif
+ case WIN_SETFEATURES:
+ switch(args->tfRegister[IDE_FEATURE_OFFSET]) {
+ case SETFEATURES_XFER:
+ return IDE_DRIVE_TASK_SET_XFER;
+ case SETFEATURES_DIS_DEFECT:
+ case SETFEATURES_EN_APM:
+ case SETFEATURES_DIS_MSN:
+ case SETFEATURES_EN_RI:
+ case SETFEATURES_EN_SI:
+ case SETFEATURES_DIS_RPOD:
+ case SETFEATURES_DIS_WCACHE:
+ case SETFEATURES_EN_DEFECT:
+ case SETFEATURES_DIS_APM:
+ case SETFEATURES_EN_MSN:
+ case SETFEATURES_EN_RLA:
+ case SETFEATURES_PREFETCH:
+ case SETFEATURES_EN_RPOD:
+ case SETFEATURES_DIS_RI:
+ case SETFEATURES_DIS_SI:
+ default:
+ return IDE_DRIVE_TASK_NO_DATA;
+ }
+ case WIN_NOP:
+ case CFA_REQ_EXT_ERROR_CODE:
+ case CFA_ERASE_SECTORS:
+ case WIN_VERIFY:
+ case WIN_VERIFY_EXT:
+ case WIN_SEEK:
+ case WIN_SPECIFY:
+ case WIN_RESTORE:
+ case WIN_DIAGNOSE:
+ case WIN_FLUSH_CACHE:
+ case WIN_FLUSH_CACHE_EXT:
+ case WIN_STANDBYNOW1:
+ case WIN_STANDBYNOW2:
+ case WIN_SLEEPNOW1:
+ case WIN_SLEEPNOW2:
+ case WIN_SETIDLE1:
+ case DISABLE_SEAGATE:
+ case WIN_CHECKPOWERMODE1:
+ case WIN_CHECKPOWERMODE2:
+ case WIN_GETMEDIASTATUS:
+ case WIN_MEDIAEJECT:
+ case WIN_SETMULT:
+ case WIN_READ_NATIVE_MAX:
+ case WIN_SET_MAX:
+ case WIN_READ_NATIVE_MAX_EXT:
+ case WIN_SET_MAX_EXT:
+ case WIN_SECURITY_ERASE_PREPARE:
+ case WIN_SECURITY_FREEZE_LOCK:
+ case EXABYTE_ENABLE_NEST:
+ case WIN_DOORLOCK:
+ case WIN_DOORUNLOCK:
+ return IDE_DRIVE_TASK_NO_DATA;
+ case WIN_FORMAT:
+ case WIN_INIT:
+ case WIN_DEVICE_RESET:
+ case WIN_QUEUED_SERVICE:
+ case WIN_PACKETCMD:
+ default:
+ return IDE_DRIVE_TASK_INVALID;
+ }
+}
+
+/*
+ * This function is intended to be used prior to invoking ide_do_drive_cmd().
+ */
+void ide_init_drive_taskfile (struct request *rq)
+{
+ memset(rq, 0, sizeof(*rq));
+ rq->cmd = IDE_DRIVE_TASK_NO_DATA;
+}
+
+/*
+ * This is kept for internal use only !!!
+ * It is an internal call and nobody in user-space has a damn
+ * reason to invoke this taskfile path directly.
+ *
+ * ide_raw_taskfile is the one through which user-space requests run.
+ */
+int ide_wait_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, byte *buf)
+{
+ struct request rq;
+ ide_task_t args;
+
+ memset(&args, 0, sizeof(ide_task_t));
+
+ args.tfRegister[IDE_DATA_OFFSET] = taskfile->data;
+ args.tfRegister[IDE_FEATURE_OFFSET] = taskfile->feature;
+ args.tfRegister[IDE_NSECTOR_OFFSET] = taskfile->sector_count;
+ args.tfRegister[IDE_SECTOR_OFFSET] = taskfile->sector_number;
+ args.tfRegister[IDE_LCYL_OFFSET] = taskfile->low_cylinder;
+ args.tfRegister[IDE_HCYL_OFFSET] = taskfile->high_cylinder;
+ args.tfRegister[IDE_SELECT_OFFSET] = taskfile->device_head;
+ args.tfRegister[IDE_COMMAND_OFFSET] = taskfile->command;
+
+ args.hobRegister[IDE_DATA_OFFSET_HOB] = hobfile->data;
+ args.hobRegister[IDE_FEATURE_OFFSET_HOB] = hobfile->feature;
+ args.hobRegister[IDE_NSECTOR_OFFSET_HOB] = hobfile->sector_count;
+ args.hobRegister[IDE_SECTOR_OFFSET_HOB] = hobfile->sector_number;
+ args.hobRegister[IDE_LCYL_OFFSET_HOB] = hobfile->low_cylinder;
+ args.hobRegister[IDE_HCYL_OFFSET_HOB] = hobfile->high_cylinder;
+ args.hobRegister[IDE_SELECT_OFFSET_HOB] = hobfile->device_head;
+ args.hobRegister[IDE_CONTROL_OFFSET_HOB] = hobfile->control;
+
+ ide_init_drive_taskfile(&rq);
+ /* This is kept for internal use only !!! */
+ args.command_type = ide_cmd_type_parser (&args);
+ if (args.command_type != IDE_DRIVE_TASK_NO_DATA)
+ rq.current_nr_sectors = rq.nr_sectors = (hobfile->sector_count << 8) | taskfile->sector_count;
+
+ rq.cmd = IDE_DRIVE_TASKFILE;
+ rq.buffer = buf;
+ rq.special = &args;
+ return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
+
+int ide_raw_taskfile (ide_drive_t *drive, ide_task_t *args, byte *buf)
+{
+ struct request rq;
+ ide_init_drive_taskfile(&rq);
+ rq.cmd = IDE_DRIVE_TASKFILE;
+ rq.buffer = buf;
+
+ if (args->command_type != IDE_DRIVE_TASK_NO_DATA)
+ rq.current_nr_sectors = rq.nr_sectors = (args->hobRegister[IDE_NSECTOR_OFFSET_HOB] << 8) | args->tfRegister[IDE_NSECTOR_OFFSET];
+
+ rq.special = args;
+ return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
+
+
+#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG
+char * ide_ioctl_verbose (unsigned int cmd)
+{
+ return("unknown");
+}
+
+char * ide_task_cmd_verbose (byte task)
+{
+ return("unknown");
+}
+#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */
+
+/*
+ * The taskfile glue table
+ *
+ * reqtask.data_phase reqtask.req_cmd
+ * args.command_type args.handler
+ *
+ * TASKFILE_P_OUT_DMAQ ?? ??
+ * TASKFILE_P_IN_DMAQ ?? ??
+ * TASKFILE_P_OUT_DMA ?? ??
+ * TASKFILE_P_IN_DMA ?? ??
+ * TASKFILE_P_OUT ?? ??
+ * TASKFILE_P_IN ?? ??
+ *
+ * TASKFILE_OUT_DMAQ IDE_DRIVE_TASK_RAW_WRITE NULL
+ * TASKFILE_IN_DMAQ IDE_DRIVE_TASK_IN NULL
+ *
+ * TASKFILE_OUT_DMA IDE_DRIVE_TASK_RAW_WRITE NULL
+ * TASKFILE_IN_DMA IDE_DRIVE_TASK_IN NULL
+ *
+ * TASKFILE_IN_OUT ?? ??
+ *
+ * TASKFILE_MULTI_OUT IDE_DRIVE_TASK_RAW_WRITE task_mulout_intr
+ * TASKFILE_MULTI_IN IDE_DRIVE_TASK_IN task_mulin_intr
+ *
+ * TASKFILE_OUT IDE_DRIVE_TASK_RAW_WRITE task_out_intr
+ * TASKFILE_OUT IDE_DRIVE_TASK_OUT task_out_intr
+ *
+ * TASKFILE_IN IDE_DRIVE_TASK_IN task_in_intr
+ * TASKFILE_NO_DATA IDE_DRIVE_TASK_NO_DATA task_no_data_intr
+ *
+ * IDE_DRIVE_TASK_SET_XFER task_no_data_intr
+ * IDE_DRIVE_TASK_INVALID
+ *
+ */
+
+#define MAX_DMA (256*SECTOR_WORDS)
+
+int ide_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+ ide_task_request_t *req_task;
+ ide_task_t args;
+
+ byte *outbuf = NULL;
+ byte *inbuf = NULL;
+ task_ioreg_t *argsptr = args.tfRegister;
+ task_ioreg_t *hobsptr = args.hobRegister;
+ int err = 0;
+ int tasksize = sizeof(struct ide_task_request_s);
+ int taskin = 0;
+ int taskout = 0;
+
+ req_task = kmalloc(tasksize, GFP_KERNEL);
+ if (req_task == NULL) return -ENOMEM;
+ memset(req_task, 0, tasksize);
+ if (copy_from_user(req_task, (void *) arg, tasksize)) {
+ kfree(req_task);
+ return -EFAULT;
+ }
+
+ taskout = (int) req_task->out_size;
+ taskin = (int) req_task->in_size;
+
+ if (taskout) {
+ int outtotal = tasksize;
+ outbuf = kmalloc(taskout, GFP_KERNEL);
+ if (outbuf == NULL) {
+ err = -ENOMEM;
+ goto abort;
+ }
+ memset(outbuf, 0, taskout);
+ if (copy_from_user(outbuf, (void *)arg + outtotal, taskout)) {
+ err = -EFAULT;
+ goto abort;
+ }
+ }
+
+ if (taskin) {
+ int intotal = tasksize + taskout;
+ inbuf = kmalloc(taskin, GFP_KERNEL);
+ if (inbuf == NULL) {
+ err = -ENOMEM;
+ goto abort;
+ }
+ memset(inbuf, 0, taskin);
+ if (copy_from_user(inbuf, (void *)arg + intotal , taskin)) {
+ err = -EFAULT;
+ goto abort;
+ }
+ }
+
+ memset(argsptr, 0, HDIO_DRIVE_TASK_HDR_SIZE);
+ memset(hobsptr, 0, HDIO_DRIVE_HOB_HDR_SIZE);
+ memcpy(argsptr, req_task->io_ports, HDIO_DRIVE_TASK_HDR_SIZE);
+ memcpy(hobsptr, req_task->hob_ports, HDIO_DRIVE_HOB_HDR_SIZE);
+
+ args.tf_in_flags = req_task->in_flags;
+ args.tf_out_flags = req_task->out_flags;
+ args.data_phase = req_task->data_phase;
+ args.command_type = req_task->req_cmd;
+
+#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG
+ DTF("%s: ide_ioctl_cmd %s: ide_task_cmd %s\n",
+ drive->name,
+ ide_ioctl_verbose(cmd),
+ ide_task_cmd_verbose(args.tfRegister[IDE_COMMAND_OFFSET]));
+#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */
+
+ switch(req_task->data_phase) {
+ case TASKFILE_OUT_DMAQ:
+ case TASKFILE_OUT_DMA:
+ args.prehandler = NULL;
+ args.handler = NULL;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, outbuf);
+ break;
+ case TASKFILE_IN_DMAQ:
+ case TASKFILE_IN_DMA:
+ args.prehandler = NULL;
+ args.handler = NULL;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, inbuf);
+ break;
+ case TASKFILE_IN_OUT:
+#if 0
+ args.prehandler = &pre_task_out_intr;
+ args.handler = &task_out_intr;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, outbuf);
+ args.prehandler = NULL;
+ args.handler = &task_in_intr;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, inbuf);
+ break;
+#else
+ err = -EFAULT;
+ goto abort;
+#endif
+ case TASKFILE_MULTI_OUT:
+ if (drive->mult_count) {
+ args.prehandler = &pre_task_out_intr;
+ args.handler = &task_mulout_intr;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, outbuf);
+ } else {
+ /* (hs): give up if multcount is not set */
+ printk("%s: %s Multimode Write " \
+ "multcount is not set\n",
+ drive->name, __FUNCTION__);
+ err = -EPERM;
+ goto abort;
+ }
+ break;
+ case TASKFILE_OUT:
+ args.prehandler = &pre_task_out_intr;
+ args.handler = &task_out_intr;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, outbuf);
+ break;
+ case TASKFILE_MULTI_IN:
+ if (drive->mult_count) {
+ args.prehandler = NULL;
+ args.handler = &task_mulin_intr;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, inbuf);
+ } else {
+ /* (hs): give up if multcount is not set */
+ printk("%s: %s Multimode Read failure " \
+ "multcount is not set\n",
+ drive->name, __FUNCTION__);
+ err = -EPERM;
+ goto abort;
+ }
+ break;
+ case TASKFILE_IN:
+ args.prehandler = NULL;
+ args.handler = &task_in_intr;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, inbuf);
+ break;
+ case TASKFILE_NO_DATA:
+ args.prehandler = NULL;
+ args.handler = &task_no_data_intr;
+ args.posthandler = NULL;
+ err = ide_raw_taskfile(drive, &args, NULL);
+ break;
+ default:
+ args.prehandler = NULL;
+ args.handler = NULL;
+ args.posthandler = NULL;
+ err = -EFAULT;
+ goto abort;
+ }
+
+ memcpy(req_task->io_ports, &(args.tfRegister), HDIO_DRIVE_TASK_HDR_SIZE);
+ memcpy(req_task->hob_ports, &(args.hobRegister), HDIO_DRIVE_HOB_HDR_SIZE);
+ req_task->in_flags = args.tf_in_flags;
+ req_task->out_flags = args.tf_out_flags;
+
+ if (copy_to_user((void *)arg, req_task, tasksize)) {
+ err = -EFAULT;
+ goto abort;
+ }
+ if (taskout) {
+ int outtotal = tasksize;
+ if (copy_to_user((void *)arg+outtotal, outbuf, taskout)) {
+ err = -EFAULT;
+ goto abort;
+ }
+ }
+ if (taskin) {
+ int intotal = tasksize + taskout;
+ if (copy_to_user((void *)arg+intotal, inbuf, taskin)) {
+ err = -EFAULT;
+ goto abort;
+ }
+ }
+abort:
+ kfree(req_task);
+ if (outbuf != NULL)
+ kfree(outbuf);
+ if (inbuf != NULL)
+ kfree(inbuf);
+ return err;
+}
+
+EXPORT_SYMBOL(task_read_24);
+EXPORT_SYMBOL(do_rw_taskfile);
+EXPORT_SYMBOL(do_taskfile);
+// EXPORT_SYMBOL(flagged_taskfile);
+
+//EXPORT_SYMBOL(ide_end_taskfile);
+
+EXPORT_SYMBOL(set_multmode_intr);
+EXPORT_SYMBOL(set_geometry_intr);
+EXPORT_SYMBOL(recal_intr);
+
+EXPORT_SYMBOL(task_no_data_intr);
+EXPORT_SYMBOL(task_in_intr);
+EXPORT_SYMBOL(task_mulin_intr);
+EXPORT_SYMBOL(pre_task_out_intr);
+EXPORT_SYMBOL(task_out_intr);
+EXPORT_SYMBOL(task_mulout_intr);
+
+EXPORT_SYMBOL(ide_init_drive_taskfile);
+EXPORT_SYMBOL(ide_wait_taskfile);
+EXPORT_SYMBOL(ide_raw_taskfile);
+EXPORT_SYMBOL(ide_pre_handler_parser);
+EXPORT_SYMBOL(ide_handler_parser);
+EXPORT_SYMBOL(ide_cmd_type_parser);
+EXPORT_SYMBOL(ide_taskfile_ioctl);
+
+#ifdef CONFIG_PKT_TASK_IOCTL
+
+#if 0
+{
+
+{ /* start cdrom */
+
+ struct cdrom_info *info = drive->driver_data;
+
+ if (info->dma) {
+ if (info->cmd == READ) {
+ info->dma = !HWIF(drive)->dmaproc(ide_dma_read, drive);
+ } else if (info->cmd == WRITE) {
+ info->dma = !HWIF(drive)->dmaproc(ide_dma_write, drive);
+ } else {
+ printk("ide-cd: DMA set, but not allowed\n");
+ }
+ }
+
+ /* Set up the controller registers. */
+ OUT_BYTE (info->dma, IDE_FEATURE_REG);
+ OUT_BYTE (0, IDE_NSECTOR_REG);
+ OUT_BYTE (0, IDE_SECTOR_REG);
+
+ OUT_BYTE (xferlen & 0xff, IDE_LCYL_REG);
+ OUT_BYTE (xferlen >> 8 , IDE_HCYL_REG);
+ if (IDE_CONTROL_REG)
+ OUT_BYTE (drive->ctl, IDE_CONTROL_REG);
+
+ if (info->dma)
+ (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive));
+
+ if (CDROM_CONFIG_FLAGS (drive)->drq_interrupt) {
+ ide_set_handler (drive, handler, WAIT_CMD, cdrom_timer_expiry);
+ OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); /* packet command */
+ return ide_started;
+ } else {
+ OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); /* packet command */
+ return (*handler) (drive);
+ }
+
+} /* end cdrom */
+
+{ /* start floppy */
+
+ idefloppy_floppy_t *floppy = drive->driver_data;
+ idefloppy_bcount_reg_t bcount;
+ int dma_ok = 0;
+
+ floppy->pc=pc; /* Set the current packet command */
+
+ pc->retries++;
+ pc->actually_transferred=0; /* We haven't transferred any data yet */
+ pc->current_position=pc->buffer;
+ bcount.all = IDE_MIN(pc->request_transfer, 63 * 1024);
+
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (test_and_clear_bit (PC_DMA_ERROR, &pc->flags)) {
+ (void) HWIF(drive)->dmaproc(ide_dma_off, drive);
+ }
+ if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma)
+ dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive);
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+
+ if (IDE_CONTROL_REG)
+ OUT_BYTE (drive->ctl,IDE_CONTROL_REG);
+ OUT_BYTE (dma_ok ? 1:0,IDE_FEATURE_REG); /* Use PIO/DMA */
+ OUT_BYTE (bcount.b.high,IDE_BCOUNTH_REG);
+ OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG);
+ OUT_BYTE (drive->select.all,IDE_SELECT_REG);
+
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (dma_ok) { /* Begin DMA, if necessary */
+ set_bit (PC_DMA_IN_PROGRESS, &pc->flags);
+ (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive));
+ }
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+
+} /* end floppy */
+
+{ /* start tape */
+
+ idetape_tape_t *tape = drive->driver_data;
+
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (test_and_clear_bit (PC_DMA_ERROR, &pc->flags)) {
+ printk (KERN_WARNING "ide-tape: DMA disabled, reverting to PIO\n");
+ (void) HWIF(drive)->dmaproc(ide_dma_off, drive);
+ }
+ if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma)
+ dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive);
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+
+ if (IDE_CONTROL_REG)
+ OUT_BYTE (drive->ctl,IDE_CONTROL_REG);
+ OUT_BYTE (dma_ok ? 1:0,IDE_FEATURE_REG); /* Use PIO/DMA */
+ OUT_BYTE (bcount.b.high,IDE_BCOUNTH_REG);
+ OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG);
+ OUT_BYTE (drive->select.all,IDE_SELECT_REG);
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (dma_ok) { /* Begin DMA, if necessary */
+ set_bit (PC_DMA_IN_PROGRESS, &pc->flags);
+ (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive));
+ }
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+ if (test_bit(IDETAPE_DRQ_INTERRUPT, &tape->flags)) {
+ ide_set_handler(drive, &idetape_transfer_pc, IDETAPE_WAIT_CMD, NULL);
+ OUT_BYTE(WIN_PACKETCMD, IDE_COMMAND_REG);
+ return ide_started;
+ } else {
+ OUT_BYTE(WIN_PACKETCMD, IDE_COMMAND_REG);
+ return idetape_transfer_pc(drive);
+ }
+
+} /* end tape */
+
+}
+#endif
+
+int pkt_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+#if 0
+ switch(req_task->data_phase) {
+ case TASKFILE_P_OUT_DMAQ:
+ case TASKFILE_P_IN_DMAQ:
+ case TASKFILE_P_OUT_DMA:
+ case TASKFILE_P_IN_DMA:
+ case TASKFILE_P_OUT:
+ case TASKFILE_P_IN:
+ }
+#endif
+ return -ENOMSG;
+}
+
+EXPORT_SYMBOL(pkt_taskfile_ioctl);
+
+#endif /* CONFIG_PKT_TASK_IOCTL */
diff --git a/xen/drivers/ide/ide-xeno.c b/xen/drivers/ide/ide-xeno.c
new file mode 100644
index 0000000000..0b7e481ddf
--- /dev/null
+++ b/xen/drivers/ide/ide-xeno.c
@@ -0,0 +1,41 @@
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/ide.h>
+#include <hypervisor-ifs/block.h>
+#include <asm/domain_page.h>
+#include <asm/io.h>
+
+void ide_probe_devices (xen_disk_info_t* xdi)
+{
+ int loop;
+ unsigned int unit;
+ xen_disk_info_t *xen_xdi = map_domain_mem(virt_to_phys(xdi));
+
+ for (loop = 0; loop < MAX_HWIFS; ++loop) {
+
+ ide_hwif_t *hwif = &ide_hwifs[loop];
+ if (hwif->present) {
+
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ unsigned long capacity;
+ ide_drive_t *drive = &hwif->drives[unit];
+
+ if (drive->present) {
+ capacity = current_capacity (drive);
+ xen_xdi->disks[xen_xdi->count].type = XEN_DISK_IDE;
+ xen_xdi->disks[xen_xdi->count].capacity = capacity;
+ xen_xdi->count++;
+
+ printk (KERN_ALERT "IDE-XENO %d\n", xen_xdi->count);
+ printk (KERN_ALERT " capacity 0x%lx\n", capacity);
+ printk (KERN_ALERT " head 0x%x\n", drive->bios_head);
+ printk (KERN_ALERT " sector 0x%x\n", drive->bios_sect);
+ printk (KERN_ALERT " cylinder 0x%x\n", drive->bios_cyl);
+ }
+ }
+ }
+ }
+
+ unmap_domain_mem(xen_xdi);
+}
diff --git a/xen/drivers/ide/ide.c b/xen/drivers/ide/ide.c
new file mode 100644
index 0000000000..1db4e34834
--- /dev/null
+++ b/xen/drivers/ide/ide.c
@@ -0,0 +1,4197 @@
+/*
+ * linux/drivers/ide/ide.c Version 6.31 June 9, 2000
+ *
+ * Copyright (C) 1994-1998 Linus Torvalds & authors (see below)
+ */
+
+/*
+ * Mostly written by Mark Lord <mlord@pobox.com>
+ * and Gadi Oxman <gadio@netvision.net.il>
+ * and Andre Hedrick <andre@linux-ide.org>
+ *
+ * See linux/MAINTAINERS for address of current maintainer.
+ *
+ * This is the multiple IDE interface driver, as evolved from hd.c.
+ * It supports up to MAX_HWIFS IDE interfaces, on one or more IRQs (usually 14 & 15).
+ * There can be up to two drives per interface, as per the ATA-2 spec.
+ *
+ * Primary: ide0, port 0x1f0; major=3; hda is minor=0; hdb is minor=64
+ * Secondary: ide1, port 0x170; major=22; hdc is minor=0; hdd is minor=64
+ * Tertiary: ide2, port 0x???; major=33; hde is minor=0; hdf is minor=64
+ * Quaternary: ide3, port 0x???; major=34; hdg is minor=0; hdh is minor=64
+ * ...
+ *
+ * From hd.c:
+ * |
+ * | It traverses the request-list, using interrupts to jump between functions.
+ * | As nearly all functions can be called within interrupts, we may not sleep.
+ * | Special care is recommended. Have Fun!
+ * |
+ * | modified by Drew Eckhardt to check nr of hd's from the CMOS.
+ * |
+ * | Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
+ * | in the early extended-partition checks and added DM partitions.
+ * |
+ * | Early work on error handling by Mika Liljeberg (liljeber@cs.Helsinki.FI).
+ * |
+ * | IRQ-unmask, drive-id, multiple-mode, support for ">16 heads",
+ * | and general streamlining by Mark Lord (mlord@pobox.com).
+ *
+ * October, 1994 -- Complete line-by-line overhaul for linux 1.1.x, by:
+ *
+ * Mark Lord (mlord@pobox.com) (IDE Perf.Pkg)
+ * Delman Lee (delman@ieee.org) ("Mr. atdisk2")
+ * Scott Snyder (snyder@fnald0.fnal.gov) (ATAPI IDE cd-rom)
+ *
+ * This was a rewrite of just about everything from hd.c, though some original
+ * code is still sprinkled about. Think of it as a major evolution, with
+ * inspiration from lots of linux users, esp. hamish@zot.apana.org.au
+ *
+ * Version 1.0 ALPHA initial code, primary i/f working okay
+ * Version 1.3 BETA dual i/f on shared irq tested & working!
+ * Version 1.4 BETA added auto probing for irq(s)
+ * Version 1.5 BETA added ALPHA (untested) support for IDE cd-roms,
+ * ...
+ * Version 5.50 allow values as small as 20 for idebus=
+ * Version 5.51 force non io_32bit in drive_cmd_intr()
+ * change delay_10ms() to delay_50ms() to fix problems
+ * Version 5.52 fix incorrect invalidation of removable devices
+ * add "hdx=slow" command line option
+ * Version 5.60 start to modularize the driver; the disk and ATAPI
+ * drivers can be compiled as loadable modules.
+ * move IDE probe code to ide-probe.c
+ * move IDE disk code to ide-disk.c
+ * add support for generic IDE device subdrivers
+ * add m68k code from Geert Uytterhoeven
+ * probe all interfaces by default
+ * add ioctl to (re)probe an interface
+ * Version 6.00 use per device request queues
+ * attempt to optimize shared hwgroup performance
+ * add ioctl to manually adjust bandwidth algorithms
+ * add kerneld support for the probe module
+ * fix bug in ide_error()
+ * fix bug in the first ide_get_lock() call for Atari
+ * don't flush leftover data for ATAPI devices
+ * Version 6.01 clear hwgroup->active while the hwgroup sleeps
+ * support HDIO_GETGEO for floppies
+ * Version 6.02 fix ide_ack_intr() call
+ * check partition table on floppies
+ * Version 6.03 handle bad status bit sequencing in ide_wait_stat()
+ * Version 6.10 deleted old entries from this list of updates
+ * replaced triton.c with ide-dma.c generic PCI DMA
+ * added support for BIOS-enabled UltraDMA
+ * rename all "promise" things to "pdc4030"
+ * fix EZ-DRIVE handling on small disks
+ * Version 6.11 fix probe error in ide_scan_devices()
+ * fix ancient "jiffies" polling bugs
+ * mask all hwgroup interrupts on each irq entry
+ * Version 6.12 integrate ioctl and proc interfaces
+ * fix parsing of "idex=" command line parameter
+ * Version 6.13 add support for ide4/ide5 courtesy rjones@orchestream.com
+ * Version 6.14 fixed IRQ sharing among PCI devices
+ * Version 6.15 added SMP awareness to IDE drivers
+ * Version 6.16 fixed various bugs; even more SMP friendly
+ * Version 6.17 fix for newest EZ-Drive problem
+ * Version 6.18 default unpartitioned-disk translation now "BIOS LBA"
+ * Version 6.19 Re-design for a UNIFORM driver for all platforms,
+ * model based on suggestions from Russell King and
+ * Geert Uytterhoeven
+ * Promise DC4030VL now supported.
+ * add support for ide6/ide7
+ * delay_50ms() changed to ide_delay_50ms() and exported.
+ * Version 6.20 Added/Fixed Generic ATA-66 support and hwif detection.
+ * Added hdx=flash to allow for second flash disk
+ * detection w/o the hang loop.
+ * Added support for ide8/ide9
+ * Added idex=ata66 for the quirky chipsets that are
+ * ATA-66 compliant, but have yet to determine a method
+ * of verification of the 80c cable presence.
+ * Specifically Promise's PDC20262 chipset.
+ * Version 6.21 Fixing/Fixed SMP spinlock issue with insight from an old
+ * hat that clarified original low level driver design.
+ * Version 6.30 Added SMP support; fixed multmode issues. -ml
+ * Version 6.31 Debug Share INTR's and request queue streaming
+ * Native ATA-100 support
+ * Prep for Cascades Project
+ *
+ * Some additional driver compile-time options are in ./include/linux/ide.h
+ *
+ * To do, in likely order of completion:
+ * - modify kernel to obtain BIOS geometry for drives on 2nd/3rd/4th i/f
+ *
+ */
+
+#define REVISION "Revision: 6.31"
+#define VERSION "Id: ide.c 6.31 2000/06/09"
+
+#undef REALLY_SLOW_IO /* most systems can safely undef this */
+
+#define _IDE_C /* Tell ide.h it's really us */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+/*#include <xeno/kernel.h>*/
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/blkpg.h>
+#include <xeno/slab.h>
+#include <xeno/init.h>
+#include <xeno/pci.h>
+#include <xeno/delay.h>
+#include <xeno/ide.h>
+/*#include <xeno/devfs_fs_kernel.h>*/
+/*#include <xeno/completion.h>*/
+/*#include <xeno/reboot.h>*/
+
+#include <asm/domain_page.h>
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/bitops.h>
+
+#include "ide_modes.h"
+
+#ifdef CONFIG_KMOD
+#include <xeno/kmod.h>
+#endif /* CONFIG_KMOD */
+
+#ifdef CONFIG_IDE_TASKFILE_IO
+# define __TASKFILE__IO
+#else /* CONFIG_IDE_TASKFILE_IO */
+# undef __TASKFILE__IO
+#endif /* CONFIG_IDE_TASKFILE_IO */
+
+#ifdef __TASKFILE__IO
+#else /* !__TASKFILE__IO */
+#endif /* __TASKFILE__IO */
+
+
+
+
+/* XXXXXXXXXXXX This may be replaced by fs/block_dev.c versions!!! XXXXX */
+/* (only included here so the hypervisor will link :-) */
+int check_disk_change(kdev_t dev) { return 0; }
+int unregister_blkdev(unsigned int major, const char * name) { return 0; }
+/* And these ones are from fs/inode.c... */
+int invalidate_device(kdev_t dev, int do_sync) { return 0; }
+/* fs/buffer.c... */
+void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) { }
+/* fs/partitions/check.c... */
+void grok_partitions(struct gendisk *dev, int drive,
+ unsigned minors, long size) { }
+void register_disk(struct gendisk *dev, kdev_t first,
+ unsigned minors, struct block_device_operations *ops,
+ long size) { }
+/* fs/devices.c... */
+const char * kdevname(kdev_t dev) { return NULL; }
+/* End of XXXXXX region */
+
+
+
+
+/* default maximum number of failures */
+#define IDE_DEFAULT_MAX_FAILURES 1
+
+static const byte ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR, IDE4_MAJOR, IDE5_MAJOR, IDE6_MAJOR, IDE7_MAJOR, IDE8_MAJOR, IDE9_MAJOR };
+
+static int idebus_parameter; /* holds the "idebus=" parameter */
+static int system_bus_speed; /* holds what we think is VESA/PCI bus speed */
+static int initializing; /* set while initializing built-in drivers */
+
+#ifdef CONFIG_BLK_DEV_IDEPCI
+static int ide_scan_direction; /* THIS was formerly 2.2.x pci=reverse */
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+
+#if defined(__mc68000__) || defined(CONFIG_APUS)
+/*
+ * ide_lock is used by the Atari code to obtain access to the IDE interrupt,
+ * which is shared between several drivers.
+ */
+static int ide_lock;
+#endif /* __mc68000__ || CONFIG_APUS */
+
+int noautodma = 0;
+
+/*
+ * ide_modules keeps track of the available IDE chipset/probe/driver modules.
+ */
+ide_module_t *ide_modules;
+ide_module_t *ide_probe;
+
+/*
+ * This is declared extern in ide.h, for access by other IDE modules:
+ */
+ide_hwif_t ide_hwifs[MAX_HWIFS]; /* master data repository */
+
+#if (DISK_RECOVERY_TIME > 0)
+/*
+ * For really screwy hardware (hey, at least it *can* be used with Linux)
+ * we can enforce a minimum delay time between successive operations.
+ */
+static unsigned long read_timer (void)
+{
+ unsigned long t, flags;
+ int i;
+
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only */
+ t = jiffies * 11932;
+ outb_p(0, 0x43);
+ i = inb_p(0x40);
+ i |= inb(0x40) << 8;
+ __restore_flags(flags); /* local CPU only */
+ return (t - i);
+}
+#endif /* DISK_RECOVERY_TIME */
+
+static inline void set_recovery_timer (ide_hwif_t *hwif)
+{
+#if (DISK_RECOVERY_TIME > 0)
+ hwif->last_time = read_timer();
+#endif /* DISK_RECOVERY_TIME */
+}
+
+/*
+ * Do not even *think* about calling this!
+ */
+static void init_hwif_data (unsigned int index)
+{
+ unsigned int unit;
+ hw_regs_t hw;
+ ide_hwif_t *hwif = &ide_hwifs[index];
+
+ /* bulk initialize hwif & drive info with zeros */
+ memset(hwif, 0, sizeof(ide_hwif_t));
+ memset(&hw, 0, sizeof(hw_regs_t));
+
+ /* fill in any non-zero initial values */
+ hwif->index = index;
+ ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, &hwif->irq);
+ memcpy(&hwif->hw, &hw, sizeof(hw));
+ memcpy(hwif->io_ports, hw.io_ports, sizeof(hw.io_ports));
+ hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET];
+#ifdef CONFIG_BLK_DEV_HD
+ if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA)
+ hwif->noprobe = 1; /* may be overridden by ide_setup() */
+#endif /* CONFIG_BLK_DEV_HD */
+ hwif->major = ide_hwif_to_major[index];
+ hwif->name[0] = 'i';
+ hwif->name[1] = 'd';
+ hwif->name[2] = 'e';
+ hwif->name[3] = '0' + index;
+ hwif->bus_state = BUSSTATE_ON;
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ ide_drive_t *drive = &hwif->drives[unit];
+
+ drive->media = ide_disk;
+ drive->select.all = (unit<<4)|0xa0;
+ drive->hwif = hwif;
+ drive->ctl = 0x08;
+ drive->ready_stat = READY_STAT;
+ drive->bad_wstat = BAD_W_STAT;
+ drive->special.b.recalibrate = 1;
+ drive->special.b.set_geometry = 1;
+ drive->name[0] = 'h';
+ drive->name[1] = 'd';
+ drive->name[2] = 'a' + (index * MAX_DRIVES) + unit;
+ drive->max_failures = IDE_DEFAULT_MAX_FAILURES;
+ /*init_waitqueue_head(&drive->wqueue);*/
+ }
+}
+
+/*
+ * init_ide_data() sets reasonable default values into all fields
+ * of all instances of the hwifs and drives, but only on the first call.
+ * Subsequent calls have no effect (they don't wipe out anything).
+ *
+ * This routine is normally called at driver initialization time,
+ * but may also be called MUCH earlier during kernel "command-line"
+ * parameter processing. As such, we cannot depend on any other parts
+ * of the kernel (such as memory allocation) to be functioning yet.
+ *
+ * This is too bad, as otherwise we could dynamically allocate the
+ * ide_drive_t structs as needed, rather than always consuming memory
+ * for the max possible number (MAX_HWIFS * MAX_DRIVES) of them.
+ */
+#define MAGIC_COOKIE 0x12345678
+static void __init init_ide_data (void)
+{
+ unsigned int index;
+ static unsigned long magic_cookie = MAGIC_COOKIE;
+
+ if (magic_cookie != MAGIC_COOKIE)
+ return; /* already initialized */
+ magic_cookie = 0;
+
+ /* Initialise all interface structures */
+ for (index = 0; index < MAX_HWIFS; ++index)
+ init_hwif_data(index);
+
+ /* Add default hw interfaces */
+ ide_init_default_hwifs();
+
+ idebus_parameter = 0;
+ system_bus_speed = 0;
+}
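+
+/*
+ * Illustrative only: thanks to the magic-cookie guard above, repeated calls
+ * are harmless no-ops, so both of these hypothetical call sites are safe:
+ *
+ *	init_ide_data();	(early, from "command-line" parameter parsing)
+ *	init_ide_data();	(again later, from driver initialization)
+ */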
+
+/*
+ * CompactFlash cards and their brethren pretend to be removable hard disks, except:
+ * (1) they never have a slave unit, and
+ * (2) they don't have doorlock mechanisms.
+ * This test catches them, and is invoked elsewhere when setting appropriate config bits.
+ *
+ * FIXME: This treatment is probably applicable for *all* PCMCIA (PC CARD) devices,
+ * so in linux 2.3.x we should change this to just treat all PCMCIA drives this way,
+ * and get rid of the model-name tests below (too big of an interface change for 2.2.x).
+ * At that time, we might also consider parameterizing the timeouts and retries,
+ * since these are MUCH faster than mechanical drives. -M.Lord
+ */
+int drive_is_flashcard (ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+
+ if (drive->removable && id != NULL) {
+ if (id->config == 0x848a) return 1; /* CompactFlash */
+ if (!strncmp(id->model, "KODAK ATA_FLASH", 15) /* Kodak */
+ || !strncmp(id->model, "Hitachi CV", 10) /* Hitachi */
+ || !strncmp(id->model, "SunDisk SDCFB", 13) /* SunDisk */
+ || !strncmp(id->model, "HAGIWARA HPC", 12) /* Hagiwara */
+ || !strncmp(id->model, "LEXAR ATA_FLASH", 15) /* Lexar */
+ || !strncmp(id->model, "ATA_FLASH", 9)) /* Simple Tech */
+ {
+ return 1; /* yes, it is a flash memory card */
+ }
+ }
+ return 0; /* no, it is not a flash memory card */
+}
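+
+/*
+ * A minimal usage sketch (hypothetical caller -- the real call sites live in
+ * the probe/config code): a positive answer means no slave unit and no
+ * doorlock, so a caller would typically do something like:
+ *
+ *	if (drive_is_flashcard(drive))
+ *		(treat as removable, skip doorlock commands and slave probing)
+ */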
+
+/*
+ * ide_system_bus_speed() returns what we think is the system VESA/PCI
+ * bus speed (in MHz). This is used for calculating interface PIO timings.
+ * The default is 33 for known PCI systems, 50 otherwise.
+ * The "idebus=xx" parameter can be used to override this value.
+ * The actual value to be used is computed/displayed the first time through.
+ */
+int ide_system_bus_speed (void)
+{
+ if (!system_bus_speed) {
+ if (idebus_parameter)
+ system_bus_speed = idebus_parameter; /* user supplied value */
+#ifdef CONFIG_PCI
+ else if (pci_present())
+ system_bus_speed = 33; /* safe default value for PCI */
+#endif /* CONFIG_PCI */
+ else
+ system_bus_speed = 50; /* safe default value for VESA and PCI */
+ printk("ide: Assuming %dMHz system bus speed for PIO modes%s\n", system_bus_speed,
+ idebus_parameter ? "" : "; override with idebus=xx");
+ }
+ return system_bus_speed;
+}
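+
+/*
+ * For illustration only (no such line appears verbatim in this driver): the
+ * PIO tuning code turns this MHz figure into a bus clock period,
+ *
+ *	cycle_ns = 1000 / ide_system_bus_speed();	(33MHz -> ~30ns)
+ *
+ * which is then scaled up to satisfy the cycle times each PIO mode requires.
+ */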
+
+#if SUPPORT_VLB_SYNC
+/*
+ * Some localbus EIDE interfaces require a special access sequence
+ * when using 32-bit I/O instructions to transfer data. We call this
+ * the "vlb_sync" sequence, which consists of three successive reads
+ * of the sector count register location, with interrupts disabled
+ * to ensure that the reads all happen together.
+ */
+static inline void do_vlb_sync (ide_ioreg_t port) {
+ (void) inb (port);
+ (void) inb (port);
+ (void) inb (port);
+}
+#endif /* SUPPORT_VLB_SYNC */
+
+/*
+ * This is used for most PIO data transfers *from* the IDE interface
+ */
+void ide_input_data (ide_drive_t *drive, void *vbuffer, unsigned int wcount)
+{
+ void *buffer;
+ byte io_32bit;
+
+ /* first check if this controller has defined a special function
+ * for handling polled ide transfers
+ */
+
+ if(HWIF(drive)->ideproc) {
+ HWIF(drive)->ideproc(ideproc_ide_input_data,
+ drive, vbuffer, wcount);
+ return;
+ }
+
+	/* We assume a controller's own functions will make their own
+	 * arrangements for mapping/unmapping the destination memory if
+	 * required (and none are needed if using DMA)
+	 */
+
+ buffer = map_domain_mem(virt_to_phys(vbuffer));
+
+ io_32bit = drive->io_32bit;
+
+ if (io_32bit) {
+#if SUPPORT_VLB_SYNC
+ if (io_32bit & 2) {
+ unsigned long flags;
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only */
+ do_vlb_sync(IDE_NSECTOR_REG);
+ insl(IDE_DATA_REG, buffer, wcount);
+ __restore_flags(flags); /* local CPU only */
+ } else
+#endif /* SUPPORT_VLB_SYNC */
+ insl(IDE_DATA_REG, buffer, wcount);
+ } else {
+#if SUPPORT_SLOW_DATA_PORTS
+ if (drive->slow) {
+ unsigned short *ptr = (unsigned short *) buffer;
+ while (wcount--) {
+ *ptr++ = inw_p(IDE_DATA_REG);
+ *ptr++ = inw_p(IDE_DATA_REG);
+ }
+ } else
+#endif /* SUPPORT_SLOW_DATA_PORTS */
+ insw(IDE_DATA_REG, buffer, wcount<<1);
+ }
+
+ unmap_domain_mem(buffer);
+}
+
+/*
+ * This is used for most PIO data transfers *to* the IDE interface
+ */
+void ide_output_data (ide_drive_t *drive, void *vbuffer, unsigned int wcount)
+{
+ void *buffer;
+ byte io_32bit;
+
+ if(HWIF(drive)->ideproc) {
+ HWIF(drive)->ideproc(ideproc_ide_output_data,
+ drive, vbuffer, wcount);
+ return;
+ }
+
+ buffer = map_domain_mem(virt_to_phys(vbuffer));
+
+ io_32bit = drive->io_32bit;
+
+ if (io_32bit) {
+#if SUPPORT_VLB_SYNC
+ if (io_32bit & 2) {
+ unsigned long flags;
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only */
+ do_vlb_sync(IDE_NSECTOR_REG);
+ outsl(IDE_DATA_REG, buffer, wcount);
+ __restore_flags(flags); /* local CPU only */
+ } else
+#endif /* SUPPORT_VLB_SYNC */
+ outsl(IDE_DATA_REG, buffer, wcount);
+ } else {
+#if SUPPORT_SLOW_DATA_PORTS
+ if (drive->slow) {
+ unsigned short *ptr = (unsigned short *) buffer;
+ while (wcount--) {
+ outw_p(*ptr++, IDE_DATA_REG);
+ outw_p(*ptr++, IDE_DATA_REG);
+ }
+ } else
+#endif /* SUPPORT_SLOW_DATA_PORTS */
+ outsw(IDE_DATA_REG, buffer, wcount<<1);
+ }
+
+ unmap_domain_mem(buffer);
+}
+
+/*
+ * The following routines are mainly used by the ATAPI drivers.
+ *
+ * These routines will round up any request for an odd number of bytes,
+ * so if an odd bytecount is specified, be sure that there's at least one
+ * extra byte allocated for the buffer.
+ */
+void atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount)
+{
+ if(HWIF(drive)->ideproc) {
+ HWIF(drive)->ideproc(ideproc_atapi_input_bytes,
+ drive, buffer, bytecount);
+ return;
+ }
+printk("XXXXX atapi_input_bytes called -- mapping is likely broken\n");
+ ++bytecount;
+#if defined(CONFIG_ATARI) || defined(CONFIG_Q40)
+ if (MACH_IS_ATARI || MACH_IS_Q40) {
+ /* Atari has a byte-swapped IDE interface */
+ insw_swapw(IDE_DATA_REG, buffer, bytecount / 2);
+ return;
+ }
+#endif /* CONFIG_ATARI */
+ ide_input_data (drive, buffer, bytecount / 4);
+ if ((bytecount & 0x03) >= 2)
+ insw (IDE_DATA_REG, ((byte *)buffer) + (bytecount & ~0x03), 1);
+}
+
+void atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount)
+{
+ if(HWIF(drive)->ideproc) {
+ HWIF(drive)->ideproc(ideproc_atapi_output_bytes,
+ drive, buffer, bytecount);
+ return;
+ }
+
+printk("XXXXX atapi_output_bytes called -- mapping is likely broken\n");
+
+ ++bytecount;
+#if defined(CONFIG_ATARI) || defined(CONFIG_Q40)
+ if (MACH_IS_ATARI || MACH_IS_Q40) {
+ /* Atari has a byte-swapped IDE interface */
+ outsw_swapw(IDE_DATA_REG, buffer, bytecount / 2);
+ return;
+ }
+#endif /* CONFIG_ATARI */
+ ide_output_data (drive, buffer, bytecount / 4);
+ if ((bytecount & 0x03) >= 2)
+ outsw (IDE_DATA_REG, ((byte *)buffer) + (bytecount & ~0x03), 1);
+}
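+
+/*
+ * A sketch of the buffer rule stated above (LEN here is illustrative): an odd
+ * bytecount is rounded up, so always leave one spare byte at the end:
+ *
+ *	byte buf[LEN + 1];			(one extra byte for the round-up)
+ *	atapi_input_bytes(drive, buf, LEN);	(may transfer LEN+1 bytes if LEN is odd)
+ */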
+
+/*
+ * Needed for PCI irq sharing
+ */
+//static inline
+int drive_is_ready (ide_drive_t *drive)
+{
+ byte stat = 0;
+ if (drive->waiting_for_dma)
+ return HWIF(drive)->dmaproc(ide_dma_test_irq, drive);
+#if 0
+ udelay(1); /* need to guarantee 400ns since last command was issued */
+#endif
+
+#ifdef CONFIG_IDEPCI_SHARE_IRQ
+ /*
+ * We do a passive status test under shared PCI interrupts on
+ * cards that truly share the ATA side interrupt, but may also share
+ * an interrupt with another pci card/device. We make no assumptions
+ * about possible isa-pnp and pci-pnp issues yet.
+ */
+ if (IDE_CONTROL_REG)
+ stat = GET_ALTSTAT();
+ else
+#endif /* CONFIG_IDEPCI_SHARE_IRQ */
+ stat = GET_STAT(); /* Note: this may clear a pending IRQ!! */
+
+ if (stat & BUSY_STAT)
+ return 0; /* drive busy: definitely not interrupting */
+ return 1; /* drive ready: *might* be interrupting */
+}
+
+/*
+ * This is our end_request replacement function.
+ */
+void ide_end_request (byte uptodate, ide_hwgroup_t *hwgroup)
+{
+ struct request *rq;
+ unsigned long flags;
+ ide_drive_t *drive = hwgroup->drive;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ rq = hwgroup->rq;
+
+ /*
+	 * decide whether to re-enable DMA -- 3 is an arbitrary cutoff for now;
+	 * if we take more than 3 DMA timeouts, just stay in PIO
+ */
+ if (drive->state == DMA_PIO_RETRY && drive->retry_pio <= 3) {
+ drive->state = 0;
+ hwgroup->hwif->dmaproc(ide_dma_on, drive);
+ }
+
+ if (!end_that_request_first(rq, uptodate, hwgroup->drive->name)) {
+ add_blkdev_randomness(MAJOR(rq->rq_dev));
+ blkdev_dequeue_request(rq);
+ hwgroup->rq = NULL;
+ end_that_request_last(rq);
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * This should get invoked any time we exit the driver to
+ * wait for an interrupt response from a drive. handler() points
+ * at the appropriate code to handle the next interrupt, and a
+ * timer is started to prevent us from waiting forever in case
+ * something goes wrong (see the ide_timer_expiry() handler later on).
+ */
+void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
+ unsigned int timeout, ide_expiry_t *expiry)
+{
+ unsigned long flags;
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ if (hwgroup->handler != NULL) {
+ printk("%s: ide_set_handler: handler not null; old=%p, new=%p\n",
+ drive->name, hwgroup->handler, handler);
+ }
+ hwgroup->handler = handler;
+ hwgroup->expiry = expiry;
+ hwgroup->timer.expires = jiffies + timeout;
+ add_timer(&hwgroup->timer);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
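+
+/*
+ * Typical usage pattern (compare ide_cmd() further down): install the
+ * interrupt handler *before* poking the command register, so the IRQ can
+ * never beat us to it.  my_intr below is illustrative; real handlers are
+ * e.g. drive_cmd_intr():
+ *
+ *	ide_set_handler(drive, &my_intr, WAIT_CMD, NULL);
+ *	OUT_BYTE(cmd, IDE_COMMAND_REG);
+ */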
+
+/*
+ * current_capacity() returns the capacity (in sectors) of a drive
+ * according to its current geometry/LBA settings.
+ */
+unsigned long current_capacity (ide_drive_t *drive)
+{
+ if (!drive->present)
+ return 0;
+ if (drive->driver != NULL)
+ return DRIVER(drive)->capacity(drive);
+ return 0;
+}
+
+extern struct block_device_operations ide_fops[];
+/*
+ * ide_geninit() is called exactly *once* for each interface.
+ */
+void ide_geninit (ide_hwif_t *hwif)
+{
+ unsigned int unit;
+ struct gendisk *gd = hwif->gd;
+
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ ide_drive_t *drive = &hwif->drives[unit];
+
+ if (!drive->present)
+ continue;
+ if (drive->media!=ide_disk && drive->media!=ide_floppy)
+ continue;
+ register_disk(gd,MKDEV(hwif->major,unit<<PARTN_BITS),
+#ifdef CONFIG_BLK_DEV_ISAPNP
+ (drive->forced_geom && drive->noprobe) ? 1 :
+#endif /* CONFIG_BLK_DEV_ISAPNP */
+ 1<<PARTN_BITS, ide_fops,
+ current_capacity(drive));
+ }
+}
+
+static ide_startstop_t do_reset1 (ide_drive_t *, int); /* needed below */
+
+/*
+ * atapi_reset_pollfunc() gets invoked to poll the interface for completion every 50ms
+ * during an atapi drive reset operation. If the drive has not yet responded,
+ * and we have not yet hit our maximum waiting time, then the timer is restarted
+ * for another 50ms.
+ */
+static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive)
+{
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ byte stat;
+
+ SELECT_DRIVE(HWIF(drive),drive);
+ udelay (10);
+
+ if (OK_STAT(stat=GET_STAT(), 0, BUSY_STAT)) {
+ printk("%s: ATAPI reset complete\n", drive->name);
+ } else {
+ if (0 < (signed long)(hwgroup->poll_timeout - jiffies)) {
+ ide_set_handler (drive, &atapi_reset_pollfunc, HZ/20, NULL);
+ return ide_started; /* continue polling */
+ }
+ hwgroup->poll_timeout = 0; /* end of polling */
+ printk("%s: ATAPI reset timed-out, status=0x%02x\n", drive->name, stat);
+ return do_reset1 (drive, 1); /* do it the old fashioned way */
+ }
+ hwgroup->poll_timeout = 0; /* done polling */
+ return ide_stopped;
+}
+
+/*
+ * reset_pollfunc() gets invoked to poll the interface for completion every 50ms
+ * during an ide reset operation. If the drives have not yet responded,
+ * and we have not yet hit our maximum waiting time, then the timer is restarted
+ * for another 50ms.
+ */
+static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
+{
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ ide_hwif_t *hwif = HWIF(drive);
+ byte tmp;
+
+ if (!OK_STAT(tmp=GET_STAT(), 0, BUSY_STAT)) {
+ if (0 < (signed long)(hwgroup->poll_timeout - jiffies)) {
+ ide_set_handler (drive, &reset_pollfunc, HZ/20, NULL);
+ return ide_started; /* continue polling */
+ }
+ printk("%s: reset timed-out, status=0x%02x\n", hwif->name, tmp);
+ drive->failures++;
+ } else {
+ printk("%s: reset: ", hwif->name);
+ if ((tmp = GET_ERR()) == 1) {
+ printk("success\n");
+ drive->failures = 0;
+ } else {
+ drive->failures++;
+#if FANCY_STATUS_DUMPS
+ printk("master: ");
+ switch (tmp & 0x7f) {
+ case 1: printk("passed");
+ break;
+ case 2: printk("formatter device error");
+ break;
+ case 3: printk("sector buffer error");
+ break;
+ case 4: printk("ECC circuitry error");
+ break;
+ case 5: printk("controlling MPU error");
+ break;
+ default:printk("error (0x%02x?)", tmp);
+ }
+ if (tmp & 0x80)
+ printk("; slave: failed");
+ printk("\n");
+#else
+ printk("failed\n");
+#endif /* FANCY_STATUS_DUMPS */
+ }
+ }
+ hwgroup->poll_timeout = 0; /* done polling */
+ return ide_stopped;
+}
+
+static void check_dma_crc (ide_drive_t *drive)
+{
+ if (drive->crc_count) {
+ (void) HWIF(drive)->dmaproc(ide_dma_off_quietly, drive);
+ if ((HWIF(drive)->speedproc) != NULL)
+ HWIF(drive)->speedproc(drive, ide_auto_reduce_xfer(drive));
+ if (drive->current_speed >= XFER_SW_DMA_0)
+ (void) HWIF(drive)->dmaproc(ide_dma_on, drive);
+ } else {
+ (void) HWIF(drive)->dmaproc(ide_dma_off, drive);
+ }
+}
+
+static void pre_reset (ide_drive_t *drive)
+{
+ if (drive->driver != NULL)
+ DRIVER(drive)->pre_reset(drive);
+
+ if (!drive->keep_settings) {
+ if (drive->using_dma) {
+ check_dma_crc(drive);
+ } else {
+ drive->unmask = 0;
+ drive->io_32bit = 0;
+ }
+ return;
+ }
+ if (drive->using_dma)
+ check_dma_crc(drive);
+}
+
+/*
+ * do_reset1() attempts to recover a confused drive by resetting it.
+ * Unfortunately, resetting a disk drive actually resets all devices on
+ * the same interface, so it can really be thought of as resetting the
+ * interface rather than resetting the drive.
+ *
+ * ATAPI devices have their own reset mechanism which allows them to be
+ * individually reset without clobbering other devices on the same interface.
+ *
+ * Unfortunately, the IDE interface does not generate an interrupt to let
+ * us know when the reset operation has finished, so we must poll for this.
+ * Equally poor, though, is the fact that this may take a very long time to
+ * complete (up to 30 seconds worst-case).  So, instead of busy-waiting here,
+ * we set a timer to poll at 50ms intervals.
+ */
+static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
+{
+ unsigned int unit;
+ unsigned long flags;
+ ide_hwif_t *hwif = HWIF(drive);
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+
+ __save_flags(flags); /* local CPU only */
+ __cli(); /* local CPU only */
+
+ /* For an ATAPI device, first try an ATAPI SRST. */
+ if (drive->media != ide_disk && !do_not_try_atapi) {
+ pre_reset(drive);
+ SELECT_DRIVE(hwif,drive);
+ udelay (20);
+ OUT_BYTE (WIN_SRST, IDE_COMMAND_REG);
+ hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
+ ide_set_handler (drive, &atapi_reset_pollfunc, HZ/20, NULL);
+ __restore_flags (flags); /* local CPU only */
+ return ide_started;
+ }
+
+ /*
+ * First, reset any device state data we were maintaining
+ * for any of the drives on this interface.
+ */
+ for (unit = 0; unit < MAX_DRIVES; ++unit)
+ pre_reset(&hwif->drives[unit]);
+
+#if OK_TO_RESET_CONTROLLER
+ if (!IDE_CONTROL_REG) {
+ __restore_flags(flags);
+ return ide_stopped;
+ }
+ /*
+ * Note that we also set nIEN while resetting the device,
+ * to mask unwanted interrupts from the interface during the reset.
+ * However, due to the design of PC hardware, this will cause an
+ * immediate interrupt due to the edge transition it produces.
+ * This single interrupt gives us a "fast poll" for drives that
+ * recover from reset very quickly, saving us the first 50ms wait time.
+ */
+ OUT_BYTE(drive->ctl|6,IDE_CONTROL_REG); /* set SRST and nIEN */
+ udelay(10); /* more than enough time */
+ if (drive->quirk_list == 2) {
+ OUT_BYTE(drive->ctl,IDE_CONTROL_REG); /* clear SRST and nIEN */
+ } else {
+ OUT_BYTE(drive->ctl|2,IDE_CONTROL_REG); /* clear SRST, leave nIEN */
+ }
+ udelay(10); /* more than enough time */
+ hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
+ ide_set_handler (drive, &reset_pollfunc, HZ/20, NULL);
+
+ /*
+	 * Some weird controllers like to reset themselves to a strange
+	 * state when the disks are reset this way. At least, the Winbond
+	 * 553 documentation says so.
+ */
+ if (hwif->resetproc != NULL)
+ hwif->resetproc(drive);
+
+#endif /* OK_TO_RESET_CONTROLLER */
+
+ __restore_flags (flags); /* local CPU only */
+ return ide_started;
+}
+
+/*
+ * ide_do_reset() is the entry point to the drive/interface reset code.
+ */
+ide_startstop_t ide_do_reset (ide_drive_t *drive)
+{
+ return do_reset1 (drive, 0);
+}
+
+static inline u32 read_24 (ide_drive_t *drive)
+{
+ return (IN_BYTE(IDE_HCYL_REG)<<16) |
+ (IN_BYTE(IDE_LCYL_REG)<<8) |
+ IN_BYTE(IDE_SECTOR_REG);
+}
+
+/*
+ * Clean up after success/failure of an explicit drive cmd
+ */
+void ide_end_drive_cmd (ide_drive_t *drive, byte stat, byte err)
+{
+ unsigned long flags;
+ struct request *rq;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ rq = HWGROUP(drive)->rq;
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ switch(rq->cmd) {
+ case IDE_DRIVE_CMD:
+ {
+ byte *args = (byte *) rq->buffer;
+ rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
+ if (args) {
+ args[0] = stat;
+ args[1] = err;
+ args[2] = IN_BYTE(IDE_NSECTOR_REG);
+ }
+ break;
+ }
+ case IDE_DRIVE_TASK:
+ {
+ byte *args = (byte *) rq->buffer;
+ rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
+ if (args) {
+ args[0] = stat;
+ args[1] = err;
+ args[2] = IN_BYTE(IDE_NSECTOR_REG);
+ args[3] = IN_BYTE(IDE_SECTOR_REG);
+ args[4] = IN_BYTE(IDE_LCYL_REG);
+ args[5] = IN_BYTE(IDE_HCYL_REG);
+ args[6] = IN_BYTE(IDE_SELECT_REG);
+ }
+ break;
+ }
+ case IDE_DRIVE_TASKFILE:
+ {
+ ide_task_t *args = (ide_task_t *) rq->special;
+ rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
+ if (args) {
+ if (args->tf_in_flags.b.data) {
+ unsigned short data = IN_WORD(IDE_DATA_REG);
+ args->tfRegister[IDE_DATA_OFFSET] = (data) & 0xFF;
+ args->hobRegister[IDE_DATA_OFFSET_HOB] = (data >> 8) & 0xFF;
+ }
+ args->tfRegister[IDE_ERROR_OFFSET] = err;
+ args->tfRegister[IDE_NSECTOR_OFFSET] = IN_BYTE(IDE_NSECTOR_REG);
+ args->tfRegister[IDE_SECTOR_OFFSET] = IN_BYTE(IDE_SECTOR_REG);
+ args->tfRegister[IDE_LCYL_OFFSET] = IN_BYTE(IDE_LCYL_REG);
+ args->tfRegister[IDE_HCYL_OFFSET] = IN_BYTE(IDE_HCYL_REG);
+ args->tfRegister[IDE_SELECT_OFFSET] = IN_BYTE(IDE_SELECT_REG);
+ args->tfRegister[IDE_STATUS_OFFSET] = stat;
+
+ if ((drive->id->command_set_2 & 0x0400) &&
+ (drive->id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+ OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG_HOB);
+ args->hobRegister[IDE_FEATURE_OFFSET_HOB] = IN_BYTE(IDE_FEATURE_REG);
+ args->hobRegister[IDE_NSECTOR_OFFSET_HOB] = IN_BYTE(IDE_NSECTOR_REG);
+ args->hobRegister[IDE_SECTOR_OFFSET_HOB] = IN_BYTE(IDE_SECTOR_REG);
+ args->hobRegister[IDE_LCYL_OFFSET_HOB] = IN_BYTE(IDE_LCYL_REG);
+ args->hobRegister[IDE_HCYL_OFFSET_HOB] = IN_BYTE(IDE_HCYL_REG);
+ }
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ spin_lock_irqsave(&io_request_lock, flags);
+ blkdev_dequeue_request(rq);
+ HWGROUP(drive)->rq = NULL;
+ end_that_request_last(rq);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * Error reporting, in human readable form (luxurious, but a memory hog).
+ */
+byte ide_dump_status (ide_drive_t *drive, const char *msg, byte stat)
+{
+ unsigned long flags;
+ byte err = 0;
+
+ __save_flags (flags); /* local CPU only */
+ ide__sti(); /* local CPU only */
+ printk("%s: %s: status=0x%02x", drive->name, msg, stat);
+#if FANCY_STATUS_DUMPS
+ printk(" { ");
+ if (stat & BUSY_STAT)
+ printk("Busy ");
+ else {
+ if (stat & READY_STAT) printk("DriveReady ");
+ if (stat & WRERR_STAT) printk("DeviceFault ");
+ if (stat & SEEK_STAT) printk("SeekComplete ");
+ if (stat & DRQ_STAT) printk("DataRequest ");
+ if (stat & ECC_STAT) printk("CorrectedError ");
+ if (stat & INDEX_STAT) printk("Index ");
+ if (stat & ERR_STAT) printk("Error ");
+ }
+ printk("}");
+#endif /* FANCY_STATUS_DUMPS */
+ printk("\n");
+ if ((stat & (BUSY_STAT|ERR_STAT)) == ERR_STAT) {
+ err = GET_ERR();
+ printk("%s: %s: error=0x%02x", drive->name, msg, err);
+#if FANCY_STATUS_DUMPS
+ if (drive->media == ide_disk) {
+ printk(" { ");
+ if (err & ABRT_ERR) printk("DriveStatusError ");
+ if (err & ICRC_ERR) printk("%s", (err & ABRT_ERR) ? "BadCRC " : "BadSector ");
+ if (err & ECC_ERR) printk("UncorrectableError ");
+ if (err & ID_ERR) printk("SectorIdNotFound ");
+ if (err & TRK0_ERR) printk("TrackZeroNotFound ");
+ if (err & MARK_ERR) printk("AddrMarkNotFound ");
+ printk("}");
+ if ((err & (BBD_ERR | ABRT_ERR)) == BBD_ERR || (err & (ECC_ERR|ID_ERR|MARK_ERR))) {
+ if ((drive->id->command_set_2 & 0x0400) &&
+ (drive->id->cfs_enable_2 & 0x0400) &&
+ (drive->addressing == 1)) {
+ __u64 sectors = 0;
+ u32 low = 0, high = 0;
+ low = read_24(drive);
+ OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG);
+ high = read_24(drive);
+
+ sectors = ((__u64)high << 24) | low;
+ printk(", LBAsect=%llu, high=%d, low=%d",
+ (unsigned long long) sectors,
+ high, low);
+ } else {
+ byte cur = IN_BYTE(IDE_SELECT_REG);
+ if (cur & 0x40) { /* using LBA? */
+ printk(", LBAsect=%ld", (unsigned long)
+ ((cur&0xf)<<24)
+ |(IN_BYTE(IDE_HCYL_REG)<<16)
+ |(IN_BYTE(IDE_LCYL_REG)<<8)
+ | IN_BYTE(IDE_SECTOR_REG));
+ } else {
+ printk(", CHS=%d/%d/%d",
+ (IN_BYTE(IDE_HCYL_REG)<<8) +
+ IN_BYTE(IDE_LCYL_REG),
+ cur & 0xf,
+ IN_BYTE(IDE_SECTOR_REG));
+ }
+ }
+ if (HWGROUP(drive) && HWGROUP(drive)->rq)
+ printk(", sector=%ld", HWGROUP(drive)->rq->sector);
+ }
+ }
+#endif /* FANCY_STATUS_DUMPS */
+ printk("\n");
+ }
+ __restore_flags (flags); /* local CPU only */
+ return err;
+}
+
+/*
+ * try_to_flush_leftover_data() is invoked in response to a drive
+ * unexpectedly having its DRQ_STAT bit set. As an alternative to
+ * resetting the drive, this routine tries to clear the condition
+ * by reading a sector's worth of data from the drive.  Of course,
+ * this may not help if the drive is *waiting* for data from *us*.
+ */
+static void try_to_flush_leftover_data (ide_drive_t *drive)
+{
+ int i = (drive->mult_count ? drive->mult_count : 1) * SECTOR_WORDS;
+
+ if (drive->media != ide_disk)
+ return;
+ while (i > 0) {
+ u32 buffer[16];
+ unsigned int wcount = (i > 16) ? 16 : i;
+ i -= wcount;
+ ide_input_data (drive, buffer, wcount);
+ }
+}
+
+/*
+ * ide_error() takes action based on the error returned by the drive.
+ */
+ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat)
+{
+ struct request *rq;
+ byte err;
+
+ err = ide_dump_status(drive, msg, stat);
+ if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL)
+ return ide_stopped;
+ /* retry only "normal" I/O: */
+ if (rq->cmd == IDE_DRIVE_CMD || rq->cmd == IDE_DRIVE_TASK) {
+ rq->errors = 1;
+ ide_end_drive_cmd(drive, stat, err);
+ return ide_stopped;
+ }
+ if (rq->cmd == IDE_DRIVE_TASKFILE) {
+ rq->errors = 1;
+ ide_end_drive_cmd(drive, stat, err);
+// ide_end_taskfile(drive, stat, err);
+ return ide_stopped;
+ }
+
+ if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { /* other bits are useless when BUSY */
+ rq->errors |= ERROR_RESET;
+ } else {
+ if (drive->media == ide_disk && (stat & ERR_STAT)) {
+ /* err has different meaning on cdrom and tape */
+ if (err == ABRT_ERR) {
+ if (drive->select.b.lba && IN_BYTE(IDE_COMMAND_REG) == WIN_SPECIFY)
+ return ide_stopped; /* some newer drives don't support WIN_SPECIFY */
+ } else if ((err & (ABRT_ERR | ICRC_ERR)) == (ABRT_ERR | ICRC_ERR)) {
+ drive->crc_count++; /* UDMA crc error -- just retry the operation */
+ } else if (err & (BBD_ERR | ECC_ERR)) /* retries won't help these */
+ rq->errors = ERROR_MAX;
+ else if (err & TRK0_ERR) /* help it find track zero */
+ rq->errors |= ERROR_RECAL;
+ }
+ if ((stat & DRQ_STAT) && rq->cmd != WRITE)
+ try_to_flush_leftover_data(drive);
+ }
+ if (GET_STAT() & (BUSY_STAT|DRQ_STAT))
+ OUT_BYTE(WIN_IDLEIMMEDIATE,IDE_COMMAND_REG); /* force an abort */
+
+ if (rq->errors >= ERROR_MAX) {
+ if (drive->driver != NULL)
+ DRIVER(drive)->end_request(0, HWGROUP(drive));
+ else
+ ide_end_request(0, HWGROUP(drive));
+ } else {
+ if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
+ ++rq->errors;
+ return ide_do_reset(drive);
+ }
+ if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
+ drive->special.b.recalibrate = 1;
+ ++rq->errors;
+ }
+ return ide_stopped;
+}
+
+/*
+ * Issue a simple drive command
+ * The drive must be selected beforehand.
+ */
+void ide_cmd (ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler)
+{
+ ide_set_handler (drive, handler, WAIT_CMD, NULL);
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl,IDE_CONTROL_REG); /* clear nIEN */
+ SELECT_MASK(HWIF(drive),drive,0);
+ OUT_BYTE(nsect,IDE_NSECTOR_REG);
+ OUT_BYTE(cmd,IDE_COMMAND_REG);
+}
+
+/*
+ * drive_cmd_intr() is invoked on completion of a special DRIVE_CMD.
+ */
+static ide_startstop_t drive_cmd_intr (ide_drive_t *drive)
+{
+ struct request *rq = HWGROUP(drive)->rq;
+ byte *args = (byte *) rq->buffer;
+ byte stat = GET_STAT();
+ int retries = 10;
+
+ ide__sti(); /* local CPU only */
+ if ((stat & DRQ_STAT) && args && args[3]) {
+ byte io_32bit = drive->io_32bit;
+ drive->io_32bit = 0;
+ ide_input_data(drive, &args[4], args[3] * SECTOR_WORDS);
+ drive->io_32bit = io_32bit;
+ while (((stat = GET_STAT()) & BUSY_STAT) && retries--)
+ udelay(100);
+ }
+
+ if (!OK_STAT(stat, READY_STAT, BAD_STAT))
+ return ide_error(drive, "drive_cmd", stat); /* calls ide_end_drive_cmd */
+ ide_end_drive_cmd (drive, stat, GET_ERR());
+ return ide_stopped;
+}
+
+/*
+ * do_special() is used to issue WIN_SPECIFY, WIN_RESTORE, and WIN_SETMULT
+ * commands to a drive. It used to do much more, but has been scaled back.
+ */
+static ide_startstop_t do_special (ide_drive_t *drive)
+{
+ special_t *s = &drive->special;
+
+#ifdef DEBUG
+ printk("%s: do_special: 0x%02x\n", drive->name, s->all);
+#endif
+ if (s->b.set_tune) {
+ ide_tuneproc_t *tuneproc = HWIF(drive)->tuneproc;
+ s->b.set_tune = 0;
+ if (tuneproc != NULL)
+ tuneproc(drive, drive->tune_req);
+ } else if (drive->driver != NULL) {
+ return DRIVER(drive)->special(drive);
+ } else if (s->all) {
+ printk("%s: bad special flag: 0x%02x\n", drive->name, s->all);
+ s->all = 0;
+ }
+ return ide_stopped;
+}
+
+/*
+ * This routine busy-waits for the drive status to be not "busy".
+ * It then checks the status for all of the "good" bits and none
+ * of the "bad" bits, and if all is okay it returns 0. All other
+ * cases return 1 after invoking ide_error() -- caller should just return.
+ *
+ * This routine should get fixed to not hog the cpu during extra long waits..
+ * That could be done by busy-waiting for the first jiffy or two, and then
+ * setting a timer to wake up at half-second intervals thereafter,
+ * until the timeout expires.
+ */
+int ide_wait_stat (ide_startstop_t *startstop, ide_drive_t *drive, byte good, byte bad, unsigned long timeout) {
+ byte stat;
+ int i;
+ unsigned long flags;
+
+ /* bail early if we've exceeded max_failures */
+ if (drive->max_failures && (drive->failures > drive->max_failures)) {
+ *startstop = ide_stopped;
+ return 1;
+ }
+
+ udelay(1); /* spec allows drive 400ns to assert "BUSY" */
+ if ((stat = GET_STAT()) & BUSY_STAT) {
+ __save_flags(flags); /* local CPU only */
+ ide__sti(); /* local CPU only */
+ timeout += jiffies;
+ while ((stat = GET_STAT()) & BUSY_STAT) {
+ if (0 < (signed long)(jiffies - timeout)) {
+ __restore_flags(flags); /* local CPU only */
+ *startstop = ide_error(drive, "status timeout", stat);
+ return 1;
+ }
+ }
+ __restore_flags(flags); /* local CPU only */
+ }
+ /*
+ * Allow status to settle, then read it again.
+ * A few rare drives vastly violate the 400ns spec here,
+ * so we'll wait up to 10usec for a "good" status
+ * rather than expensively fail things immediately.
+ * This fix courtesy of Matthew Faupel & Niccolo Rigacci.
+ */
+ for (i = 0; i < 10; i++) {
+ udelay(1);
+ if (OK_STAT((stat = GET_STAT()), good, bad))
+ return 0;
+ }
+ *startstop = ide_error(drive, "status error", stat);
+ return 1;
+}
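+
+/*
+ * Typical use (this is exactly what start_request() below does): select the
+ * drive, then wait for READY before issuing anything.  On failure
+ * ide_wait_stat() has already gone through ide_error(), so the caller just
+ * propagates startstop:
+ *
+ *	ide_startstop_t startstop;
+ *	SELECT_DRIVE(hwif, drive);
+ *	if (ide_wait_stat(&startstop, drive, drive->ready_stat,
+ *			  BUSY_STAT|DRQ_STAT, WAIT_READY))
+ *		return startstop;
+ */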
+
+/*
+ * execute_drive_cmd() issues a special drive command,
+ * usually initiated by ioctl() from the external hdparm program.
+ */
+static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, struct request *rq)
+{
+ switch(rq->cmd) {
+ case IDE_DRIVE_TASKFILE:
+ {
+ ide_task_t *args = rq->special;
+
+ if (!(args)) break;
+
+#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG
+ {
+ printk(KERN_INFO "%s: ", drive->name);
+// printk("TF.0=x%02x ", args->tfRegister[IDE_DATA_OFFSET]);
+ printk("TF.1=x%02x ", args->tfRegister[IDE_FEATURE_OFFSET]);
+ printk("TF.2=x%02x ", args->tfRegister[IDE_NSECTOR_OFFSET]);
+ printk("TF.3=x%02x ", args->tfRegister[IDE_SECTOR_OFFSET]);
+ printk("TF.4=x%02x ", args->tfRegister[IDE_LCYL_OFFSET]);
+ printk("TF.5=x%02x ", args->tfRegister[IDE_HCYL_OFFSET]);
+ printk("TF.6=x%02x ", args->tfRegister[IDE_SELECT_OFFSET]);
+ printk("TF.7=x%02x\n", args->tfRegister[IDE_COMMAND_OFFSET]);
+ printk(KERN_INFO "%s: ", drive->name);
+// printk("HTF.0=x%02x ", args->hobRegister[IDE_DATA_OFFSET_HOB]);
+ printk("HTF.1=x%02x ", args->hobRegister[IDE_FEATURE_OFFSET_HOB]);
+ printk("HTF.2=x%02x ", args->hobRegister[IDE_NSECTOR_OFFSET_HOB]);
+ printk("HTF.3=x%02x ", args->hobRegister[IDE_SECTOR_OFFSET_HOB]);
+ printk("HTF.4=x%02x ", args->hobRegister[IDE_LCYL_OFFSET_HOB]);
+ printk("HTF.5=x%02x ", args->hobRegister[IDE_HCYL_OFFSET_HOB]);
+ printk("HTF.6=x%02x ", args->hobRegister[IDE_SELECT_OFFSET_HOB]);
+ printk("HTF.7=x%02x\n", args->hobRegister[IDE_CONTROL_OFFSET_HOB]);
+ }
+#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */
+
+// if (args->tf_out_flags.all == 0) {
+ do_taskfile(drive,
+ (struct hd_drive_task_hdr *)&args->tfRegister,
+ (struct hd_drive_hob_hdr *)&args->hobRegister,
+ args->handler);
+// } else {
+// return flagged_taskfile(drive, args);
+// }
+
+ if (((args->command_type == IDE_DRIVE_TASK_RAW_WRITE) ||
+ (args->command_type == IDE_DRIVE_TASK_OUT)) &&
+ args->prehandler && args->handler)
+ return args->prehandler(drive, rq);
+ return ide_started;
+ }
+ case IDE_DRIVE_TASK:
+ {
+ byte *args = rq->buffer;
+ byte sel;
+
+ if (!(args)) break;
+#ifdef DEBUG
+ printk("%s: DRIVE_TASK_CMD ", drive->name);
+ printk("cmd=0x%02x ", args[0]);
+ printk("fr=0x%02x ", args[1]);
+ printk("ns=0x%02x ", args[2]);
+ printk("sc=0x%02x ", args[3]);
+ printk("lcyl=0x%02x ", args[4]);
+ printk("hcyl=0x%02x ", args[5]);
+ printk("sel=0x%02x\n", args[6]);
+#endif
+ OUT_BYTE(args[1], IDE_FEATURE_REG);
+ OUT_BYTE(args[3], IDE_SECTOR_REG);
+ OUT_BYTE(args[4], IDE_LCYL_REG);
+ OUT_BYTE(args[5], IDE_HCYL_REG);
+ sel = (args[6] & ~0x10);
+ if (drive->select.b.unit)
+ sel |= 0x10;
+ OUT_BYTE(sel, IDE_SELECT_REG);
+ ide_cmd(drive, args[0], args[2], &drive_cmd_intr);
+ return ide_started;
+ }
+ case IDE_DRIVE_CMD:
+ {
+ byte *args = rq->buffer;
+
+ if (!(args)) break;
+#ifdef DEBUG
+ printk("%s: DRIVE_CMD ", drive->name);
+ printk("cmd=0x%02x ", args[0]);
+ printk("sc=0x%02x ", args[1]);
+ printk("fr=0x%02x ", args[2]);
+ printk("xx=0x%02x\n", args[3]);
+#endif
+ if (args[0] == WIN_SMART) {
+ OUT_BYTE(0x4f, IDE_LCYL_REG);
+ OUT_BYTE(0xc2, IDE_HCYL_REG);
+ OUT_BYTE(args[2],IDE_FEATURE_REG);
+ OUT_BYTE(args[1],IDE_SECTOR_REG);
+ ide_cmd(drive, args[0], args[3], &drive_cmd_intr);
+ return ide_started;
+ }
+ OUT_BYTE(args[2],IDE_FEATURE_REG);
+ ide_cmd(drive, args[0], args[1], &drive_cmd_intr);
+ return ide_started;
+ }
+ default:
+ break;
+ }
+ /*
+ * NULL is actually a valid way of waiting for
+ * all current requests to be flushed from the queue.
+ */
+#ifdef DEBUG
+ printk("%s: DRIVE_CMD (null)\n", drive->name);
+#endif
+ ide_end_drive_cmd(drive, GET_STAT(), GET_ERR());
+ return ide_stopped;
+}
+
+/*
+ * start_request() initiates handling of a new I/O request.
+ * (Restructured to reverse the perverted changes anonymously made
+ * back in 2.3.99-pre6.)
+ */
+static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
+{
+ ide_startstop_t startstop;
+ unsigned long block, blockend;
+ unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS;
+ ide_hwif_t *hwif = HWIF(drive);
+
+#ifdef DEBUG
+ printk("%s: start_request: current=0x%08lx\n", hwif->name, (unsigned long) rq);
+#endif
+ /* bail early if we've exceeded max_failures */
+ if (drive->max_failures && (drive->failures > drive->max_failures)) {
+ goto kill_rq;
+ }
+
+ if (unit >= MAX_DRIVES) {
+ printk("%s: bad device number: %s\n", hwif->name, kdevname(rq->rq_dev));
+ goto kill_rq;
+ }
+#ifdef DEBUG
+ if (rq->bh && !buffer_locked(rq->bh)) {
+ printk("%s: block not locked\n", drive->name);
+ goto kill_rq;
+ }
+#endif
+ block = rq->sector;
+ blockend = block + rq->nr_sectors;
+
+
+#ifdef NEVER
+ if ((rq->cmd == READ || rq->cmd == WRITE) &&
+ (drive->media == ide_disk || drive->media == ide_floppy)) {
+ if ((blockend < block) || (blockend > drive->part[minor&PARTN_MASK].nr_sects)) {
+ printk("%s%c: bad access: block=%ld, count=%ld\n", drive->name,
+ (minor&PARTN_MASK)?'0'+(minor&PARTN_MASK):' ', block, rq->nr_sectors);
+ goto kill_rq;
+ }
+ block += drive->part[minor&PARTN_MASK].start_sect + drive->sect0;
+ }
+ /* Yecch - this will shift the entire interval,
+ possibly killing some innocent following sector */
+ if (block == 0 && drive->remap_0_to_1 == 1)
+ block = 1; /* redirect MBR access to EZ-Drive partn table */
+#endif
+
+#ifdef NEVER_DEBUG
+ {
+ printk(" ide::start_request %lx %lx %lx %lx %lx\n",
+ rq->sector, rq->nr_sectors, block,
+ drive->part[minor&PARTN_MASK].start_sect, drive->sect0);
+ }
+#endif
+
+#if (DISK_RECOVERY_TIME > 0)
+ while ((read_timer() - hwif->last_time) < DISK_RECOVERY_TIME);
+#endif
+
+ SELECT_DRIVE(hwif, drive);
+ if (ide_wait_stat(&startstop, drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) {
+ printk("%s: drive not ready for command\n", drive->name);
+ return startstop;
+ }
+	drive->special.all = 0;	/* forcibly cleared, so the do_special() path below is never taken */
+ if (!drive->special.all) {
+ switch(rq->cmd) {
+ case IDE_DRIVE_CMD:
+ case IDE_DRIVE_TASK:
+ case IDE_DRIVE_TASKFILE:
+ return execute_drive_cmd(drive, rq);
+ default:
+ break;
+ }
+ if (drive->driver != NULL) {
+ return (DRIVER(drive)->do_request(drive, rq, block));
+ }
+ printk("%s: media type %d not supported\n", drive->name, drive->media);
+ goto kill_rq;
+ }
+ return do_special(drive);
+kill_rq:
+ if (drive->driver != NULL)
+ DRIVER(drive)->end_request(0, HWGROUP(drive));
+ else
+ ide_end_request(0, HWGROUP(drive));
+ return ide_stopped;
+}
+
+ide_startstop_t restart_request (ide_drive_t *drive)
+{
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ unsigned long flags;
+ struct request *rq;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ hwgroup->handler = NULL;
+ del_timer(&hwgroup->timer);
+ rq = hwgroup->rq;
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ return start_request(drive, rq);
+}
+
+/*
+ * ide_stall_queue() can be used by a drive to give excess bandwidth back
+ * to the hwgroup by sleeping for timeout jiffies.
+ */
+void ide_stall_queue (ide_drive_t *drive, unsigned long timeout)
+{
+ if (timeout > WAIT_WORSTCASE)
+ timeout = WAIT_WORSTCASE;
+ drive->sleep = timeout + jiffies;
+}
+
+#define WAKEUP(drive) ((drive)->service_start + 2 * (drive)->service_time)
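+
+/*
+ * Worked example of the heuristic above: a drive whose service of its last
+ * request began at jiffy 1000, and which typically takes 50 jiffies per
+ * request, is presumed due again at WAKEUP(drive) == 1000 + 2*50 == 1100.
+ * choose_drive() below favours the queued drive with the earliest such
+ * estimated wakeup time.
+ */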
+
+/*
+ * choose_drive() selects the next drive which will be serviced.
+ */
+static inline ide_drive_t *choose_drive (ide_hwgroup_t *hwgroup)
+{
+ ide_drive_t *drive, *best;
+
+repeat:
+ best = NULL;
+ drive = hwgroup->drive;
+ do {
+ if (!list_empty(&drive->queue.queue_head) && (!drive->sleep || 0 <= (signed long)(jiffies - drive->sleep))) {
+ if (!best
+ || (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep)))
+ || (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive))))
+ {
+ if( !drive->queue.plugged )
+ best = drive;
+ }
+ }
+ } while ((drive = drive->next) != hwgroup->drive);
+ if (best && best->nice1 && !best->sleep && best != hwgroup->drive && best->service_time > WAIT_MIN_SLEEP) {
+ long t = (signed long)(WAKEUP(best) - jiffies);
+ if (t >= WAIT_MIN_SLEEP) {
+ /*
+ * We *may* have some time to spare, but first let's see if
+ * someone can potentially benefit from our nice mood today..
+ */
+ drive = best->next;
+ do {
+ if (!drive->sleep
+ && 0 < (signed long)(WAKEUP(drive) - (jiffies - best->service_time))
+ && 0 < (signed long)((jiffies + t) - WAKEUP(drive)))
+ {
+ ide_stall_queue(best, IDE_MIN(t, 10 * WAIT_MIN_SLEEP));
+ goto repeat;
+ }
+ } while ((drive = drive->next) != best);
+ }
+ }
+ return best;
+}
+
+/*
+ * Issue a new request to a drive from hwgroup
+ * Caller must have already done spin_lock_irqsave(&io_request_lock, ..);
+ *
+ * A hwgroup is a serialized group of IDE interfaces. Usually there is
+ * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640)
+ * may have both interfaces in a single hwgroup to "serialize" access.
+ * Or possibly multiple ISA interfaces can share a common IRQ by being grouped
+ * together into one hwgroup for serialized access.
+ *
+ * Note also that several hwgroups can end up sharing a single IRQ,
+ * possibly along with many other devices. This is especially common in
+ * PCI-based systems with off-board IDE controller cards.
+ *
+ * The IDE driver uses the single global io_request_lock spinlock to protect
+ * access to the request queues, and to protect the hwgroup->busy flag.
+ *
+ * The first thread into the driver for a particular hwgroup sets the
+ * hwgroup->busy flag to indicate that this hwgroup is now active,
+ * and then initiates processing of the top request from the request queue.
+ *
+ * Other threads attempting entry notice the busy setting, and will simply
+ * queue their new requests and exit immediately. Note that hwgroup->busy
+ * remains set even when the driver is merely awaiting the next interrupt.
+ * Thus, the meaning is "this hwgroup is busy processing a request".
+ *
+ * When processing of a request completes, the completing thread or IRQ-handler
+ * will start the next request from the queue. If no more work remains,
+ * the driver will clear the hwgroup->busy flag and exit.
+ *
+ * The io_request_lock (spinlock) is used to protect all access to the
+ * hwgroup->busy flag, but is otherwise not needed for most processing in
+ * the driver. This makes the driver much friendlier to shared IRQs
+ * than previous designs, while remaining 100% (?) SMP safe and capable.
+ */
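+/*
+ * Minimal sketch of the busy-flag protocol described above (illustrative
+ * pseudo-code only -- the caller already holds io_request_lock):
+ *
+ *	if (hwgroup->busy)
+ *		return;			(someone else owns the hwgroup)
+ *	hwgroup->busy = 1;		(claim it, start the top request)
+ *	...
+ *	hwgroup->busy = 0;		(cleared only once the queue drains)
+ */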
+/* --BenH: made non-static as ide-pmac.c uses it to kick the hwgroup back
+ * into life on wakeup from machine sleep.
+ */
+void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
+{
+ ide_drive_t *drive;
+ ide_hwif_t *hwif;
+ struct request *rq;
+ ide_startstop_t startstop;
+
+ ide_get_lock(&ide_lock, ide_intr, hwgroup); /* for atari only: POSSIBLY BROKEN HERE(?) */
+
+ __cli(); /* necessary paranoia: ensure IRQs are masked on local CPU */
+
+ while (!hwgroup->busy) {
+ hwgroup->busy = 1;
+ drive = choose_drive(hwgroup);
+ if (drive == NULL) {
+ unsigned long sleep = 0;
+ hwgroup->rq = NULL;
+ drive = hwgroup->drive;
+ do {
+ if (drive->sleep && (!sleep || 0 < (signed long)(sleep - drive->sleep)))
+ sleep = drive->sleep;
+ } while ((drive = drive->next) != hwgroup->drive);
+ if (sleep) {
+ /*
+ * Take a short snooze, and then wake up this hwgroup again.
+			 * This gives other hwgroups on the same IRQ a chance to
+ * play fairly with us, just in case there are big differences
+ * in relative throughputs.. don't want to hog the cpu too much.
+ */
+ if (0 < (signed long)(jiffies + WAIT_MIN_SLEEP - sleep))
+ sleep = jiffies + WAIT_MIN_SLEEP;
+#if 1
+ if (timer_pending(&hwgroup->timer))
+ printk("ide_set_handler: timer already active\n");
+#endif
+ hwgroup->sleeping = 1; /* so that ide_timer_expiry knows what to do */
+ mod_timer(&hwgroup->timer, sleep);
+ /* we purposely leave hwgroup->busy==1 while sleeping */
+ } else {
+ /* Ugly, but how can we sleep for the lock otherwise? perhaps from tq_disk? */
+ ide_release_lock(&ide_lock); /* for atari only */
+ hwgroup->busy = 0;
+ }
+ return; /* no more work for this hwgroup (for now) */
+ }
+ hwif = HWIF(drive);
+ if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif && hwif->io_ports[IDE_CONTROL_OFFSET]) {
+ /* set nIEN for previous hwif */
+ SELECT_INTERRUPT(hwif, drive);
+ }
+ hwgroup->hwif = hwif;
+ hwgroup->drive = drive;
+ drive->sleep = 0;
+ drive->service_start = jiffies;
+
+ if ( drive->queue.plugged ) /* paranoia */
+ printk("%s: Huh? nuking plugged queue\n", drive->name);
+
+ rq = hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head);
+ /*
+ * Some systems have trouble with IDE IRQs arriving while
+ * the driver is still setting things up. So, here we disable
+ * the IRQ used by this interface while the request is being started.
+ * This may look bad at first, but pretty much the same thing
+ * happens anyway when any interrupt comes in, IDE or otherwise
+ * -- the kernel masks the IRQ while it is being handled.
+ */
+ if (masked_irq && hwif->irq != masked_irq)
+ disable_irq_nosync(hwif->irq);
+ spin_unlock(&io_request_lock);
+ ide__sti(); /* allow other IRQs while we start this request */
+ startstop = start_request(drive, rq);
+ spin_lock_irq(&io_request_lock);
+ if (masked_irq && hwif->irq != masked_irq)
+ enable_irq(hwif->irq);
+ if (startstop == ide_stopped)
+ hwgroup->busy = 0;
+ }
+}
+
+/*
+ * ide_get_queue() returns the queue which corresponds to a given device.
+ */
+request_queue_t *ide_get_queue (kdev_t dev)
+{
+ ide_hwif_t *hwif = (ide_hwif_t *)blk_dev[MAJOR(dev)].data;
+
+ return &hwif->drives[DEVICE_NR(dev) & 1].queue;
+}
+
+/*
+ * Passes the stuff to ide_do_request
+ */
+void do_ide_request(request_queue_t *q)
+{
+ ide_do_request(q->queuedata, 0);
+}
+
+/*
+ * Un-busy the hwgroup etc., and clear any pending DMA status. We want to
+ * retry the current request in PIO mode instead of risking tossing it
+ * all away.
+ */
+void ide_dma_timeout_retry(ide_drive_t *drive)
+{
+ ide_hwif_t *hwif = HWIF(drive);
+ struct request *rq;
+
+ /*
+ * end current dma transaction
+ */
+ (void) hwif->dmaproc(ide_dma_end, drive);
+
+ /*
+ * complain a little, later we might remove some of this verbosity
+ */
+ printk("%s: timeout waiting for DMA\n", drive->name);
+ (void) hwif->dmaproc(ide_dma_timeout, drive);
+
+ /*
+ * disable dma for now, but remember that we did so because of
+ * a timeout -- we'll reenable after we finish this next request
+ * (or rather the first chunk of it) in pio.
+ */
+ drive->retry_pio++;
+ drive->state = DMA_PIO_RETRY;
+ (void) hwif->dmaproc(ide_dma_off_quietly, drive);
+
+ /*
+ * un-busy drive etc (hwgroup->busy is cleared on return) and
+ * make sure request is sane
+ */
+ rq = HWGROUP(drive)->rq;
+ HWGROUP(drive)->rq = NULL;
+
+ rq->errors = 0;
+ rq->sector = rq->bh->b_rsector;
+ rq->current_nr_sectors = rq->bh->b_size >> 9;
+ rq->buffer = rq->bh->b_data;
+}
+
+/*
+ * ide_timer_expiry() is our timeout function for all drive operations.
+ * But note that it can also be invoked as a result of a "sleep" operation
+ * triggered by the mod_timer() call in ide_do_request.
+ */
+void ide_timer_expiry (unsigned long data)
+{
+ ide_hwgroup_t *hwgroup = (ide_hwgroup_t *) data;
+ ide_handler_t *handler;
+ ide_expiry_t *expiry;
+ unsigned long flags;
+ unsigned long wait;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ del_timer(&hwgroup->timer);
+
+ if ((handler = hwgroup->handler) == NULL) {
+ /*
+ * Either a marginal timeout occurred
+ * (got the interrupt just as timer expired),
+ * or we were "sleeping" to give other devices a chance.
+ * Either way, we don't really want to complain about anything.
+ */
+ if (hwgroup->sleeping) {
+ hwgroup->sleeping = 0;
+ hwgroup->busy = 0;
+ }
+ } else {
+ ide_drive_t *drive = hwgroup->drive;
+ if (!drive) {
+ printk("ide_timer_expiry: hwgroup->drive was NULL\n");
+ hwgroup->handler = NULL;
+ } else {
+ ide_hwif_t *hwif;
+ ide_startstop_t startstop;
+ if (!hwgroup->busy) {
+ hwgroup->busy = 1; /* paranoia */
+ printk("%s: ide_timer_expiry: hwgroup->busy was 0 ??\n", drive->name);
+ }
+ if ((expiry = hwgroup->expiry) != NULL) {
+ /* continue */
+ if ((wait = expiry(drive)) != 0) {
+ /* reset timer */
+ hwgroup->timer.expires = jiffies + wait;
+ add_timer(&hwgroup->timer);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return;
+ }
+ }
+ hwgroup->handler = NULL;
+ /*
+ * We need to simulate a real interrupt when invoking
+ * the handler() function, which means we need to globally
+ * mask the specific IRQ:
+ */
+ spin_unlock(&io_request_lock);
+ hwif = HWIF(drive);
+#if DISABLE_IRQ_NOSYNC
+ disable_irq_nosync(hwif->irq);
+#else
+ disable_irq(hwif->irq); /* disable_irq_nosync ?? */
+#endif /* DISABLE_IRQ_NOSYNC */
+ __cli(); /* local CPU only, as if we were handling an interrupt */
+ if (hwgroup->poll_timeout != 0) {
+ startstop = handler(drive);
+ } else if (drive_is_ready(drive)) {
+ if (drive->waiting_for_dma)
+ (void) hwgroup->hwif->dmaproc(ide_dma_lostirq, drive);
+ (void)ide_ack_intr(hwif);
+ printk("%s: lost interrupt\n", drive->name);
+ startstop = handler(drive);
+ } else {
+ if (drive->waiting_for_dma) {
+ startstop = ide_stopped;
+ ide_dma_timeout_retry(drive);
+ } else
+ startstop = ide_error(drive, "irq timeout", GET_STAT());
+ }
+ set_recovery_timer(hwif);
+ drive->service_time = jiffies - drive->service_start;
+ enable_irq(hwif->irq);
+ spin_lock_irq(&io_request_lock);
+ if (startstop == ide_stopped)
+ hwgroup->busy = 0;
+ }
+ }
+ ide_do_request(hwgroup, 0);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * There's nothing really useful we can do with an unexpected interrupt,
+ * other than reading the status register (to clear it), and logging it.
+ * There should be no way that an irq can happen before we're ready for it,
+ * so we needn't worry much about losing an "important" interrupt here.
+ *
+ * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the
+ * drive enters "idle", "standby", or "sleep" mode, so if the status looks
+ * "good", we just ignore the interrupt completely.
+ *
+ * This routine assumes __cli() is in effect when called.
+ *
+ * If an unexpected interrupt happens on irq15 while we are handling irq14
+ * and if the two interfaces are "serialized" (CMD640), then it looks like
+ * we could screw up by interfering with a new request being set up for irq15.
+ *
+ * In reality, this is a non-issue. The new command is not sent unless the
+ * drive is ready to accept one, in which case we know the drive is not
+ * trying to interrupt us. And ide_set_handler() is always invoked before
+ * completing the issuance of any new drive command, so we will not be
+ * accidentally invoked as a result of any valid command completion interrupt.
+ *
+ */
+static void unexpected_intr (int irq, ide_hwgroup_t *hwgroup)
+{
+ byte stat;
+ ide_hwif_t *hwif = hwgroup->hwif;
+
+ /*
+ * handle the unexpected interrupt
+ */
+ do {
+ if (hwif->irq == irq) {
+ stat = IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]);
+ if (!OK_STAT(stat, READY_STAT, BAD_STAT)) {
+ /* Try to not flood the console with msgs */
+ static unsigned long last_msgtime, count;
+ ++count;
+ if (0 < (signed long)(jiffies - (last_msgtime + HZ))) {
+ last_msgtime = jiffies;
+ printk("%s%s: unexpected interrupt, status=0x%02x, count=%ld\n",
+ hwif->name, (hwif->next == hwgroup->hwif) ? "" : "(?)", stat, count);
+ }
+ }
+ }
+ } while ((hwif = hwif->next) != hwgroup->hwif);
+}
+
+/*
+ * entry point for all interrupts, caller does __cli() for us
+ */
+void ide_intr (int irq, void *dev_id, struct pt_regs *regs)
+{
+ unsigned long flags;
+ ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id;
+ ide_hwif_t *hwif;
+ ide_drive_t *drive;
+ ide_handler_t *handler;
+ ide_startstop_t startstop;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ hwif = hwgroup->hwif;
+
+ if (!ide_ack_intr(hwif)) {
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return;
+ }
+
+ if ((handler = hwgroup->handler) == NULL || hwgroup->poll_timeout != 0) {
+ /*
+ * Not expecting an interrupt from this drive.
+ * That means this could be:
+ * (1) an interrupt from another PCI device
+ * sharing the same PCI INT# as us.
+ * or (2) a drive just entered sleep or standby mode,
+ * and is interrupting to let us know.
+ * or (3) a spurious interrupt of unknown origin.
+ *
+ * For PCI, we cannot tell the difference,
+ * so in that case we just ignore it and hope it goes away.
+ */
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ if (IDE_PCI_DEVID_EQ(hwif->pci_devid, IDE_PCI_DEVID_NULL))
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+ {
+ /*
+ * Probably not a shared PCI interrupt,
+ * so we can safely try to do something about it:
+ */
+ unexpected_intr(irq, hwgroup);
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ } else {
+ /*
+ * Whack the status register, just in case we have a leftover pending IRQ.
+ */
+ (void) IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]);
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return;
+ }
+ drive = hwgroup->drive;
+ if (!drive) {
+ /*
+ * This should NEVER happen, and there isn't much we could do about it here.
+ */
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return;
+ }
+ if (!drive_is_ready(drive)) {
+ /*
+ * This happens regularly when we share a PCI IRQ with another device.
+ * Unfortunately, it can also happen with some buggy drives that trigger
+ * the IRQ before their status register is up to date. Hopefully we have
+ * enough advance overhead that the latter isn't a problem.
+ */
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return;
+ }
+ if (!hwgroup->busy) {
+ hwgroup->busy = 1; /* paranoia */
+ printk("%s: ide_intr: hwgroup->busy was 0 ??\n", drive->name);
+ }
+ hwgroup->handler = NULL;
+ del_timer(&hwgroup->timer);
+ spin_unlock(&io_request_lock);
+
+ if (drive->unmask)
+ ide__sti(); /* local CPU only */
+ startstop = handler(drive); /* service this interrupt, may set handler for next interrupt */
+ spin_lock_irq(&io_request_lock);
+
+ /*
+ * Note that handler() may have set things up for another
+ * interrupt to occur soon, but it cannot happen until
+ * we exit from this routine, because it will be the
+ * same irq as is currently being serviced here, and Linux
+ * won't allow another of the same (on any CPU) until we return.
+ */
+ set_recovery_timer(HWIF(drive));
+ drive->service_time = jiffies - drive->service_start;
+ if (startstop == ide_stopped) {
+ if (hwgroup->handler == NULL) { /* paranoia */
+ hwgroup->busy = 0;
+ ide_do_request(hwgroup, hwif->irq);
+ } else {
+ printk("%s: ide_intr: huh? expected NULL handler on exit\n", drive->name);
+ }
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * get_info_ptr() returns the (ide_drive_t *) for a given device number.
+ * It returns NULL if the given device number does not match any present drives.
+ */
+ide_drive_t *get_info_ptr (kdev_t i_rdev)
+{
+ int major = MAJOR(i_rdev);
+#if 0
+ int minor = MINOR(i_rdev) & PARTN_MASK;
+#endif
+ unsigned int h;
+
+ for (h = 0; h < MAX_HWIFS; ++h) {
+ ide_hwif_t *hwif = &ide_hwifs[h];
+ if (hwif->present && major == hwif->major) {
+ unsigned unit = DEVICE_NR(i_rdev);
+ if (unit < MAX_DRIVES) {
+ ide_drive_t *drive = &hwif->drives[unit];
+#if 0
+ if ((drive->present) && (drive->part[minor].nr_sects))
+#else
+ if (drive->present)
+#endif
+ return drive;
+ }
+ break;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * This function is intended to be used prior to invoking ide_do_drive_cmd().
+ */
+void ide_init_drive_cmd (struct request *rq)
+{
+ memset(rq, 0, sizeof(*rq));
+ rq->cmd = IDE_DRIVE_CMD;
+}
+
+/*
+ * This function issues a special IDE device request
+ * onto the request queue.
+ *
+ * If action is ide_wait, then the rq is queued at the end of the
+ * request queue, and the function sleeps until it has been processed.
+ * This is for use when invoked from an ioctl handler.
+ *
+ * If action is ide_preempt, then the rq is queued at the head of
+ * the request queue, displacing the currently-being-processed
+ * request and this function returns immediately without waiting
+ * for the new rq to be completed. This is VERY DANGEROUS, and is
+ * intended for careful use by the ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_next, then the rq is queued immediately after
+ * the currently-being-processed-request (if any), and the function
+ * returns without waiting for the new rq to be completed. As above,
+ * This is VERY DANGEROUS, and is intended for careful use by the
+ * ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_end, then the rq is queued at the end of the
+ * request queue, and the function returns immediately without waiting
+ * for the new rq to be completed. This is again intended for careful
+ * use by the ATAPI tape/cdrom driver code.
+ */
+int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t action)
+{
+ unsigned long flags;
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ unsigned int major = HWIF(drive)->major;
+ struct list_head *queue_head = &drive->queue.queue_head;
+ /*DECLARE_COMPLETION(wait);*/
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (HWIF(drive)->chipset == ide_pdc4030 && rq->buffer != NULL)
+ return -ENOSYS; /* special drive cmds not supported */
+#endif
+ rq->errors = 0;
+ rq->rq_status = RQ_ACTIVE;
+ rq->rq_dev = MKDEV(major,(drive->select.b.unit)<<PARTN_BITS);
+ if (action == ide_wait) {
+ printk("SMH says: wait on IDE device but no queue :-(\n");
+ return 0;
+ }
+ spin_lock_irqsave(&io_request_lock, flags);
+ if (list_empty(queue_head) || action == ide_preempt) {
+ if (action == ide_preempt)
+ hwgroup->rq = NULL;
+ } else {
+ if (action == ide_wait || action == ide_end) {
+ queue_head = queue_head->prev;
+ } else
+ queue_head = queue_head->next;
+ }
+ list_add(&rq->queue, queue_head);
+ ide_do_request(hwgroup, 0);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return 0;
+}
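+
+/*
+ * Illustrative sketch (not part of the driver): queueing a taskfile
+ * command without waiting for it, using the actions described above.
+ * The buffer layout (cmd, nsect, feature, sector count) matches what
+ * ide_wait_cmd() builds below; "cmd" is a hypothetical opcode that
+ * transfers no data. ide_end is used here since ide_wait is stubbed
+ * out in this port.
+ *
+ *	byte args[4] = { cmd, 0, 0, 0 };
+ *	struct request rq;
+ *
+ *	ide_init_drive_cmd(&rq);
+ *	rq.buffer = args;
+ *	(void) ide_do_drive_cmd(drive, &rq, ide_end);
+ */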
+
+/*
+ * This routine is called to flush all partitions and partition tables
+ * for a changed disk, and then re-read the new partition table.
+ * If we are revalidating a disk because of a media change, then we
+ * enter with usage == 0. If we are using an ioctl, we automatically have
+ * usage == 1 (we need an open channel to use an ioctl :-), so this
+ * is our limit.
+ */
+int ide_revalidate_disk (kdev_t i_rdev)
+{
+ ide_drive_t *drive;
+ ide_hwgroup_t *hwgroup;
+ unsigned int p, major, minor;
+ unsigned long flags;
+
+ if ((drive = get_info_ptr(i_rdev)) == NULL)
+ return -ENODEV;
+ major = MAJOR(i_rdev);
+ minor = drive->select.b.unit << PARTN_BITS;
+ hwgroup = HWGROUP(drive);
+ spin_lock_irqsave(&io_request_lock, flags);
+ if (drive->busy || (drive->usage > 1)) {
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return -EBUSY;
+ }
+ drive->busy = 1;
+ MOD_INC_USE_COUNT;
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ for (p = 0; p < (1<<PARTN_BITS); ++p) {
+ if (drive->part[p].nr_sects > 0) {
+ kdev_t devp = MKDEV(major, minor+p);
+ invalidate_device(devp, 1);
+ }
+ drive->part[p].start_sect = 0;
+ drive->part[p].nr_sects = 0;
+ }
+
+ if (DRIVER(drive)->revalidate)
+ DRIVER(drive)->revalidate(drive);
+
+ drive->busy = 0;
+ /*wake_up(&drive->wqueue);*/
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+
+static void revalidate_drives (void)
+{
+ ide_hwif_t *hwif;
+ ide_drive_t *drive;
+ int index, unit;
+
+ for (index = 0; index < MAX_HWIFS; ++index) {
+ hwif = &ide_hwifs[index];
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ drive = &ide_hwifs[index].drives[unit];
+ if (drive->revalidate) {
+ drive->revalidate = 0;
+ if (!initializing)
+ (void) ide_revalidate_disk(MKDEV(hwif->major, unit<<PARTN_BITS));
+ }
+ }
+ }
+}
+
+static void ide_probe_module (void)
+{
+ if (!ide_probe) {
+#if defined(CONFIG_KMOD) && defined(CONFIG_BLK_DEV_IDE_MODULE)
+ (void) request_module("ide-probe-mod");
+#endif /* (CONFIG_KMOD) && (CONFIG_BLK_DEV_IDE_MODULE) */
+ } else {
+ (void) ide_probe->init();
+ }
+ revalidate_drives();
+}
+
+static void ide_driver_module (void)
+{
+ int index;
+ ide_module_t *module = ide_modules;
+
+ for (index = 0; index < MAX_HWIFS; ++index)
+ if (ide_hwifs[index].present)
+ goto search;
+ ide_probe_module();
+search:
+ while (module) {
+ (void) module->init();
+ module = module->next;
+ }
+ revalidate_drives();
+}
+
+static int ide_open (struct inode * inode, struct file * filp)
+{
+ ide_drive_t *drive;
+
+ if ((drive = get_info_ptr(inode->i_rdev)) == NULL)
+ return -ENXIO;
+ if (drive->driver == NULL)
+ ide_driver_module();
+#ifdef CONFIG_KMOD
+ if (drive->driver == NULL) {
+ if (drive->media == ide_disk)
+ (void) request_module("ide-disk");
+ if (drive->media == ide_cdrom)
+ (void) request_module("ide-cd");
+ if (drive->media == ide_tape)
+ (void) request_module("ide-tape");
+ if (drive->media == ide_floppy)
+ (void) request_module("ide-floppy");
+#if defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI)
+ if (drive->media == ide_scsi)
+ (void) request_module("ide-scsi");
+#endif /* defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI) */
+ }
+#endif /* CONFIG_KMOD */
+#if 0
+ while (drive->busy)
+ sleep_on(&drive->wqueue);
+#endif
+ drive->usage++;
+ if (drive->driver != NULL)
+ return DRIVER(drive)->open(inode, filp, drive);
+ printk ("%s: driver not present\n", drive->name);
+ drive->usage--;
+ return -ENXIO;
+}
+
+/*
+ * Releasing a block device means we sync() it, so that it can safely
+ * be forgotten about...
+ */
+static int ide_release (struct inode * inode, struct file * file)
+{
+ ide_drive_t *drive;
+
+ if ((drive = get_info_ptr(inode->i_rdev)) != NULL) {
+ drive->usage--;
+ if (drive->driver != NULL)
+ DRIVER(drive)->release(inode, file, drive);
+ }
+ return 0;
+}
+
+int ide_replace_subdriver (ide_drive_t *drive, const char *driver)
+{
+ if (!drive->present || drive->busy || drive->usage)
+ goto abort;
+ if (drive->driver != NULL && DRIVER(drive)->cleanup(drive))
+ goto abort;
+ strncpy(drive->driver_req, driver, 9);
+ ide_driver_module();
+ drive->driver_req[0] = 0;
+ ide_driver_module();
+ if (DRIVER(drive) && !strcmp(DRIVER(drive)->name, driver))
+ return 0;
+abort:
+ return 1;
+}
+
+#ifdef CONFIG_PROC_FS
+ide_proc_entry_t generic_subdriver_entries[] = {
+ { "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL },
+ { NULL, 0, NULL, NULL }
+};
+#endif
+
+/*
+ * Note that we only release the standard ports,
+ * and do not even try to handle any extra ports
+ * allocated for weird IDE interface chipsets.
+ */
+void hwif_unregister (ide_hwif_t *hwif)
+{
+ if (hwif->straight8) {
+ ide_release_region(hwif->io_ports[IDE_DATA_OFFSET], 8);
+ goto jump_eight;
+ }
+ if (hwif->io_ports[IDE_DATA_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_DATA_OFFSET], 1);
+ if (hwif->io_ports[IDE_ERROR_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_ERROR_OFFSET], 1);
+ if (hwif->io_ports[IDE_NSECTOR_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_NSECTOR_OFFSET], 1);
+ if (hwif->io_ports[IDE_SECTOR_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_SECTOR_OFFSET], 1);
+ if (hwif->io_ports[IDE_LCYL_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_LCYL_OFFSET], 1);
+ if (hwif->io_ports[IDE_HCYL_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_HCYL_OFFSET], 1);
+ if (hwif->io_ports[IDE_SELECT_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_SELECT_OFFSET], 1);
+ if (hwif->io_ports[IDE_STATUS_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_STATUS_OFFSET], 1);
+jump_eight:
+ if (hwif->io_ports[IDE_CONTROL_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1);
+#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC)
+ if (hwif->io_ports[IDE_IRQ_OFFSET])
+ ide_release_region(hwif->io_ports[IDE_IRQ_OFFSET], 1);
+#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */
+}
+
+void ide_unregister (unsigned int index)
+{
+ struct gendisk *gd;
+ ide_drive_t *drive, *d;
+ ide_hwif_t *hwif, *g;
+ ide_hwgroup_t *hwgroup;
+ int irq_count = 0, unit, i;
+ unsigned long flags;
+ unsigned int p, minor;
+ ide_hwif_t old_hwif;
+
+ if (index >= MAX_HWIFS)
+ return;
+ save_flags(flags); /* all CPUs */
+ cli(); /* all CPUs */
+ hwif = &ide_hwifs[index];
+ if (!hwif->present)
+ goto abort;
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ drive = &hwif->drives[unit];
+ if (!drive->present)
+ continue;
+ if (drive->busy || drive->usage)
+ goto abort;
+ if (drive->driver != NULL && DRIVER(drive)->cleanup(drive))
+ goto abort;
+ }
+ hwif->present = 0;
+
+ /*
+ * All clear? Then blow away the buffer cache
+ */
+ sti();
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ drive = &hwif->drives[unit];
+ if (!drive->present)
+ continue;
+ minor = drive->select.b.unit << PARTN_BITS;
+ for (p = 0; p < (1<<PARTN_BITS); ++p) {
+ if (drive->part[p].nr_sects > 0) {
+ kdev_t devp = MKDEV(hwif->major, minor+p);
+ invalidate_device(devp, 0);
+ }
+ }
+#ifdef CONFIG_PROC_FS
+ destroy_proc_ide_drives(hwif);
+#endif
+ }
+ cli();
+ hwgroup = hwif->hwgroup;
+
+ /*
+ * free the irq if we were the only hwif using it
+ */
+ g = hwgroup->hwif;
+ do {
+ if (g->irq == hwif->irq)
+ ++irq_count;
+ g = g->next;
+ } while (g != hwgroup->hwif);
+ if (irq_count == 1)
+ free_irq(hwif->irq, hwgroup);
+
+ /*
+ * Note that we only release the standard ports,
+ * and do not even try to handle any extra ports
+ * allocated for weird IDE interface chipsets.
+ */
+ hwif_unregister(hwif);
+
+ /*
+ * Remove us from the hwgroup, and free
+ * the hwgroup if we were the only member
+ */
+ d = hwgroup->drive;
+ for (i = 0; i < MAX_DRIVES; ++i) {
+ drive = &hwif->drives[i];
+#ifdef DEVFS_MUST_DIE
+ if (drive->de) {
+ devfs_unregister (drive->de);
+ drive->de = NULL;
+ }
+#endif
+ if (!drive->present)
+ continue;
+ while (hwgroup->drive->next != drive)
+ hwgroup->drive = hwgroup->drive->next;
+ hwgroup->drive->next = drive->next;
+ if (hwgroup->drive == drive)
+ hwgroup->drive = NULL;
+ if (drive->id != NULL) {
+ kfree(drive->id);
+ drive->id = NULL;
+ }
+ drive->present = 0;
+ blk_cleanup_queue(&drive->queue);
+ }
+ if (d->present)
+ hwgroup->drive = d;
+ while (hwgroup->hwif->next != hwif)
+ hwgroup->hwif = hwgroup->hwif->next;
+ hwgroup->hwif->next = hwif->next;
+ if (hwgroup->hwif == hwif)
+ kfree(hwgroup);
+ else
+ hwgroup->hwif = HWIF(hwgroup->drive);
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI)
+ if (hwif->dma_base) {
+ (void) ide_release_dma(hwif);
+ hwif->dma_base = 0;
+ }
+#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */
+
+ /*
+ * Remove us from the kernel's knowledge
+ */
+ unregister_blkdev(hwif->major, hwif->name);
+ kfree(blksize_size[hwif->major]);
+ kfree(max_sectors[hwif->major]);
+ /*kfree(max_readahead[hwif->major]);*/
+ blk_dev[hwif->major].data = NULL;
+ blk_dev[hwif->major].queue = NULL;
+ blksize_size[hwif->major] = NULL;
+ gd = hwif->gd;
+ if (gd) {
+ del_gendisk(gd);
+ kfree(gd->sizes);
+ kfree(gd->part);
+#ifdef DEVFS_MUST_DIE
+ if (gd->de_arr)
+ kfree (gd->de_arr);
+#endif
+ if (gd->flags)
+ kfree (gd->flags);
+ kfree(gd);
+ hwif->gd = NULL;
+ }
+ old_hwif = *hwif;
+ init_hwif_data (index); /* restore hwif data to pristine status */
+ hwif->hwgroup = old_hwif.hwgroup;
+ hwif->tuneproc = old_hwif.tuneproc;
+ hwif->speedproc = old_hwif.speedproc;
+ hwif->selectproc = old_hwif.selectproc;
+ hwif->resetproc = old_hwif.resetproc;
+ hwif->intrproc = old_hwif.intrproc;
+ hwif->maskproc = old_hwif.maskproc;
+ hwif->quirkproc = old_hwif.quirkproc;
+ hwif->rwproc = old_hwif.rwproc;
+ hwif->ideproc = old_hwif.ideproc;
+ hwif->dmaproc = old_hwif.dmaproc;
+ hwif->busproc = old_hwif.busproc;
+ hwif->bus_state = old_hwif.bus_state;
+ hwif->dma_base = old_hwif.dma_base;
+ hwif->dma_extra = old_hwif.dma_extra;
+ hwif->config_data = old_hwif.config_data;
+ hwif->select_data = old_hwif.select_data;
+ hwif->proc = old_hwif.proc;
+#ifndef CONFIG_BLK_DEV_IDECS
+ hwif->irq = old_hwif.irq;
+#endif /* CONFIG_BLK_DEV_IDECS */
+ hwif->major = old_hwif.major;
+ hwif->chipset = old_hwif.chipset;
+ hwif->autodma = old_hwif.autodma;
+ hwif->udma_four = old_hwif.udma_four;
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ hwif->pci_dev = old_hwif.pci_dev;
+ hwif->pci_devid = old_hwif.pci_devid;
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+ hwif->straight8 = old_hwif.straight8;
+ hwif->hwif_data = old_hwif.hwif_data;
+abort:
+ restore_flags(flags); /* all CPUs */
+}
+
+/*
+ * Set up the hw_regs_t structure described by the parameters. You
+ * may set up the hw structure yourself OR use this routine to
+ * do it for you.
+ */
+void ide_setup_ports ( hw_regs_t *hw,
+ ide_ioreg_t base, int *offsets,
+ ide_ioreg_t ctrl, ide_ioreg_t intr,
+ ide_ack_intr_t *ack_intr, int irq)
+{
+ int i;
+
+ for (i = 0; i < IDE_NR_PORTS; i++) {
+ if (offsets[i] == -1) {
+ switch(i) {
+ case IDE_CONTROL_OFFSET:
+ hw->io_ports[i] = ctrl;
+ break;
+#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC)
+ case IDE_IRQ_OFFSET:
+ hw->io_ports[i] = intr;
+ break;
+#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */
+ default:
+ hw->io_ports[i] = 0;
+ break;
+ }
+ } else {
+ hw->io_ports[i] = base + offsets[i];
+ }
+ }
+ hw->irq = irq;
+ hw->dma = NO_DMA;
+ hw->ack_intr = ack_intr;
+}
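+
+/*
+ * Illustrative use (hypothetical addresses and irq): taskfile registers
+ * at base+0..7 and a separately-mapped control register. An offsets[]
+ * entry of -1 selects the ctrl/intr arguments instead of base+offset,
+ * as implemented above.
+ *
+ *	hw_regs_t hw;
+ *	int offsets[IDE_NR_PORTS] = { 0, 1, 2, 3, 4, 5, 6, 7, -1, -1 };
+ *
+ *	ide_setup_ports(&hw, 0x1f0, offsets, 0x3f6, 0, NULL, 14);
+ */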
+
+/*
+ * Register an IDE interface, specifying exactly the registers etc.
+ * During early init (the global "initializing" flag is set) probing
+ * is deferred to the probe module; otherwise it runs immediately.
+ */
+int ide_register_hw (hw_regs_t *hw, ide_hwif_t **hwifp)
+{
+ int index, retry = 1;
+ ide_hwif_t *hwif;
+
+ do {
+ for (index = 0; index < MAX_HWIFS; ++index) {
+ hwif = &ide_hwifs[index];
+ if (hwif->hw.io_ports[IDE_DATA_OFFSET] == hw->io_ports[IDE_DATA_OFFSET])
+ goto found;
+ }
+ for (index = 0; index < MAX_HWIFS; ++index) {
+ hwif = &ide_hwifs[index];
+ if ((!hwif->present && !hwif->mate && !initializing) ||
+ (!hwif->hw.io_ports[IDE_DATA_OFFSET] && initializing))
+ goto found;
+ }
+ for (index = 0; index < MAX_HWIFS; index++)
+ ide_unregister(index);
+ } while (retry--);
+ return -1;
+found:
+ if (hwif->present)
+ ide_unregister(index);
+ if (hwif->present)
+ return -1;
+ memcpy(&hwif->hw, hw, sizeof(*hw));
+ memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->hw.io_ports));
+ hwif->irq = hw->irq;
+ hwif->noprobe = 0;
+ hwif->chipset = hw->chipset;
+
+ if (!initializing) {
+ ide_probe_module();
+#ifdef CONFIG_PROC_FS
+ create_proc_ide_interfaces();
+#endif
+ ide_driver_module();
+ }
+
+ if (hwifp)
+ *hwifp = hwif;
+
+ return (initializing || hwif->present) ? index : -1;
+}
+
+/*
+ * Compatibility function for existing drivers. If you want
+ * something different, use the function above.
+ */
+int ide_register (int arg1, int arg2, int irq)
+{
+ hw_regs_t hw;
+ ide_init_hwif_ports(&hw, (ide_ioreg_t) arg1, (ide_ioreg_t) arg2, NULL);
+ hw.irq = irq;
+ return ide_register_hw(&hw, NULL);
+}
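+
+/*
+ * Typical legacy call (illustrative values): register the primary ISA
+ * interface at the standard base/control ports on IRQ 14.
+ *
+ *	ide_register(0x1f0, 0x3f6, 14);
+ */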
+
+void ide_add_setting (ide_drive_t *drive, const char *name, int rw, int read_ioctl, int write_ioctl, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set)
+{
+ ide_settings_t **p = (ide_settings_t **) &drive->settings, *setting = NULL;
+
+ while ((*p) && strcmp((*p)->name, name) < 0)
+ p = &((*p)->next);
+ if ((setting = kmalloc(sizeof(*setting), GFP_KERNEL)) == NULL)
+ goto abort;
+ memset(setting, 0, sizeof(*setting));
+ if ((setting->name = kmalloc(strlen(name) + 1, GFP_KERNEL)) == NULL)
+ goto abort;
+ strcpy(setting->name, name); setting->rw = rw;
+ setting->read_ioctl = read_ioctl; setting->write_ioctl = write_ioctl;
+ setting->data_type = data_type; setting->min = min;
+ setting->max = max; setting->mul_factor = mul_factor;
+ setting->div_factor = div_factor; setting->data = data;
+ setting->set = set; setting->next = *p;
+ if (drive->driver)
+ setting->auto_remove = 1;
+ *p = setting;
+ return;
+abort:
+ if (setting)
+ kfree(setting);
+}
+
+void ide_remove_setting (ide_drive_t *drive, char *name)
+{
+ ide_settings_t **p = (ide_settings_t **) &drive->settings, *setting;
+
+ while ((*p) && strcmp((*p)->name, name))
+ p = &((*p)->next);
+ if ((setting = (*p)) == NULL)
+ return;
+ (*p) = setting->next;
+ kfree(setting->name);
+ kfree(setting);
+}
+
+static ide_settings_t *ide_find_setting_by_ioctl (ide_drive_t *drive, int cmd)
+{
+ ide_settings_t *setting = drive->settings;
+
+ while (setting) {
+ if (setting->read_ioctl == cmd || setting->write_ioctl == cmd)
+ break;
+ setting = setting->next;
+ }
+ return setting;
+}
+
+ide_settings_t *ide_find_setting_by_name (ide_drive_t *drive, char *name)
+{
+ ide_settings_t *setting = drive->settings;
+
+ while (setting) {
+ if (strcmp(setting->name, name) == 0)
+ break;
+ setting = setting->next;
+ }
+ return setting;
+}
+
+static void auto_remove_settings (ide_drive_t *drive)
+{
+ ide_settings_t *setting;
+repeat:
+ setting = drive->settings;
+ while (setting) {
+ if (setting->auto_remove) {
+ ide_remove_setting(drive, setting->name);
+ goto repeat;
+ }
+ setting = setting->next;
+ }
+}
+
+int ide_read_setting (ide_drive_t *drive, ide_settings_t *setting)
+{
+ int val = -EINVAL;
+ unsigned long flags;
+
+ if ((setting->rw & SETTING_READ)) {
+ spin_lock_irqsave(&io_request_lock, flags);
+ switch(setting->data_type) {
+ case TYPE_BYTE:
+ val = *((u8 *) setting->data);
+ break;
+ case TYPE_SHORT:
+ val = *((u16 *) setting->data);
+ break;
+ case TYPE_INT:
+ case TYPE_INTA:
+ val = *((u32 *) setting->data);
+ break;
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ }
+ return val;
+}
+
+int ide_spin_wait_hwgroup (ide_drive_t *drive)
+{
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ unsigned long timeout = jiffies + (3 * HZ);
+
+ spin_lock_irq(&io_request_lock);
+
+ while (hwgroup->busy) {
+ unsigned long lflags;
+ spin_unlock_irq(&io_request_lock);
+ __save_flags(lflags); /* local CPU only */
+ __sti(); /* local CPU only; needed for jiffies */
+ if (0 < (signed long)(jiffies - timeout)) {
+ __restore_flags(lflags); /* local CPU only */
+ printk("%s: channel busy\n", drive->name);
+ return -EBUSY;
+ }
+ __restore_flags(lflags); /* local CPU only */
+ spin_lock_irq(&io_request_lock);
+ }
+ return 0;
+}
+
+/*
+ * FIXME: This should be changed to enqueue a special request
+ * to the driver to change settings, and then wait on a sema for completion.
+ * The current scheme of polling is kludgey, though safe enough.
+ */
+int ide_write_setting (ide_drive_t *drive, ide_settings_t *setting, int val)
+{
+ int i;
+ u32 *p;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (!(setting->rw & SETTING_WRITE))
+ return -EPERM;
+ if (val < setting->min || val > setting->max)
+ return -EINVAL;
+ if (setting->set)
+ return setting->set(drive, val);
+ if (ide_spin_wait_hwgroup(drive))
+ return -EBUSY;
+ switch (setting->data_type) {
+ case TYPE_BYTE:
+ *((u8 *) setting->data) = val;
+ break;
+ case TYPE_SHORT:
+ *((u16 *) setting->data) = val;
+ break;
+ case TYPE_INT:
+ *((u32 *) setting->data) = val;
+ break;
+ case TYPE_INTA:
+ p = (u32 *) setting->data;
+ for (i = 0; i < 1 << PARTN_BITS; i++, p++)
+ *p = val;
+ break;
+ }
+ spin_unlock_irq(&io_request_lock);
+ return 0;
+}
+
+static int set_io_32bit(ide_drive_t *drive, int arg)
+{
+ drive->io_32bit = arg;
+#ifdef CONFIG_BLK_DEV_DTC2278
+ if (HWIF(drive)->chipset == ide_dtc2278)
+ HWIF(drive)->drives[!drive->select.b.unit].io_32bit = arg;
+#endif /* CONFIG_BLK_DEV_DTC2278 */
+ return 0;
+}
+
+static int set_using_dma (ide_drive_t *drive, int arg)
+{
+ if (!drive->driver || !DRIVER(drive)->supports_dma)
+ return -EPERM;
+ if (!drive->id || !(drive->id->capability & 1) || !HWIF(drive)->dmaproc)
+ return -EPERM;
+ if (HWIF(drive)->dmaproc(arg ? ide_dma_on : ide_dma_off, drive))
+ return -EIO;
+ return 0;
+}
+
+static int set_pio_mode (ide_drive_t *drive, int arg)
+{
+ struct request rq;
+
+ if (!HWIF(drive)->tuneproc)
+ return -ENOSYS;
+ if (drive->special.b.set_tune)
+ return -EBUSY;
+ ide_init_drive_cmd(&rq);
+ drive->tune_req = (byte) arg;
+ drive->special.b.set_tune = 1;
+ (void) ide_do_drive_cmd (drive, &rq, ide_wait);
+ return 0;
+}
+
+void ide_add_generic_settings (ide_drive_t *drive)
+{
+/*
+ * drive setting name read/write access read ioctl write ioctl data type min max mul_factor div_factor data pointer set function
+ */
+ ide_add_setting(drive, "io_32bit", drive->no_io_32bit ? SETTING_READ : SETTING_RW, HDIO_GET_32BIT, HDIO_SET_32BIT, TYPE_BYTE, 0, 1 + (SUPPORT_VLB_SYNC << 1), 1, 1, &drive->io_32bit, set_io_32bit);
+ ide_add_setting(drive, "keepsettings", SETTING_RW, HDIO_GET_KEEPSETTINGS, HDIO_SET_KEEPSETTINGS, TYPE_BYTE, 0, 1, 1, 1, &drive->keep_settings, NULL);
+ ide_add_setting(drive, "nice1", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->nice1, NULL);
+ ide_add_setting(drive, "pio_mode", SETTING_WRITE, -1, HDIO_SET_PIO_MODE, TYPE_BYTE, 0, 255, 1, 1, NULL, set_pio_mode);
+ ide_add_setting(drive, "slow", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->slow, NULL);
+ ide_add_setting(drive, "unmaskirq", drive->no_unmask ? SETTING_READ : SETTING_RW, HDIO_GET_UNMASKINTR, HDIO_SET_UNMASKINTR, TYPE_BYTE, 0, 1, 1, 1, &drive->unmask, NULL);
+ ide_add_setting(drive, "using_dma", SETTING_RW, HDIO_GET_DMA, HDIO_SET_DMA, TYPE_BYTE, 0, 1, 1, 1, &drive->using_dma, set_using_dma);
+ ide_add_setting(drive, "ide_scsi", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->scsi, NULL);
+ ide_add_setting(drive, "init_speed", SETTING_RW, -1, -1, TYPE_BYTE, 0, 69, 1, 1, &drive->init_speed, NULL);
+ ide_add_setting(drive, "current_speed", SETTING_RW, -1, -1, TYPE_BYTE, 0, 69, 1, 1, &drive->current_speed, NULL);
+ ide_add_setting(drive, "number", SETTING_RW, -1, -1, TYPE_BYTE, 0, 3, 1, 1, &drive->dn, NULL);
+}
+
+int ide_wait_cmd (ide_drive_t *drive, int cmd, int nsect, int feature, int sectors, byte *buf)
+{
+ struct request rq;
+ byte buffer[4];
+
+ if (!buf)
+ buf = buffer;
+ memset(buf, 0, 4 + SECTOR_WORDS * 4 * sectors);
+ ide_init_drive_cmd(&rq);
+ rq.buffer = buf;
+ *buf++ = cmd;
+ *buf++ = nsect;
+ *buf++ = feature;
+ *buf++ = sectors;
+ return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
+
+int ide_wait_cmd_task (ide_drive_t *drive, byte *buf)
+{
+ struct request rq;
+
+ ide_init_drive_cmd(&rq);
+ rq.cmd = IDE_DRIVE_TASK;
+ rq.buffer = buf;
+ return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
+
+/*
+ * Delay for *at least* 50ms. As we don't know how much time is left
+ * until the next tick occurs, we wait an extra tick to be safe.
+ * This is used only during the probing/polling for drives at boot time.
+ *
+ * However, it may prove useful in other places as well, so we export it now.
+ * The future may change this to a delay settable in milliseconds.
+ */
+void ide_delay_50ms (void)
+{
+#ifndef CONFIG_BLK_DEV_IDECS
+ mdelay(50);
+#else
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(HZ/20);
+#endif /* CONFIG_BLK_DEV_IDECS */
+}
+
+int system_bus_clock (void)
+{
+ return (int) (system_bus_speed ? system_bus_speed : ide_system_bus_speed());
+}
+
+int ide_reinit_drive (ide_drive_t *drive)
+{
+ switch (drive->media) {
+#ifdef CONFIG_BLK_DEV_IDECD
+ case ide_cdrom:
+ {
+ extern int ide_cdrom_reinit(ide_drive_t *drive);
+ if (ide_cdrom_reinit(drive))
+ return 1;
+ break;
+ }
+#endif /* CONFIG_BLK_DEV_IDECD */
+#ifdef CONFIG_BLK_DEV_IDEDISK
+ case ide_disk:
+ {
+ extern int idedisk_reinit(ide_drive_t *drive);
+ if (idedisk_reinit(drive))
+ return 1;
+ break;
+ }
+#endif /* CONFIG_BLK_DEV_IDEDISK */
+#ifdef CONFIG_BLK_DEV_IDEFLOPPY
+ case ide_floppy:
+ {
+ extern int idefloppy_reinit(ide_drive_t *drive);
+ if (idefloppy_reinit(drive))
+ return 1;
+ break;
+ }
+#endif /* CONFIG_BLK_DEV_IDEFLOPPY */
+#ifdef CONFIG_BLK_DEV_IDETAPE
+ case ide_tape:
+ {
+ extern int idetape_reinit(ide_drive_t *drive);
+ if (idetape_reinit(drive))
+ return 1;
+ break;
+ }
+#endif /* CONFIG_BLK_DEV_IDETAPE */
+#ifdef CONFIG_BLK_DEV_IDESCSI
+/*
+ * {
+ * extern int idescsi_reinit(ide_drive_t *drive);
+ * if (idescsi_reinit(drive))
+ * return 1;
+ * break;
+ * }
+ */
+#endif /* CONFIG_BLK_DEV_IDESCSI */
+ default:
+ return 1;
+ }
+ return 0;
+}
+
+static int ide_ioctl (struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ int err = 0, major, minor;
+ ide_drive_t *drive;
+ struct request rq;
+ kdev_t dev;
+ ide_settings_t *setting;
+
+ if (!inode || !(dev = inode->i_rdev))
+ return -EINVAL;
+ major = MAJOR(dev); minor = MINOR(dev);
+ if ((drive = get_info_ptr(inode->i_rdev)) == NULL)
+ return -ENODEV;
+
+ if ((setting = ide_find_setting_by_ioctl(drive, cmd)) != NULL) {
+ if (cmd == setting->read_ioctl) {
+ err = ide_read_setting(drive, setting);
+ return err >= 0 ? put_user(err, (long *) arg) : err;
+ } else {
+ if ((MINOR(inode->i_rdev) & PARTN_MASK))
+ return -EINVAL;
+ return ide_write_setting(drive, setting, arg);
+ }
+ }
+
+ ide_init_drive_cmd (&rq);
+ switch (cmd) {
+ case HDIO_GETGEO:
+ {
+ struct hd_geometry *loc = (struct hd_geometry *) arg;
+ unsigned short bios_cyl = drive->bios_cyl; /* truncate */
+ if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL;
+ if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT;
+ if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT;
+ if (put_user(bios_cyl, (unsigned short *) &loc->cylinders)) return -EFAULT;
+ if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect,
+ (unsigned long *) &loc->start)) return -EFAULT;
+ return 0;
+ }
+
+ case HDIO_GETGEO_BIG:
+ {
+ struct hd_big_geometry *loc = (struct hd_big_geometry *) arg;
+ if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL;
+ if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT;
+ if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT;
+ if (put_user(drive->bios_cyl, (unsigned int *) &loc->cylinders)) return -EFAULT;
+ if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect,
+ (unsigned long *) &loc->start)) return -EFAULT;
+ return 0;
+ }
+
+ case HDIO_GETGEO_BIG_RAW:
+ {
+ struct hd_big_geometry *loc = (struct hd_big_geometry *) arg;
+ if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL;
+ if (put_user(drive->head, (byte *) &loc->heads)) return -EFAULT;
+ if (put_user(drive->sect, (byte *) &loc->sectors)) return -EFAULT;
+ if (put_user(drive->cyl, (unsigned int *) &loc->cylinders)) return -EFAULT;
+ if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect,
+ (unsigned long *) &loc->start)) return -EFAULT;
+ return 0;
+ }
+
+#if 0
+ case BLKGETSIZE: /* Return device size */
+ return put_user(drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects, (unsigned long *) arg);
+ case BLKGETSIZE64:
+ return put_user((u64)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects << 9, (u64 *) arg);
+
+ case BLKRRPART: /* Re-read partition tables */
+ if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+ return ide_revalidate_disk(inode->i_rdev);
+#endif
+
+ case HDIO_OBSOLETE_IDENTITY:
+ case HDIO_GET_IDENTITY:
+ if (MINOR(inode->i_rdev) & PARTN_MASK)
+ return -EINVAL;
+ if (drive->id == NULL)
+ return -ENOMSG;
+ if (copy_to_user((char *)arg, (char *)drive->id, (cmd == HDIO_GET_IDENTITY) ? sizeof(*drive->id) : 142))
+ return -EFAULT;
+ return 0;
+
+ case HDIO_GET_NICE:
+ return put_user(drive->dsc_overlap << IDE_NICE_DSC_OVERLAP |
+ drive->atapi_overlap << IDE_NICE_ATAPI_OVERLAP |
+ drive->nice0 << IDE_NICE_0 |
+ drive->nice1 << IDE_NICE_1 |
+ drive->nice2 << IDE_NICE_2,
+ (long *) arg);
+
+#ifdef CONFIG_IDE_TASK_IOCTL
+ case HDIO_DRIVE_TASKFILE:
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
+ return -EACCES;
+ switch(drive->media) {
+ case ide_disk:
+ return ide_taskfile_ioctl(drive, inode, file, cmd, arg);
+#ifdef CONFIG_PKT_TASK_IOCTL
+ case ide_cdrom:
+ case ide_tape:
+ case ide_floppy:
+ return pkt_taskfile_ioctl(drive, inode, file, cmd, arg);
+#endif /* CONFIG_PKT_TASK_IOCTL */
+ default:
+ return -ENOMSG;
+ }
+#endif /* CONFIG_IDE_TASK_IOCTL */
+
+ case HDIO_DRIVE_CMD:
+ {
+ byte args[4], *argbuf = args;
+ byte xfer_rate = 0;
+ int argsize = 4;
+ ide_task_t tfargs;
+
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
+ return -EACCES;
+ if (NULL == (void *) arg)
+ return ide_do_drive_cmd(drive, &rq, ide_wait);
+ if (copy_from_user(args, (void *)arg, 4))
+ return -EFAULT;
+
+ tfargs.tfRegister[IDE_FEATURE_OFFSET] = args[2];
+ tfargs.tfRegister[IDE_NSECTOR_OFFSET] = args[3];
+ tfargs.tfRegister[IDE_SECTOR_OFFSET] = args[1];
+ tfargs.tfRegister[IDE_LCYL_OFFSET] = 0x00;
+ tfargs.tfRegister[IDE_HCYL_OFFSET] = 0x00;
+ tfargs.tfRegister[IDE_SELECT_OFFSET] = 0x00;
+ tfargs.tfRegister[IDE_COMMAND_OFFSET] = args[0];
+
+ if (args[3]) {
+ argsize = 4 + (SECTOR_WORDS * 4 * args[3]);
+ argbuf = kmalloc(argsize, GFP_KERNEL);
+ if (argbuf == NULL)
+ return -ENOMEM;
+ memcpy(argbuf, args, 4);
+ }
+
+ if (set_transfer(drive, &tfargs)) {
+ xfer_rate = args[1];
+ if (ide_ata66_check(drive, &tfargs))
+ goto abort;
+ }
+
+ err = ide_wait_cmd(drive, args[0], args[1], args[2], args[3], argbuf);
+
+ if (!err && xfer_rate) {
+ /* active-retuning-calls future */
+ if ((HWIF(drive)->speedproc) != NULL)
+ HWIF(drive)->speedproc(drive, xfer_rate);
+ ide_driveid_update(drive);
+ }
+ abort:
+ if (copy_to_user((void *)arg, argbuf, argsize))
+ err = -EFAULT;
+ if (argsize > 4)
+ kfree(argbuf);
+ return err;
+ }
+ case HDIO_DRIVE_TASK:
+ {
+ byte args[7], *argbuf = args;
+ int argsize = 7;
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES;
+ if (copy_from_user(args, (void *)arg, 7))
+ return -EFAULT;
+ err = ide_wait_cmd_task(drive, argbuf);
+ if (copy_to_user((void *)arg, argbuf, argsize))
+ err = -EFAULT;
+ return err;
+ }
+ case HDIO_SCAN_HWIF:
+ {
+ int args[3];
+ if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+ if (copy_from_user(args, (void *)arg, 3 * sizeof(int)))
+ return -EFAULT;
+ if (ide_register(args[0], args[1], args[2]) == -1)
+ return -EIO;
+ return 0;
+ }
+ case HDIO_UNREGISTER_HWIF:
+ if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+ /* (arg > MAX_HWIFS) checked in function */
+ ide_unregister(arg);
+ return 0;
+ case HDIO_SET_NICE:
+ if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+ if (drive->driver == NULL)
+ return -EPERM;
+ if (arg != (arg & ((1 << IDE_NICE_DSC_OVERLAP) | (1 << IDE_NICE_1))))
+ return -EPERM;
+ drive->dsc_overlap = (arg >> IDE_NICE_DSC_OVERLAP) & 1;
+ if (drive->dsc_overlap && !DRIVER(drive)->supports_dsc_overlap) {
+ drive->dsc_overlap = 0;
+ return -EPERM;
+ }
+ drive->nice1 = (arg >> IDE_NICE_1) & 1;
+ return 0;
+ case HDIO_DRIVE_RESET:
+ {
+ unsigned long flags;
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+
+ if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+#if 1
+ spin_lock_irqsave(&io_request_lock, flags);
+ if (hwgroup->handler != NULL) {
+ printk("%s: ide_set_handler: handler not null; %p\n", drive->name, hwgroup->handler);
+ (void) hwgroup->handler(drive);
+// hwgroup->handler = NULL;
+// hwgroup->expiry = NULL;
+ hwgroup->timer.expires = jiffies;
+ del_timer(&hwgroup->timer);
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+#endif
+ (void) ide_do_reset(drive);
+ if (drive->suspend_reset) {
+/*
+ * APM WAKE UP todo !!
+ * int nogoodpower = 1;
+ * while(nogoodpower) {
+ * check_power1() or check_power2()
+ * nogoodpower = 0;
+ * }
+ * HWIF(drive)->multiproc(drive);
+ */
+ return ide_revalidate_disk(inode->i_rdev);
+ }
+ return 0;
+ }
+#if 0
+ case BLKROSET:
+ case BLKROGET:
+ case BLKFLSBUF:
+ case BLKSSZGET:
+ case BLKPG:
+ case BLKELVGET:
+ case BLKELVSET:
+ case BLKBSZGET:
+ case BLKBSZSET:
+ return blk_ioctl(inode->i_rdev, cmd, arg);
+#endif
+
+ case HDIO_GET_BUSSTATE:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (put_user(HWIF(drive)->bus_state, (long *)arg))
+ return -EFAULT;
+ return 0;
+
+ case HDIO_SET_BUSSTATE:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (HWIF(drive)->busproc)
+ HWIF(drive)->busproc(drive, (int)arg);
+ return 0;
+
+ default:
+ if (drive->driver != NULL)
+ return DRIVER(drive)->ioctl(drive, inode, file, cmd, arg);
+ return -EPERM;
+ }
+}
+
+static int ide_check_media_change (kdev_t i_rdev)
+{
+ ide_drive_t *drive;
+
+ if ((drive = get_info_ptr(i_rdev)) == NULL)
+ return -ENODEV;
+ if (drive->driver != NULL)
+ return DRIVER(drive)->media_change(drive);
+ return 0;
+}
+
+void ide_fixstring (byte *s, const int bytecount, const int byteswap)
+{
+ byte *p = s, *end = &s[bytecount & ~1]; /* bytecount must be even */
+
+ if (byteswap) {
+ /* convert from big-endian to host byte order */
+ for (p = end ; p != s;) {
+ unsigned short *pp = (unsigned short *) (p -= 2);
+ *pp = ntohs(*pp);
+ }
+ }
+
+ /* strip leading blanks */
+ while (s != end && *s == ' ')
+ ++s;
+
+ /* compress internal blanks and strip trailing blanks */
+ while (s != end && *s) {
+ if (*s++ != ' ' || (s != end && *s && *s != ' '))
+ *p++ = *(s-1);
+ }
+
+ /* wipe out trailing garbage */
+ while (p != end)
+ *p++ = '\0';
+}
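+
+/*
+ * Worked example (hypothetical bytes): with byteswap set, each byte
+ * pair is swapped first, so raw identify bytes "DW C" become "WDC ".
+ * The passes above then turn "  WDC   AC2850F " into "WDC AC2850F",
+ * NUL-padded out to bytecount.
+ */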
+
+/*
+ * stridx() returns the offset of c within s,
+ * or -1 if c is '\0' or not found within s.
+ */
+static int __init stridx (const char *s, char c)
+{
+ char *i = strchr(s, c);
+ return (i && c) ? i - s : -1;
+}
+
+/*
+ * match_parm() does parsing for ide_setup():
+ *
+ * 1. the first char of s must be '='.
+ * 2. if the remainder matches one of the supplied keywords,
+ * the index (1 based) of the keyword is negated and returned.
+ * 3. if the remainder is a series of no more than max_vals numbers
+ * separated by commas, the numbers are saved in vals[] and a
+ * count of how many were saved is returned. Base10 is assumed,
+ * and base16 is allowed when prefixed with "0x".
+ * 4. otherwise, zero is returned.
+ */
+static int __init match_parm (char *s, const char *keywords[], int vals[], int max_vals)
+{
+ static const char *decimal = "0123456789";
+ static const char *hex = "0123456789abcdef";
+ int i, n;
+
+ if (*s++ == '=') {
+ /*
+ * Try matching against the supplied keywords,
+ * and return -(index+1) if we match one
+ */
+ if (keywords != NULL) {
+ for (i = 0; *keywords != NULL; ++i) {
+ if (!strcmp(s, *keywords++))
+ return -(i+1);
+ }
+ }
+ /*
+ * Look for a series of no more than "max_vals"
+ * numeric values separated by commas, in base10,
+ * or base16 when prefixed with "0x".
+ * Return a count of how many were found.
+ */
+ for (n = 0; (i = stridx(decimal, *s)) >= 0;) {
+ vals[n] = i;
+ while ((i = stridx(decimal, *++s)) >= 0)
+ vals[n] = (vals[n] * 10) + i;
+ if (*s == 'x' && !vals[n]) {
+ while ((i = stridx(hex, *++s)) >= 0)
+ vals[n] = (vals[n] * 0x10) + i;
+ }
+ if (++n == max_vals)
+ break;
+ if (*s == ',' || *s == ';')
+ ++s;
+ }
+ if (!*s)
+ return n;
+ }
+ return 0; /* zero = nothing matched */
+}
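+
+/*
+ * Worked examples (illustrative), with keywords = { "serialize", NULL }:
+ *
+ *	match_parm("=serialize", keywords, vals, 3)	returns -1
+ *	match_parm("=0x1f0,0x3f6,14", NULL, vals, 3)	returns 3,
+ *		with vals[] = { 0x1f0, 0x3f6, 14 }
+ *	match_parm("=bogus", NULL, vals, 3)		returns 0
+ */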
+
+/*
+ * ide_setup() gets called VERY EARLY during initialization,
+ * to handle kernel "command line" strings beginning with "hdx="
+ * or "ide". Here is the complete set currently supported:
+ *
+ * "hdx=" is recognized for all "x" from "a" to "h", such as "hdc".
+ * "idex=" is recognized for all "x" from "0" to "3", such as "ide1".
+ *
+ * "hdx=noprobe" : drive may be present, but do not probe for it
+ * "hdx=none" : drive is NOT present, ignore cmos and do not probe
+ * "hdx=nowerr" : ignore the WRERR_STAT bit on this drive
+ * "hdx=cdrom" : drive is present, and is a cdrom drive
+ * "hdx=cyl,head,sect" : disk drive is present, with specified geometry
+ * "hdx=noremap" : do not remap 0->1 even though EZD was detected
+ * "hdx=autotune" : driver will attempt to tune interface speed
+ * to the fastest PIO mode supported,
+ * if possible for this drive only.
+ * Not fully supported by all chipset types,
+ * and quite likely to cause trouble with
+ * older/odd IDE drives.
+ *
+ * "hdx=slow" : insert a huge pause after each access to the data
+ * port. Should be used only as a last resort.
+ *
+ * "hdx=swapdata" : when the drive is a disk, byte swap all data
+ * "hdx=bswap" : same as above..........
+ * "hdxlun=xx" : set the drive last logical unit.
+ * "hdx=flash" : allows for more than one ata_flash disk to be
+ * registered. In most cases, only one device
+ * will be present.
+ * "hdx=scsi" : the return of the ide-scsi flag, this is useful for
+ * allowwing ide-floppy, ide-tape, and ide-cdrom|writers
+ * to use ide-scsi emulation on a device specific option.
+ * "idebus=xx" : inform IDE driver of VESA/PCI bus speed in MHz,
+ * where "xx" is between 20 and 66 inclusive,
+ * used when tuning chipset PIO modes.
+ * For PCI bus, 25 is correct for a P75 system,
+ * 30 is correct for P90,P120,P180 systems,
+ * and 33 is used for P100,P133,P166 systems.
+ * If in doubt, use idebus=33 for PCI.
+ * As for VLB, it is safest to not specify it.
+ *
+ * "idex=noprobe" : do not attempt to access/use this interface
+ * "idex=base" : probe for an interface at the addr specified,
+ * where "base" is usually 0x1f0 or 0x170
+ * and "ctl" is assumed to be "base"+0x206
+ * "idex=base,ctl" : specify both base and ctl
+ * "idex=base,ctl,irq" : specify base, ctl, and irq number
+ * "idex=autotune" : driver will attempt to tune interface speed
+ * to the fastest PIO mode supported,
+ * for all drives on this interface.
+ * Not fully supported by all chipset types,
+ * and quite likely to cause trouble with
+ * older/odd IDE drives.
+ * "idex=noautotune" : driver will NOT attempt to tune interface speed
+ * This is the default for most chipsets,
+ * except the cmd640.
+ * "idex=serialize" : do not overlap operations on idex and ide(x^1)
+ * "idex=four" : four drives on idex and ide(x^1) share same ports
+ * "idex=reset" : reset interface before first use
+ * "idex=dma" : enable DMA by default on both drives if possible
+ * "idex=ata66" : informs the interface that it has an 80c cable
+ * for chipsets that are ATA-66 capable, but
+ * the ability to bit test for detection is
+ * currently unknown.
+ * "ide=reverse" : Formerly called to pci sub-system, but now local.
+ *
+ * The following are valid ONLY on ide0, (except dc4030)
+ * and the defaults for the base,ctl ports must not be altered.
+ *
+ * "ide0=dtc2278" : probe/support DTC2278 interface
+ * "ide0=ht6560b" : probe/support HT6560B interface
+ * "ide0=cmd640_vlb" : *REQUIRED* for VLB cards with the CMD640 chip
+ * (not for PCI -- automatically detected)
+ * "ide0=qd65xx" : probe/support qd65xx interface
+ * "ide0=ali14xx" : probe/support ali14xx chipsets (ALI M1439, M1443, M1445)
+ * "ide0=umc8672" : probe/support umc8672 chipsets
+ * "idex=dc4030" : probe/support Promise DC4030VL interface
+ * "ide=doubler" : probe/support IDE doublers on Amiga
+ */
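+/*
+ * Example (illustrative): a boot line combining the options above,
+ *
+ *	ide0=0x1f0,0x3f6,14 hdb=noprobe hdc=cdrom idebus=33
+ *
+ * probes ide0 at the given ports/irq, skips probing hdb, declares hdc
+ * to be a cdrom, and assumes a 33MHz bus when tuning PIO modes.
+ */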
+int __init ide_setup (char *s)
+{
+ int i, vals[3];
+ ide_hwif_t *hwif;
+ ide_drive_t *drive;
+ unsigned int hw, unit;
+ const char max_drive = 'a' + ((MAX_HWIFS * MAX_DRIVES) - 1);
+ const char max_hwif = '0' + (MAX_HWIFS - 1);
+
+ if (strncmp(s,"hd",2) == 0 && s[2] == '=') /* hd= is for hd.c */
+ return 0; /* driver and not us */
+
+ if (strncmp(s,"ide",3) &&
+ strncmp(s,"idebus",6) &&
+ strncmp(s,"hd",2)) /* hdx= & hdxlun= */
+ return 0;
+
+ printk("ide_setup: %s", s);
+ init_ide_data ();
+
+#ifdef CONFIG_BLK_DEV_IDEDOUBLER
+ if (!strcmp(s, "ide=doubler")) {
+ extern int ide_doubler;
+
+ printk(" : Enabled support for IDE doublers\n");
+ ide_doubler = 1;
+ return 1;
+ }
+#endif /* CONFIG_BLK_DEV_IDEDOUBLER */
+
+ if (!strcmp(s, "ide=nodma")) {
+ printk("IDE: Prevented DMA\n");
+ noautodma = 1;
+ return 1;
+ }
+
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ if (!strcmp(s, "ide=reverse")) {
+ ide_scan_direction = 1;
+ printk(" : Enabled support for IDE inverse scan order.\n");
+ return 1;
+ }
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+
+ /*
+ * Look for drive options: "hdx="
+ */
+ if (s[0] == 'h' && s[1] == 'd' && s[2] >= 'a' && s[2] <= max_drive) {
+ const char *hd_words[] = {"none", "noprobe", "nowerr", "cdrom",
+ "serialize", "autotune", "noautotune",
+ "slow", "swapdata", "bswap", "flash",
+ "remap", "noremap", "scsi", NULL};
+ unit = s[2] - 'a';
+ hw = unit / MAX_DRIVES;
+ unit = unit % MAX_DRIVES;
+ hwif = &ide_hwifs[hw];
+ drive = &hwif->drives[unit];
+ if (strncmp(s + 4, "ide-", 4) == 0) {
+ strncpy(drive->driver_req, s + 4, 9);
+ goto done;
+ }
+ /*
+ * Look for last lun option: "hdxlun="
+ */
+ if (s[3] == 'l' && s[4] == 'u' && s[5] == 'n') {
+ if (match_parm(&s[6], NULL, vals, 1) != 1)
+ goto bad_option;
+ if (vals[0] >= 0 && vals[0] <= 7) {
+ drive->last_lun = vals[0];
+ drive->forced_lun = 1;
+ } else
+ printk(" -- BAD LAST LUN! Expected value from 0 to 7");
+ goto done;
+ }
+ switch (match_parm(&s[3], hd_words, vals, 3)) {
+ case -1: /* "none" */
+ drive->nobios = 1; /* drop into "noprobe" */
+ case -2: /* "noprobe" */
+ drive->noprobe = 1;
+ goto done;
+ case -3: /* "nowerr" */
+ drive->bad_wstat = BAD_R_STAT;
+ hwif->noprobe = 0;
+ goto done;
+ case -4: /* "cdrom" */
+ drive->present = 1;
+ drive->media = ide_cdrom;
+ hwif->noprobe = 0;
+ goto done;
+ case -5: /* "serialize" */
+ printk(" -- USE \"ide%d=serialize\" INSTEAD", hw);
+ goto do_serialize;
+ case -6: /* "autotune" */
+ drive->autotune = 1;
+ goto done;
+ case -7: /* "noautotune" */
+ drive->autotune = 2;
+ goto done;
+ case -8: /* "slow" */
+ drive->slow = 1;
+ goto done;
+ case -9: /* "swapdata" or "bswap" */
+ case -10:
+ drive->bswap = 1;
+ goto done;
+ case -11: /* "flash" */
+ drive->ata_flash = 1;
+ goto done;
+ case -12: /* "remap" */
+ drive->remap_0_to_1 = 1;
+ goto done;
+ case -13: /* "noremap" */
+ drive->remap_0_to_1 = 2;
+ goto done;
+ case -14: /* "scsi" */
+#if defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI)
+ drive->scsi = 1;
+ goto done;
+#else
+ drive->scsi = 0;
+ goto bad_option;
+#endif /* defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI) */
+ case 3: /* cyl,head,sect */
+ drive->media = ide_disk;
+ drive->cyl = drive->bios_cyl = vals[0];
+ drive->head = drive->bios_head = vals[1];
+ drive->sect = drive->bios_sect = vals[2];
+ drive->present = 1;
+ drive->forced_geom = 1;
+ hwif->noprobe = 0;
+ goto done;
+ default:
+ goto bad_option;
+ }
+ }
+
+ if (s[0] != 'i' || s[1] != 'd' || s[2] != 'e')
+ goto bad_option;
+ /*
+ * Look for bus speed option: "idebus="
+ */
+ if (s[3] == 'b' && s[4] == 'u' && s[5] == 's') {
+ if (match_parm(&s[6], NULL, vals, 1) != 1)
+ goto bad_option;
+ if (vals[0] >= 20 && vals[0] <= 66) {
+ idebus_parameter = vals[0];
+ } else
+ printk(" -- BAD BUS SPEED! Expected value from 20 to 66");
+ goto done;
+ }
+ /*
+ * Look for interface options: "idex="
+ */
+ if (s[3] >= '0' && s[3] <= max_hwif) {
+ /*
+ * Be VERY CAREFUL changing this: note the hardcoded indexes below
+ * -8,-9,-10 : reserved for future "idex=" keywords, to ease the hardcoding.
+ */
+ const char *ide_words[] = {
+ "noprobe", "serialize", "autotune", "noautotune", "reset", "dma", "ata66",
+ "minus8", "minus9", "minus10",
+ "four", "qd65xx", "ht6560b", "cmd640_vlb", "dtc2278", "umc8672", "ali14xx", "dc4030", NULL };
+ hw = s[3] - '0';
+ hwif = &ide_hwifs[hw];
+ i = match_parm(&s[4], ide_words, vals, 3);
+
+ /*
+ * Cryptic check to ensure chipset not already set for hwif:
+ */
+ if (i > 0 || i <= -11) { /* is parameter a chipset name? */
+ if (hwif->chipset != ide_unknown)
+ goto bad_option; /* chipset already specified */
+ if (i <= -11 && i != -18 && hw != 0)
+ goto bad_hwif; /* chipset drivers are for "ide0=" only */
+ if (i <= -11 && i != -18 && ide_hwifs[hw+1].chipset != ide_unknown)
+ goto bad_option; /* chipset for 2nd port already specified */
+ printk("\n");
+ }
+
+ switch (i) {
+#ifdef CONFIG_BLK_DEV_PDC4030
+ case -18: /* "dc4030" */
+ {
+ extern void init_pdc4030(void);
+ init_pdc4030();
+ goto done;
+ }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+#ifdef CONFIG_BLK_DEV_ALI14XX
+ case -17: /* "ali14xx" */
+ {
+ extern void init_ali14xx (void);
+ init_ali14xx();
+ goto done;
+ }
+#endif /* CONFIG_BLK_DEV_ALI14XX */
+#ifdef CONFIG_BLK_DEV_UMC8672
+ case -16: /* "umc8672" */
+ {
+ extern void init_umc8672 (void);
+ init_umc8672();
+ goto done;
+ }
+#endif /* CONFIG_BLK_DEV_UMC8672 */
+#ifdef CONFIG_BLK_DEV_DTC2278
+ case -15: /* "dtc2278" */
+ {
+ extern void init_dtc2278 (void);
+ init_dtc2278();
+ goto done;
+ }
+#endif /* CONFIG_BLK_DEV_DTC2278 */
+#ifdef CONFIG_BLK_DEV_CMD640
+ case -14: /* "cmd640_vlb" */
+ {
+ extern int cmd640_vlb; /* flag for cmd640.c */
+ cmd640_vlb = 1;
+ goto done;
+ }
+#endif /* CONFIG_BLK_DEV_CMD640 */
+#ifdef CONFIG_BLK_DEV_HT6560B
+ case -13: /* "ht6560b" */
+ {
+ extern void init_ht6560b (void);
+ init_ht6560b();
+ goto done;
+ }
+#endif /* CONFIG_BLK_DEV_HT6560B */
+#ifdef CONFIG_BLK_DEV_QD65XX
+ case -12: /* "qd65xx" */
+ {
+ extern void init_qd65xx (void);
+ init_qd65xx();
+ goto done;
+ }
+#endif /* CONFIG_BLK_DEV_QD65XX */
+#ifdef CONFIG_BLK_DEV_4DRIVES
+ case -11: /* "four" drives on one set of ports */
+ {
+ ide_hwif_t *mate = &ide_hwifs[hw^1];
+ mate->drives[0].select.all ^= 0x20;
+ mate->drives[1].select.all ^= 0x20;
+ hwif->chipset = mate->chipset = ide_4drives;
+ mate->irq = hwif->irq;
+ memcpy(mate->io_ports, hwif->io_ports, sizeof(hwif->io_ports));
+ goto do_serialize;
+ }
+#endif /* CONFIG_BLK_DEV_4DRIVES */
+ case -10: /* minus10 */
+ case -9: /* minus9 */
+ case -8: /* minus8 */
+ goto bad_option;
+ case -7: /* ata66 */
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ hwif->udma_four = 1;
+ goto done;
+#else /* !CONFIG_BLK_DEV_IDEPCI */
+ hwif->udma_four = 0;
+ goto bad_hwif;
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+ case -6: /* dma */
+ hwif->autodma = 1;
+ goto done;
+ case -5: /* "reset" */
+ hwif->reset = 1;
+ goto done;
+ case -4: /* "noautotune" */
+ hwif->drives[0].autotune = 2;
+ hwif->drives[1].autotune = 2;
+ goto done;
+ case -3: /* "autotune" */
+ hwif->drives[0].autotune = 1;
+ hwif->drives[1].autotune = 1;
+ goto done;
+ case -2: /* "serialize" */
+ do_serialize:
+ hwif->mate = &ide_hwifs[hw^1];
+ hwif->mate->mate = hwif;
+ hwif->serialized = hwif->mate->serialized = 1;
+ goto done;
+
+ case -1: /* "noprobe" */
+ hwif->noprobe = 1;
+ goto done;
+
+ case 1: /* base */
+ vals[1] = vals[0] + 0x206; /* default ctl */
+ case 2: /* base,ctl */
+ vals[2] = 0; /* default irq = probe for it */
+ case 3: /* base,ctl,irq */
+ hwif->hw.irq = vals[2];
+ ide_init_hwif_ports(&hwif->hw, (ide_ioreg_t) vals[0], (ide_ioreg_t) vals[1], &hwif->irq);
+ memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
+ hwif->irq = vals[2];
+ hwif->noprobe = 0;
+ hwif->chipset = ide_generic;
+ goto done;
+
+ case 0: goto bad_option;
+ default:
+ printk(" -- SUPPORT NOT CONFIGURED IN THIS KERNEL\n");
+ return 1;
+ }
+ }
+bad_option:
+ printk(" -- BAD OPTION\n");
+ return 1;
+bad_hwif:
+ printk("-- NOT SUPPORTED ON ide%d", hw);
+done:
+ printk("\n");
+ return 1;
+}
+
+/*
+ * probe_for_hwifs() finds/initializes "known" IDE interfaces
+ */
+static void __init probe_for_hwifs (void)
+{
+#ifdef CONFIG_PCI
+ if (pci_present())
+ {
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ ide_scan_pcibus(ide_scan_direction);
+#else
+#ifdef CONFIG_BLK_DEV_RZ1000
+ {
+ extern void ide_probe_for_rz100x(void);
+ ide_probe_for_rz100x();
+ }
+#endif /* CONFIG_BLK_DEV_RZ1000 */
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+ }
+#endif /* CONFIG_PCI */
+
+#ifdef CONFIG_ETRAX_IDE
+ {
+ extern void init_e100_ide(void);
+ init_e100_ide();
+ }
+#endif /* CONFIG_ETRAX_IDE */
+#ifdef CONFIG_BLK_DEV_CMD640
+ {
+ extern void ide_probe_for_cmd640x(void);
+ ide_probe_for_cmd640x();
+ }
+#endif /* CONFIG_BLK_DEV_CMD640 */
+#ifdef CONFIG_BLK_DEV_PDC4030
+ {
+ extern int ide_probe_for_pdc4030(void);
+ (void) ide_probe_for_pdc4030();
+ }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+#ifdef CONFIG_BLK_DEV_IDE_PMAC
+ {
+ extern void pmac_ide_probe(void);
+ pmac_ide_probe();
+ }
+#endif /* CONFIG_BLK_DEV_IDE_PMAC */
+#ifdef CONFIG_BLK_DEV_IDE_SWARM
+ {
+ extern void swarm_ide_probe(void);
+ swarm_ide_probe();
+ }
+#endif /* CONFIG_BLK_DEV_IDE_SWARM */
+#ifdef CONFIG_BLK_DEV_IDE_ICSIDE
+ {
+ extern void icside_init(void);
+ icside_init();
+ }
+#endif /* CONFIG_BLK_DEV_IDE_ICSIDE */
+#ifdef CONFIG_BLK_DEV_IDE_RAPIDE
+ {
+ extern void rapide_init(void);
+ rapide_init();
+ }
+#endif /* CONFIG_BLK_DEV_IDE_RAPIDE */
+#ifdef CONFIG_BLK_DEV_GAYLE
+ {
+ extern void gayle_init(void);
+ gayle_init();
+ }
+#endif /* CONFIG_BLK_DEV_GAYLE */
+#ifdef CONFIG_BLK_DEV_FALCON_IDE
+ {
+ extern void falconide_init(void);
+ falconide_init();
+ }
+#endif /* CONFIG_BLK_DEV_FALCON_IDE */
+#ifdef CONFIG_BLK_DEV_MAC_IDE
+ {
+ extern void macide_init(void);
+ macide_init();
+ }
+#endif /* CONFIG_BLK_DEV_MAC_IDE */
+#ifdef CONFIG_BLK_DEV_Q40IDE
+ {
+ extern void q40ide_init(void);
+ q40ide_init();
+ }
+#endif /* CONFIG_BLK_DEV_Q40IDE */
+#ifdef CONFIG_BLK_DEV_BUDDHA
+ {
+ extern void buddha_init(void);
+ buddha_init();
+ }
+#endif /* CONFIG_BLK_DEV_BUDDHA */
+#if defined(CONFIG_BLK_DEV_ISAPNP) && defined(CONFIG_ISAPNP)
+ {
+ extern void pnpide_init(int enable);
+ pnpide_init(1);
+ }
+#endif /* CONFIG_BLK_DEV_ISAPNP */
+}
+
+void __init ide_init_builtin_drivers (void)
+{
+ /*
+ * Probe for special PCI and other "known" interface chipsets
+ */
+ probe_for_hwifs ();
+
+#ifdef CONFIG_BLK_DEV_IDE
+#if defined(__mc68000__) || defined(CONFIG_APUS)
+ if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) {
+ ide_get_lock(&ide_lock, NULL, NULL); /* for atari only */
+ disable_irq(ide_hwifs[0].irq); /* disable_irq_nosync ?? */
+// disable_irq_nosync(ide_hwifs[0].irq);
+ }
+#endif /* __mc68000__ || CONFIG_APUS */
+
+ (void) ideprobe_init();
+
+#if defined(__mc68000__) || defined(CONFIG_APUS)
+ if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) {
+ enable_irq(ide_hwifs[0].irq);
+ ide_release_lock(&ide_lock); /* for atari only */
+ }
+#endif /* __mc68000__ || CONFIG_APUS */
+#endif /* CONFIG_BLK_DEV_IDE */
+
+#ifdef CONFIG_PROC_FS
+ proc_ide_create();
+#endif
+
+ /*
+ * Attempt to match drivers for the available drives
+ */
+#ifdef CONFIG_BLK_DEV_IDEDISK
+ (void) idedisk_init();
+#endif /* CONFIG_BLK_DEV_IDEDISK */
+#ifdef CONFIG_BLK_DEV_IDECD
+ (void) ide_cdrom_init();
+#endif /* CONFIG_BLK_DEV_IDECD */
+#ifdef CONFIG_BLK_DEV_IDETAPE
+ (void) idetape_init();
+#endif /* CONFIG_BLK_DEV_IDETAPE */
+#ifdef CONFIG_BLK_DEV_IDEFLOPPY
+ (void) idefloppy_init();
+#endif /* CONFIG_BLK_DEV_IDEFLOPPY */
+#ifdef CONFIG_BLK_DEV_IDESCSI
+ #ifdef CONFIG_SCSI
+ (void) idescsi_init();
+ #else
+ #warning ide scsi-emulation selected but no SCSI-subsystem in kernel
+ #endif
+#endif /* CONFIG_BLK_DEV_IDESCSI */
+}
+
+static int default_cleanup (ide_drive_t *drive)
+{
+ return ide_unregister_subdriver(drive);
+}
+
+static int default_standby (ide_drive_t *drive)
+{
+ return 0;
+}
+
+static int default_flushcache (ide_drive_t *drive)
+{
+ return 0;
+}
+
+static ide_startstop_t default_do_request(ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+ ide_end_request(0, HWGROUP(drive));
+ return ide_stopped;
+}
+
+static void default_end_request (byte uptodate, ide_hwgroup_t *hwgroup)
+{
+ ide_end_request(uptodate, hwgroup);
+}
+
+static int default_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ return -EIO;
+}
+
+static int default_open (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+ drive->usage--;
+ return -EIO;
+}
+
+static void default_release (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+}
+
+static int default_check_media_change (ide_drive_t *drive)
+{
+ return 1;
+}
+
+static void default_pre_reset (ide_drive_t *drive)
+{
+}
+
+static unsigned long default_capacity (ide_drive_t *drive)
+{
+ return 0x7fffffff;
+}
+
+static ide_startstop_t default_special (ide_drive_t *drive)
+{
+ special_t *s = &drive->special;
+
+ s->all = 0;
+ drive->mult_req = 0;
+ return ide_stopped;
+}
+
+static int default_reinit (ide_drive_t *drive)
+{
+ printk(KERN_ERR "%s: does not support hotswap of device class!\n", drive->name);
+
+ return 0;
+}
+
+static void setup_driver_defaults (ide_drive_t *drive)
+{
+ ide_driver_t *d = drive->driver;
+
+ if (d->cleanup == NULL) d->cleanup = default_cleanup;
+ if (d->standby == NULL) d->standby = default_standby;
+ if (d->flushcache == NULL) d->flushcache = default_flushcache;
+ if (d->do_request == NULL) d->do_request = default_do_request;
+ if (d->end_request == NULL) d->end_request = default_end_request;
+ if (d->ioctl == NULL) d->ioctl = default_ioctl;
+ if (d->open == NULL) d->open = default_open;
+ if (d->release == NULL) d->release = default_release;
+ if (d->media_change == NULL) d->media_change = default_check_media_change;
+ if (d->pre_reset == NULL) d->pre_reset = default_pre_reset;
+ if (d->capacity == NULL) d->capacity = default_capacity;
+ if (d->special == NULL) d->special = default_special;
+ if (d->reinit == NULL) d->reinit = default_reinit;
+}
+
+ide_drive_t *ide_scan_devices (byte media, const char *name, ide_driver_t *driver, int n)
+{
+ unsigned int unit, index, i;
+
+ for (index = 0, i = 0; index < MAX_HWIFS; ++index) {
+ ide_hwif_t *hwif = &ide_hwifs[index];
+ if (!hwif->present)
+ continue;
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ ide_drive_t *drive = &hwif->drives[unit];
+ char *req = drive->driver_req;
+ if (*req && !strstr(name, req))
+ continue;
+ if (drive->present && drive->media == media && drive->driver == driver && ++i > n)
+ return drive;
+ }
+ }
+ return NULL;
+}
+
+int ide_register_subdriver (ide_drive_t *drive, ide_driver_t *driver, int version)
+{
+ unsigned long flags;
+
+ save_flags(flags); /* all CPUs */
+ cli(); /* all CPUs */
+ if (version != IDE_SUBDRIVER_VERSION || !drive->present || drive->driver != NULL || drive->busy || drive->usage) {
+ restore_flags(flags); /* all CPUs */
+ return 1;
+ }
+ drive->driver = driver;
+ setup_driver_defaults(drive);
+ restore_flags(flags); /* all CPUs */
+ if (drive->autotune != 2) {
+ if (driver->supports_dma && HWIF(drive)->dmaproc != NULL) {
+ /*
+ * Force DMAing for the beginning of the check.
+ * Some chipsets appear to do interesting things,
+ * if not checked and cleared.
+ * PARANOIA!!!
+ */
+ (void) (HWIF(drive)->dmaproc(ide_dma_off_quietly, drive));
+ (void) (HWIF(drive)->dmaproc(ide_dma_check, drive));
+ }
+ drive->dsc_overlap = (drive->next != drive && driver->supports_dsc_overlap);
+ drive->nice1 = 1;
+ }
+ drive->revalidate = 1;
+ drive->suspend_reset = 0;
+#ifdef CONFIG_PROC_FS
+ ide_add_proc_entries(drive->proc, generic_subdriver_entries, drive);
+ ide_add_proc_entries(drive->proc, driver->proc, drive);
+#endif
+ return 0;
+}
+
+int ide_unregister_subdriver (ide_drive_t *drive)
+{
+ unsigned long flags;
+
+ save_flags(flags); /* all CPUs */
+ cli(); /* all CPUs */
+ if (drive->usage || drive->busy || drive->driver == NULL || DRIVER(drive)->busy) {
+ restore_flags(flags); /* all CPUs */
+ return 1;
+ }
+#if defined(CONFIG_BLK_DEV_ISAPNP) && defined(CONFIG_ISAPNP) && defined(MODULE)
+ pnpide_init(0);
+#endif /* CONFIG_BLK_DEV_ISAPNP */
+#ifdef CONFIG_PROC_FS
+ ide_remove_proc_entries(drive->proc, DRIVER(drive)->proc);
+ ide_remove_proc_entries(drive->proc, generic_subdriver_entries);
+#endif
+ auto_remove_settings(drive);
+ drive->driver = NULL;
+ restore_flags(flags); /* all CPUs */
+ return 0;
+}
+
+int ide_register_module (ide_module_t *module)
+{
+ ide_module_t *p = ide_modules;
+
+ while (p) {
+ if (p == module)
+ return 1;
+ p = p->next;
+ }
+ module->next = ide_modules;
+ ide_modules = module;
+ revalidate_drives();
+ return 0;
+}
+
+void ide_unregister_module (ide_module_t *module)
+{
+ ide_module_t **p;
+
+ for (p = &ide_modules; (*p) && (*p) != module; p = &((*p)->next));
+ if (*p)
+ *p = (*p)->next;
+}
+
+struct block_device_operations ide_fops[] = {{
+ open: ide_open,
+ release: ide_release,
+ ioctl: ide_ioctl,
+ check_media_change: ide_check_media_change,
+ revalidate: ide_revalidate_disk
+}};
+
+EXPORT_SYMBOL(ide_hwifs);
+EXPORT_SYMBOL(ide_register_module);
+EXPORT_SYMBOL(ide_unregister_module);
+EXPORT_SYMBOL(ide_spin_wait_hwgroup);
+
+/*
+ * Probe module
+ */
+#ifdef DEVFS_MUST_DIE
+devfs_handle_t ide_devfs_handle;
+#endif
+
+EXPORT_SYMBOL(ide_probe);
+EXPORT_SYMBOL(drive_is_flashcard);
+EXPORT_SYMBOL(ide_timer_expiry);
+EXPORT_SYMBOL(ide_intr);
+EXPORT_SYMBOL(ide_fops);
+EXPORT_SYMBOL(ide_get_queue);
+EXPORT_SYMBOL(ide_add_generic_settings);
+#ifdef DEVFS_MUST_DIE
+EXPORT_SYMBOL(ide_devfs_handle);
+#endif
+EXPORT_SYMBOL(do_ide_request);
+/*
+ * Driver module
+ */
+EXPORT_SYMBOL(ide_scan_devices);
+EXPORT_SYMBOL(ide_register_subdriver);
+EXPORT_SYMBOL(ide_unregister_subdriver);
+EXPORT_SYMBOL(ide_replace_subdriver);
+EXPORT_SYMBOL(ide_input_data);
+EXPORT_SYMBOL(ide_output_data);
+EXPORT_SYMBOL(atapi_input_bytes);
+EXPORT_SYMBOL(atapi_output_bytes);
+EXPORT_SYMBOL(drive_is_ready);
+EXPORT_SYMBOL(ide_set_handler);
+EXPORT_SYMBOL(ide_dump_status);
+EXPORT_SYMBOL(ide_error);
+EXPORT_SYMBOL(ide_fixstring);
+EXPORT_SYMBOL(ide_wait_stat);
+EXPORT_SYMBOL(ide_do_reset);
+EXPORT_SYMBOL(restart_request);
+EXPORT_SYMBOL(ide_init_drive_cmd);
+EXPORT_SYMBOL(ide_do_drive_cmd);
+EXPORT_SYMBOL(ide_end_drive_cmd);
+EXPORT_SYMBOL(ide_end_request);
+EXPORT_SYMBOL(ide_revalidate_disk);
+EXPORT_SYMBOL(ide_cmd);
+EXPORT_SYMBOL(ide_wait_cmd);
+EXPORT_SYMBOL(ide_wait_cmd_task);
+EXPORT_SYMBOL(ide_delay_50ms);
+EXPORT_SYMBOL(ide_stall_queue);
+#ifdef CONFIG_PROC_FS
+EXPORT_SYMBOL(ide_add_proc_entries);
+EXPORT_SYMBOL(ide_remove_proc_entries);
+EXPORT_SYMBOL(proc_ide_read_geometry);
+EXPORT_SYMBOL(create_proc_ide_interfaces);
+EXPORT_SYMBOL(recreate_proc_ide_device);
+EXPORT_SYMBOL(destroy_proc_ide_device);
+#endif
+EXPORT_SYMBOL(ide_add_setting);
+EXPORT_SYMBOL(ide_remove_setting);
+
+EXPORT_SYMBOL(ide_register_hw);
+EXPORT_SYMBOL(ide_register);
+EXPORT_SYMBOL(ide_unregister);
+EXPORT_SYMBOL(ide_setup_ports);
+EXPORT_SYMBOL(hwif_unregister);
+EXPORT_SYMBOL(get_info_ptr);
+EXPORT_SYMBOL(current_capacity);
+
+EXPORT_SYMBOL(system_bus_clock);
+
+EXPORT_SYMBOL(ide_reinit_drive);
+
+#if 0
+static int ide_notify_reboot (struct notifier_block *this, unsigned long event, void *x)
+{
+ ide_hwif_t *hwif;
+ ide_drive_t *drive;
+ int i, unit;
+
+ switch (event) {
+ case SYS_HALT:
+ case SYS_POWER_OFF:
+ case SYS_RESTART:
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ printk("flushing ide devices: ");
+
+ for (i = 0; i < MAX_HWIFS; i++) {
+ hwif = &ide_hwifs[i];
+ if (!hwif->present)
+ continue;
+ for (unit = 0; unit < MAX_DRIVES; ++unit) {
+ drive = &hwif->drives[unit];
+ if (!drive->present)
+ continue;
+
+ /* set the drive to standby */
+ printk("%s ", drive->name);
+ if (event != SYS_RESTART)
+ if (drive->driver != NULL && DRIVER(drive)->standby(drive))
+ continue;
+
+ if (drive->driver != NULL && DRIVER(drive)->cleanup(drive))
+ continue;
+ }
+ }
+ printk("\n");
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ide_notifier = {
+ ide_notify_reboot,
+ NULL,
+ 5
+};
+#endif
+
+/*
+ * This gets invoked once during initialization, to set *everything* up
+ */
+int __init ide_init (void)
+{
+ static char banner_printed;
+ int i;
+
+ if (!banner_printed) {
+ printk(KERN_INFO "Uniform Multi-Platform E-IDE driver " REVISION "\n");
+#ifdef DEVFS_MUST_DIE
+ ide_devfs_handle = devfs_mk_dir (NULL, "ide", NULL);
+#endif
+ system_bus_speed = ide_system_bus_speed();
+ banner_printed = 1;
+ }
+
+ init_ide_data ();
+
+ initializing = 1;
+ ide_init_builtin_drivers();
+ initializing = 0;
+
+ for (i = 0; i < MAX_HWIFS; ++i) {
+ ide_hwif_t *hwif = &ide_hwifs[i];
+ if (hwif->present)
+ ide_geninit(hwif);
+ }
+
+ /*register_reboot_notifier(&ide_notifier);*/
+ return 0;
+}
+
+#ifdef MODULE
+char *options = NULL;
+MODULE_PARM(options,"s");
+MODULE_LICENSE("GPL");
+
+static void __init parse_options (char *line)
+{
+ char *next = line;
+
+ if (line == NULL || !*line)
+ return;
+ while ((line = next) != NULL) {
+ if ((next = strchr(line,' ')) != NULL)
+ *next++ = 0;
+ if (!ide_setup(line))
+ printk ("Unknown option '%s'\n", line);
+ }
+}
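+
+/*
+ * Illustrative usage only -- the accepted keywords are whatever ide_setup()
+ * parses (the usual "idex=" / "hdx=" forms from the IDE documentation),
+ * e.g. when this is loaded as a module:
+ *
+ * insmod <ide module> options="ide0=0x1f0,0x3f6,14 hdb=flash"
+ *
+ * parse_options() splits the string on spaces and hands each word to
+ * ide_setup().
+ */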
+
+int init_module (void)
+{
+ parse_options(options);
+ return ide_init();
+}
+
+void cleanup_module (void)
+{
+ int index;
+
+ /*unregister_reboot_notifier(&ide_notifier);*/
+ for (index = 0; index < MAX_HWIFS; ++index) {
+ ide_unregister(index);
+#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI)
+ if (ide_hwifs[index].dma_base)
+ (void) ide_release_dma(&ide_hwifs[index]);
+#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */
+ }
+
+#ifdef CONFIG_PROC_FS
+ proc_ide_destroy();
+#endif
+#ifdef DEVFS_MUST_DIE
+ devfs_unregister (ide_devfs_handle);
+#endif
+}
+
+#else /* !MODULE */
+
+__setup("", ide_setup);
+
+#endif /* MODULE */
diff --git a/xen/drivers/ide/ide_modes.h b/xen/drivers/ide/ide_modes.h
new file mode 100644
index 0000000000..16b8cf123a
--- /dev/null
+++ b/xen/drivers/ide/ide_modes.h
@@ -0,0 +1,236 @@
+/*
+ * linux/drivers/ide/ide_modes.h
+ *
+ * Copyright (C) 1996 Linus Torvalds, Igor Abramov, and Mark Lord
+ */
+
+#ifndef _IDE_MODES_H
+#define _IDE_MODES_H
+
+#include <linux/config.h>
+
+/*
+ * Shared data/functions for determining best PIO mode for an IDE drive.
+ * Most of this stuff originally lived in cmd640.c, and changes to the
+ * ide_pio_blacklist[] table should be made with EXTREME CAUTION to avoid
+ * breaking the fragile cmd640.c support.
+ */
+
+#ifdef CONFIG_BLK_DEV_IDE_MODES
+
+/*
+ * Standard (generic) timings for PIO modes, from ATA2 specification.
+ * These timings are for access to the IDE data port register *only*.
+ * Some drives may specify a mode, while also specifying a different
+ * value for cycle_time (from drive identification data).
+ */
+typedef struct ide_pio_timings_s {
+ int setup_time; /* Address setup (ns) minimum */
+ int active_time; /* Active pulse (ns) minimum */
+ int cycle_time; /* Cycle time (ns) minimum = (setup + active + recovery) */
+} ide_pio_timings_t;
+
+typedef struct ide_pio_data_s {
+ byte pio_mode;
+ byte use_iordy;
+ byte overridden;
+ byte blacklisted;
+ unsigned int cycle_time;
+} ide_pio_data_t;
+
+#ifndef _IDE_C
+
+int ide_scan_pio_blacklist (char *model);
+byte ide_get_best_pio_mode (ide_drive_t *drive, byte mode_wanted, byte max_mode, ide_pio_data_t *d);
+extern const ide_pio_timings_t ide_pio_timings[6];
+
+#else /* _IDE_C */
+
+const ide_pio_timings_t ide_pio_timings[6] = {
+ { 70, 165, 600 }, /* PIO Mode 0 */
+ { 50, 125, 383 }, /* PIO Mode 1 */
+ { 30, 100, 240 }, /* PIO Mode 2 */
+ { 30, 80, 180 }, /* PIO Mode 3 with IORDY */
+ { 25, 70, 120 }, /* PIO Mode 4 with IORDY */
+ { 20, 50, 100 } /* PIO Mode 5 with IORDY (nonstandard) */
+};
+
+/*
+ * Black list. Some drives incorrectly report their maximal PIO mode,
+ * at least with respect to CMD640. Here we keep info on some known drives.
+ */
+static struct ide_pio_info {
+ const char *name;
+ int pio;
+} ide_pio_blacklist [] = {
+/* { "Conner Peripherals 1275MB - CFS1275A", 4 }, */
+ { "Conner Peripherals 540MB - CFS540A", 3 },
+
+ { "WDC AC2700", 3 },
+ { "WDC AC2540", 3 },
+ { "WDC AC2420", 3 },
+ { "WDC AC2340", 3 },
+ { "WDC AC2250", 0 },
+ { "WDC AC2200", 0 },
+ { "WDC AC21200", 4 },
+ { "WDC AC2120", 0 },
+ { "WDC AC2850", 3 },
+ { "WDC AC1270", 3 },
+ { "WDC AC1170", 1 },
+ { "WDC AC1210", 1 },
+ { "WDC AC280", 0 },
+/* { "WDC AC21000", 4 }, */
+ { "WDC AC31000", 3 },
+ { "WDC AC31200", 3 },
+/* { "WDC AC31600", 4 }, */
+
+ { "Maxtor 7131 AT", 1 },
+ { "Maxtor 7171 AT", 1 },
+ { "Maxtor 7213 AT", 1 },
+ { "Maxtor 7245 AT", 1 },
+ { "Maxtor 7345 AT", 1 },
+ { "Maxtor 7546 AT", 3 },
+ { "Maxtor 7540 AV", 3 },
+
+ { "SAMSUNG SHD-3121A", 1 },
+ { "SAMSUNG SHD-3122A", 1 },
+ { "SAMSUNG SHD-3172A", 1 },
+
+/* { "ST51080A", 4 },
+ * { "ST51270A", 4 },
+ * { "ST31220A", 4 },
+ * { "ST31640A", 4 },
+ * { "ST32140A", 4 },
+ * { "ST3780A", 4 },
+ */
+ { "ST5660A", 3 },
+ { "ST3660A", 3 },
+ { "ST3630A", 3 },
+ { "ST3655A", 3 },
+ { "ST3391A", 3 },
+ { "ST3390A", 1 },
+ { "ST3600A", 1 },
+ { "ST3290A", 0 },
+ { "ST3144A", 0 },
+ { "ST3491A", 1 }, /* reports 3, should be 1 or 2 (depending on */
+ /* drive) according to Seagate's FIND-ATA program */
+
+ { "QUANTUM ELS127A", 0 },
+ { "QUANTUM ELS170A", 0 },
+ { "QUANTUM LPS240A", 0 },
+ { "QUANTUM LPS210A", 3 },
+ { "QUANTUM LPS270A", 3 },
+ { "QUANTUM LPS365A", 3 },
+ { "QUANTUM LPS540A", 3 },
+ { "QUANTUM LIGHTNING 540A", 3 },
+ { "QUANTUM LIGHTNING 730A", 3 },
+
+ { "QUANTUM FIREBALL_540", 3 }, /* Older Quantum Fireballs don't work */
+ { "QUANTUM FIREBALL_640", 3 },
+ { "QUANTUM FIREBALL_1080", 3 },
+ { "QUANTUM FIREBALL_1280", 3 },
+ { NULL, 0 }
+};
+
+/*
+ * This routine searches the ide_pio_blacklist[] for an entry whose name
+ * is a prefix of the supplied model name.
+ *
+ * Returns -1 if no match found.
+ * Otherwise returns the recommended PIO mode from ide_pio_blacklist[].
+ */
+int ide_scan_pio_blacklist (char *model)
+{
+ struct ide_pio_info *p;
+
+ for (p = ide_pio_blacklist; p->name != NULL; p++) {
+ if (strncmp(p->name, model, strlen(p->name)) == 0)
+ return p->pio;
+ }
+ return -1;
+}
+
+/*
+ * This routine returns the recommended PIO settings for a given drive,
+ * based on the drive->id information and the ide_pio_blacklist[].
+ * This is used by most chipset support modules when "auto-tuning".
+ */
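+/*
+ * Typical caller usage (cf. piix_tune_drive() later in this patch):
+ *
+ * pio = ide_get_best_pio_mode(drive, 255, 5, NULL);
+ *
+ * where 255 means "no specific mode requested" and 5 caps the result.
+ */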
+
+/*
+ * Drive PIO mode auto selection
+ */
+byte ide_get_best_pio_mode (ide_drive_t *drive, byte mode_wanted, byte max_mode, ide_pio_data_t *d)
+{
+ int pio_mode;
+ int cycle_time = 0;
+ int use_iordy = 0;
+ struct hd_driveid* id = drive->id;
+ int overridden = 0;
+ int blacklisted = 0;
+
+ if (mode_wanted != 255) {
+ pio_mode = mode_wanted;
+ } else if (!drive->id) {
+ pio_mode = 0;
+ } else if ((pio_mode = ide_scan_pio_blacklist(id->model)) != -1) {
+ overridden = 1;
+ blacklisted = 1;
+ use_iordy = (pio_mode > 2);
+ } else {
+ pio_mode = id->tPIO;
+ if (pio_mode > 2) { /* 2 is maximum allowed tPIO value */
+ pio_mode = 2;
+ overridden = 1;
+ }
+ if (id->field_valid & 2) { /* drive implements ATA2? */
+ if (id->capability & 8) { /* drive supports use_iordy? */
+ use_iordy = 1;
+ cycle_time = id->eide_pio_iordy;
+ if (id->eide_pio_modes & 7) {
+ overridden = 0;
+ if (id->eide_pio_modes & 4)
+ pio_mode = 5;
+ else if (id->eide_pio_modes & 2)
+ pio_mode = 4;
+ else
+ pio_mode = 3;
+ }
+ } else {
+ cycle_time = id->eide_pio;
+ }
+ }
+
+#if 0
+ if (drive->id->major_rev_num & 0x0004) printk("ATA-2 ");
+#endif
+
+ /*
+ * Conservative "downgrade" for all pre-ATA2 drives
+ */
+ if (pio_mode && pio_mode < 4) {
+ pio_mode--;
+ overridden = 1;
+#if 0
+ use_iordy = (pio_mode > 2);
+#endif
+ if (cycle_time && cycle_time < ide_pio_timings[pio_mode].cycle_time)
+ cycle_time = 0; /* use standard timing */
+ }
+ }
+ if (pio_mode > max_mode) {
+ pio_mode = max_mode;
+ cycle_time = 0;
+ }
+ if (d) {
+ d->pio_mode = pio_mode;
+ d->cycle_time = cycle_time ? cycle_time : ide_pio_timings[pio_mode].cycle_time;
+ d->use_iordy = use_iordy;
+ d->overridden = overridden;
+ d->blacklisted = blacklisted;
+ }
+ return pio_mode;
+}
+
+#endif /* _IDE_C */
+#endif /* CONFIG_BLK_DEV_IDE_MODES */
+#endif /* _IDE_MODES_H */
diff --git a/xen/drivers/ide/piix.c b/xen/drivers/ide/piix.c
new file mode 100644
index 0000000000..fe538429f3
--- /dev/null
+++ b/xen/drivers/ide/piix.c
@@ -0,0 +1,542 @@
+/*
+ * linux/drivers/ide/piix.c Version 0.32 June 9, 2000
+ *
+ * Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer
+ * Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org>
+ * May be copied or modified under the terms of the GNU General Public License
+ *
+ * PIO mode setting function for Intel chipsets.
+ * For use instead of BIOS settings.
+ *
+ * 40-41
+ * 42-43
+ *
+ * 41
+ * 43
+ *
+ * | PIO 0 | c0 | 80 | 0 | piix_tune_drive(drive, 0);
+ * | PIO 2 | SW2 | d0 | 90 | 4 | piix_tune_drive(drive, 2);
+ * | PIO 3 | MW1 | e1 | a1 | 9 | piix_tune_drive(drive, 3);
+ * | PIO 4 | MW2 | e3 | a3 | b | piix_tune_drive(drive, 4);
+ *
+ * sitre = word40 & 0x4000; primary
+ * sitre = word42 & 0x4000; secondary
+ *
+ * 44 8421|8421 hdd|hdb
+ *
+ * 48 8421 hdd|hdc|hdb|hda udma enabled
+ *
+ * 0001 hda
+ * 0010 hdb
+ * 0100 hdc
+ * 1000 hdd
+ *
+ * 4a 84|21 hdb|hda
+ * 4b 84|21 hdd|hdc
+ *
+ * ata-33/82371AB
+ * ata-33/82371EB
+ * ata-33/82801AB ata-66/82801AA
+ * 00|00 udma 0 00|00 reserved
+ * 01|01 udma 1 01|01 udma 3
+ * 10|10 udma 2 10|10 udma 4
+ * 11|11 reserved 11|11 reserved
+ *
+ * 54 8421|8421 ata66 drive|ata66 enable
+ *
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x40, &reg40);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x42, &reg42);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x44, &reg44);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x48, &reg48);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x4a, &reg4a);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x54, &reg54);
+ *
+ */
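+
+/*
+ * Illustrative sketch (not part of the driver): with the register 0x48
+ * bit layout above (0001=hda .. 1000=hdd), testing whether UDMA is
+ * enabled for drive number dn would look like:
+ *
+ * byte reg48;
+ * pci_read_config_byte(HWIF(drive)->pci_dev, 0x48, &reg48);
+ * udma_enabled = (reg48 >> dn) & 1;
+ *
+ * which mirrors the "u_flag = 1 << drive->dn" test in piix_tune_chipset().
+ */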
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/hdreg.h>
+#include <linux/ide.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+
+#include "ide_modes.h"
+
+#define PIIX_DEBUG_DRIVE_INFO 0
+
+#define DISPLAY_PIIX_TIMINGS
+
+#if defined(DISPLAY_PIIX_TIMINGS) && defined(CONFIG_PROC_FS)
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+
+static int piix_get_info(char *, char **, off_t, int);
+extern int (*piix_display_info)(char *, char **, off_t, int); /* ide-proc.c */
+extern char *ide_media_verbose(ide_drive_t *);
+static struct pci_dev *bmide_dev;
+
+static int piix_get_info (char *buffer, char **addr, off_t offset, int count)
+{
+ char *p = buffer;
+ u32 bibma = pci_resource_start(bmide_dev, 4);
+ u16 reg40 = 0, psitre = 0, reg42 = 0, ssitre = 0;
+ u8 c0 = 0, c1 = 0;
+ u8 reg44 = 0, reg48 = 0, reg4a = 0, reg4b = 0, reg54 = 0, reg55 = 0;
+
+ switch(bmide_dev->device) {
+ case PCI_DEVICE_ID_INTEL_82801BA_8:
+ case PCI_DEVICE_ID_INTEL_82801BA_9:
+ case PCI_DEVICE_ID_INTEL_82801CA_10:
+ case PCI_DEVICE_ID_INTEL_82801CA_11:
+ case PCI_DEVICE_ID_INTEL_82801DB_11:
+ case PCI_DEVICE_ID_INTEL_82801E_11:
+ p += sprintf(p, "\n Intel PIIX4 Ultra 100 Chipset.\n");
+ break;
+ case PCI_DEVICE_ID_INTEL_82372FB_1:
+ case PCI_DEVICE_ID_INTEL_82801AA_1:
+ p += sprintf(p, "\n Intel PIIX4 Ultra 66 Chipset.\n");
+ break;
+ case PCI_DEVICE_ID_INTEL_82451NX:
+ case PCI_DEVICE_ID_INTEL_82801AB_1:
+ case PCI_DEVICE_ID_INTEL_82443MX_1:
+ case PCI_DEVICE_ID_INTEL_82371AB:
+ p += sprintf(p, "\n Intel PIIX4 Ultra 33 Chipset.\n");
+ break;
+ case PCI_DEVICE_ID_INTEL_82371SB_1:
+ p += sprintf(p, "\n Intel PIIX3 Chipset.\n");
+ break;
+ case PCI_DEVICE_ID_INTEL_82371MX:
+ p += sprintf(p, "\n Intel MPIIX Chipset.\n");
+ return p-buffer; /* => must be less than 4k! */
+ case PCI_DEVICE_ID_INTEL_82371FB_1:
+ case PCI_DEVICE_ID_INTEL_82371FB_0:
+ default:
+ p += sprintf(p, "\n Intel PIIX Chipset.\n");
+ break;
+ }
+
+ pci_read_config_word(bmide_dev, 0x40, &reg40);
+ pci_read_config_word(bmide_dev, 0x42, &reg42);
+ pci_read_config_byte(bmide_dev, 0x44, &reg44);
+ pci_read_config_byte(bmide_dev, 0x48, &reg48);
+ pci_read_config_byte(bmide_dev, 0x4a, &reg4a);
+ pci_read_config_byte(bmide_dev, 0x4b, &reg4b);
+ pci_read_config_byte(bmide_dev, 0x54, &reg54);
+ pci_read_config_byte(bmide_dev, 0x55, &reg55);
+
+ psitre = (reg40 & 0x4000) ? 1 : 0;
+ ssitre = (reg42 & 0x4000) ? 1 : 0;
+
+ /*
+ * At this point the byte registers at bibma+0x02 and bibma+0x0a (the
+ * bus master IDE status registers of the two channels) are what we
+ * investigate:
+ */
+ c0 = inb_p((unsigned short)bibma + 0x02);
+ c1 = inb_p((unsigned short)bibma + 0x0a);
+
+ p += sprintf(p, "--------------- Primary Channel ---------------- Secondary Channel -------------\n");
+ p += sprintf(p, " %sabled %sabled\n",
+ (c0&0x80) ? "dis" : " en",
+ (c1&0x80) ? "dis" : " en");
+ p += sprintf(p, "--------------- drive0 --------- drive1 -------- drive0 ---------- drive1 ------\n");
+ p += sprintf(p, "DMA enabled: %s %s %s %s\n",
+ (c0&0x20) ? "yes" : "no ",
+ (c0&0x40) ? "yes" : "no ",
+ (c1&0x20) ? "yes" : "no ",
+ (c1&0x40) ? "yes" : "no " );
+ p += sprintf(p, "UDMA enabled: %s %s %s %s\n",
+ (reg48&0x01) ? "yes" : "no ",
+ (reg48&0x02) ? "yes" : "no ",
+ (reg48&0x04) ? "yes" : "no ",
+ (reg48&0x08) ? "yes" : "no " );
+ p += sprintf(p, "UDMA enabled: %s %s %s %s\n",
+ ((reg54&0x11) && (reg55&0x10) && (reg4a&0x01)) ? "5" :
+ ((reg54&0x11) && (reg4a&0x02)) ? "4" :
+ ((reg54&0x11) && (reg4a&0x01)) ? "3" :
+ (reg4a&0x02) ? "2" :
+ (reg4a&0x01) ? "1" :
+ (reg4a&0x00) ? "0" : "X",
+ ((reg54&0x22) && (reg55&0x20) && (reg4a&0x10)) ? "5" :
+ ((reg54&0x22) && (reg4a&0x20)) ? "4" :
+ ((reg54&0x22) && (reg4a&0x10)) ? "3" :
+ (reg4a&0x20) ? "2" :
+ (reg4a&0x10) ? "1" :
+ (reg4a&0x00) ? "0" : "X",
+ ((reg54&0x44) && (reg55&0x40) && (reg4b&0x03)) ? "5" :
+ ((reg54&0x44) && (reg4b&0x02)) ? "4" :
+ ((reg54&0x44) && (reg4b&0x01)) ? "3" :
+ (reg4b&0x02) ? "2" :
+ (reg4b&0x01) ? "1" :
+ (reg4b&0x00) ? "0" : "X",
+ ((reg54&0x88) && (reg55&0x80) && (reg4b&0x30)) ? "5" :
+ ((reg54&0x88) && (reg4b&0x20)) ? "4" :
+ ((reg54&0x88) && (reg4b&0x10)) ? "3" :
+ (reg4b&0x20) ? "2" :
+ (reg4b&0x10) ? "1" :
+ (reg4b&0x00) ? "0" : "X");
+
+ p += sprintf(p, "UDMA\n");
+ p += sprintf(p, "DMA\n");
+ p += sprintf(p, "PIO\n");
+
+/*
+ * FIXME.... Add configuration junk data....blah blah......
+ */
+
+ return p-buffer; /* => must be less than 4k! */
+}
+#endif /* defined(DISPLAY_PIIX_TIMINGS) && defined(CONFIG_PROC_FS) */
+
+/*
+ * Guards one-time registration of the /proc timing display
+ * (see pci_init_piix() below):
+ */
+
+byte piix_proc = 0;
+
+extern char *ide_xfer_verbose (byte xfer_rate);
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && defined(CONFIG_PIIX_TUNING)
+/*
+ * Map a DMA/UDMA/PIO transfer rate onto the PIO mode number used by
+ * piix_tune_drive() for timing setup.
+ */
+static byte piix_dma_2_pio (byte xfer_rate) {
+ switch(xfer_rate) {
+ case XFER_UDMA_5:
+ case XFER_UDMA_4:
+ case XFER_UDMA_3:
+ case XFER_UDMA_2:
+ case XFER_UDMA_1:
+ case XFER_UDMA_0:
+ case XFER_MW_DMA_2:
+ case XFER_PIO_4:
+ return 4;
+ case XFER_MW_DMA_1:
+ case XFER_PIO_3:
+ return 3;
+ case XFER_SW_DMA_2:
+ case XFER_PIO_2:
+ return 2;
+ case XFER_MW_DMA_0:
+ case XFER_SW_DMA_1:
+ case XFER_SW_DMA_0:
+ case XFER_PIO_1:
+ case XFER_PIO_0:
+ case XFER_PIO_SLOW:
+ default:
+ return 0;
+ }
+}
+#endif /* defined(CONFIG_BLK_DEV_IDEDMA) && (CONFIG_PIIX_TUNING) */
+
+/*
+ * Based on settings done by AMI BIOS
+ * (might be useful if drive is not registered in CMOS for any reason).
+ */
+static void piix_tune_drive (ide_drive_t *drive, byte pio)
+{
+ unsigned long flags;
+ u16 master_data;
+ byte slave_data;
+ int is_slave = (&HWIF(drive)->drives[1] == drive);
+ int master_port = HWIF(drive)->index ? 0x42 : 0x40;
+ int slave_port = 0x44;
+ /* ISP (IORDY sample point), RTC (recovery time) -- in PCI clocks */
+ byte timings[][2] = { { 0, 0 },
+ { 0, 0 },
+ { 1, 0 },
+ { 2, 1 },
+ { 2, 3 }, };
+
+ pio = ide_get_best_pio_mode(drive, pio, 5, NULL);
+ pci_read_config_word(HWIF(drive)->pci_dev, master_port, &master_data);
+ if (is_slave) {
+ master_data = master_data | 0x4000;
+ if (pio > 1)
+ /* enable PPE, IE and TIME */
+ master_data = master_data | 0x0070;
+ pci_read_config_byte(HWIF(drive)->pci_dev, slave_port, &slave_data);
+ slave_data = slave_data & (HWIF(drive)->index ? 0x0f : 0xf0);
+ slave_data = slave_data | (((timings[pio][0] << 2) | timings[pio][1])
+ << (HWIF(drive)->index ? 4 : 0));
+ } else {
+ master_data = master_data & 0xccf8;
+ if (pio > 1)
+ /* enable PPE, IE and TIME */
+ master_data = master_data | 0x0007;
+ master_data = master_data | (timings[pio][0] << 12) |
+ (timings[pio][1] << 8);
+ }
+ save_flags(flags);
+ cli();
+ pci_write_config_word(HWIF(drive)->pci_dev, master_port, master_data);
+ if (is_slave)
+ pci_write_config_byte(HWIF(drive)->pci_dev, slave_port, slave_data);
+ restore_flags(flags);
+}
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && defined(CONFIG_PIIX_TUNING)
+static int piix_tune_chipset (ide_drive_t *drive, byte speed)
+{
+ ide_hwif_t *hwif = HWIF(drive);
+ struct pci_dev *dev = hwif->pci_dev;
+ byte maslave = hwif->channel ? 0x42 : 0x40;
+ int a_speed = 3 << (drive->dn * 4);
+ int u_flag = 1 << drive->dn;
+ int v_flag = 0x01 << drive->dn;
+ int w_flag = 0x10 << drive->dn;
+ int u_speed = 0;
+ int err = 0;
+ int sitre;
+ short reg4042, reg44, reg48, reg4a, reg54;
+ byte reg55;
+
+ pci_read_config_word(dev, maslave, &reg4042);
+ sitre = (reg4042 & 0x4000) ? 1 : 0;
+ pci_read_config_word(dev, 0x44, &reg44);
+ pci_read_config_word(dev, 0x48, &reg48);
+ pci_read_config_word(dev, 0x4a, &reg4a);
+ pci_read_config_word(dev, 0x54, &reg54);
+ pci_read_config_byte(dev, 0x55, &reg55);
+
+ switch(speed) {
+ case XFER_UDMA_4:
+ case XFER_UDMA_2: u_speed = 2 << (drive->dn * 4); break;
+ case XFER_UDMA_5:
+ case XFER_UDMA_3:
+ case XFER_UDMA_1: u_speed = 1 << (drive->dn * 4); break;
+ case XFER_UDMA_0: u_speed = 0 << (drive->dn * 4); break;
+ case XFER_MW_DMA_2:
+ case XFER_MW_DMA_1:
+ case XFER_SW_DMA_2: break;
+ default: return -1;
+ }
+
+ if (speed >= XFER_UDMA_0) {
+ if (!(reg48 & u_flag))
+ pci_write_config_word(dev, 0x48, reg48|u_flag);
+ if (speed == XFER_UDMA_5) {
+ pci_write_config_byte(dev, 0x55, (byte) reg55|w_flag);
+ } else {
+ pci_write_config_byte(dev, 0x55, (byte) reg55 & ~w_flag);
+ }
+ if (!(reg4a & u_speed)) {
+ pci_write_config_word(dev, 0x4a, reg4a & ~a_speed);
+ pci_write_config_word(dev, 0x4a, reg4a|u_speed);
+ }
+ if (speed > XFER_UDMA_2) {
+ if (!(reg54 & v_flag)) {
+ pci_write_config_word(dev, 0x54, reg54|v_flag);
+ }
+ } else {
+ pci_write_config_word(dev, 0x54, reg54 & ~v_flag);
+ }
+ }
+ if (speed < XFER_UDMA_0) {
+ if (reg48 & u_flag)
+ pci_write_config_word(dev, 0x48, reg48 & ~u_flag);
+ if (reg4a & a_speed)
+ pci_write_config_word(dev, 0x4a, reg4a & ~a_speed);
+ if (reg54 & v_flag)
+ pci_write_config_word(dev, 0x54, reg54 & ~v_flag);
+ if (reg55 & w_flag)
+ pci_write_config_byte(dev, 0x55, (byte) reg55 & ~w_flag);
+ }
+
+ piix_tune_drive(drive, piix_dma_2_pio(speed));
+
+#if PIIX_DEBUG_DRIVE_INFO
+ printk("%s: %s drive%d\n", drive->name, ide_xfer_verbose(speed), drive->dn);
+#endif /* PIIX_DEBUG_DRIVE_INFO */
+ if (!drive->init_speed)
+ drive->init_speed = speed;
+ err = ide_config_drive_speed(drive, speed);
+ drive->current_speed = speed;
+ return err;
+}
+
+static int piix_config_drive_for_dma (ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+ ide_hwif_t *hwif = HWIF(drive);
+ struct pci_dev *dev = hwif->pci_dev;
+ byte speed;
+
+ byte udma_66 = eighty_ninty_three(drive);
+ int ultra100 = ((dev->device == PCI_DEVICE_ID_INTEL_82801BA_8) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801BA_9) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801CA_10) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801CA_11) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801DB_11) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801E_11)) ? 1 : 0;
+ int ultra66 = ((ultra100) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801AA_1) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82372FB_1)) ? 1 : 0;
+ int ultra = ((ultra66) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82371AB) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82443MX_1) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82451NX) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801AB_1)) ? 1 : 0;
+
+ if ((id->dma_ultra & 0x0020) && (udma_66) && (ultra100)) {
+ speed = XFER_UDMA_5;
+ } else if ((id->dma_ultra & 0x0010) && (ultra)) {
+ speed = ((udma_66) && (ultra66)) ? XFER_UDMA_4 : XFER_UDMA_2;
+ } else if ((id->dma_ultra & 0x0008) && (ultra)) {
+ speed = ((udma_66) && (ultra66)) ? XFER_UDMA_3 : XFER_UDMA_1;
+ } else if ((id->dma_ultra & 0x0004) && (ultra)) {
+ speed = XFER_UDMA_2;
+ } else if ((id->dma_ultra & 0x0002) && (ultra)) {
+ speed = XFER_UDMA_1;
+ } else if ((id->dma_ultra & 0x0001) && (ultra)) {
+ speed = XFER_UDMA_0;
+ } else if (id->dma_mword & 0x0004) {
+ speed = XFER_MW_DMA_2;
+ } else if (id->dma_mword & 0x0002) {
+ speed = XFER_MW_DMA_1;
+ } else if (id->dma_1word & 0x0004) {
+ speed = XFER_SW_DMA_2;
+ } else {
+ speed = XFER_PIO_0 + ide_get_best_pio_mode(drive, 255, 5, NULL);
+ }
+
+ (void) piix_tune_chipset(drive, speed);
+
+ return ((int) ((id->dma_ultra >> 11) & 7) ? ide_dma_on :
+ ((id->dma_ultra >> 8) & 7) ? ide_dma_on :
+ ((id->dma_mword >> 8) & 7) ? ide_dma_on :
+ ((id->dma_1word >> 8) & 7) ? ide_dma_on :
+ ide_dma_off_quietly);
+}
+
+static void config_chipset_for_pio (ide_drive_t *drive)
+{
+ piix_tune_drive(drive, ide_get_best_pio_mode(drive, 255, 5, NULL));
+}
+
+static int config_drive_xfer_rate (ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+ ide_dma_action_t dma_func = ide_dma_on;
+
+ if (id && (id->capability & 1) && HWIF(drive)->autodma) {
+ /* Consult the list of known "bad" drives */
+ if (ide_dmaproc(ide_dma_bad_drive, drive)) {
+ dma_func = ide_dma_off;
+ goto fast_ata_pio;
+ }
+ dma_func = ide_dma_off_quietly;
+ if (id->field_valid & 4) {
+ if (id->dma_ultra & 0x003F) {
+ /* Force if Capable UltraDMA */
+ dma_func = piix_config_drive_for_dma(drive);
+ if ((id->field_valid & 2) &&
+ (dma_func != ide_dma_on))
+ goto try_dma_modes;
+ }
+ } else if (id->field_valid & 2) {
+try_dma_modes:
+ if ((id->dma_mword & 0x0007) ||
+ (id->dma_1word & 0x007)) {
+ /* Force if Capable regular DMA modes */
+ dma_func = piix_config_drive_for_dma(drive);
+ if (dma_func != ide_dma_on)
+ goto no_dma_set;
+ }
+ } else if (ide_dmaproc(ide_dma_good_drive, drive)) {
+ if (id->eide_dma_time > 150) {
+ goto no_dma_set;
+ }
+ /* Consult the list of known "good" drives */
+ dma_func = piix_config_drive_for_dma(drive);
+ if (dma_func != ide_dma_on)
+ goto no_dma_set;
+ } else {
+ goto fast_ata_pio;
+ }
+ } else if ((id->capability & 8) || (id->field_valid & 2)) {
+fast_ata_pio:
+ dma_func = ide_dma_off_quietly;
+no_dma_set:
+ config_chipset_for_pio(drive);
+ }
+ return HWIF(drive)->dmaproc(dma_func, drive);
+}
+
+static int piix_dmaproc(ide_dma_action_t func, ide_drive_t *drive)
+{
+ switch (func) {
+ case ide_dma_check:
+ return config_drive_xfer_rate(drive);
+ default :
+ break;
+ }
+ /* Other cases are done by generic IDE-DMA code. */
+ return ide_dmaproc(func, drive);
+}
+#endif /* defined(CONFIG_BLK_DEV_IDEDMA) && (CONFIG_PIIX_TUNING) */
+
+unsigned int __init pci_init_piix (struct pci_dev *dev, const char *name)
+{
+#if defined(DISPLAY_PIIX_TIMINGS) && defined(CONFIG_PROC_FS)
+ if (!piix_proc) {
+ piix_proc = 1;
+ bmide_dev = dev;
+ piix_display_info = &piix_get_info;
+ }
+#endif /* DISPLAY_PIIX_TIMINGS && CONFIG_PROC_FS */
+ return 0;
+}
+
+/*
+ * Sheesh, someone at Intel needs to go read the ATA-4/5 T13 standards:
+ * the cable-detect bits report per channel, not per device!!!
+ * Whether bit 13 of identify word 93 is set is determined later...
+ */
+unsigned int __init ata66_piix (ide_hwif_t *hwif)
+{
+ byte reg54h = 0, reg55h = 0, ata66 = 0;
+ byte mask = hwif->channel ? 0xc0 : 0x30;
+
+ pci_read_config_byte(hwif->pci_dev, 0x54, &reg54h);
+ pci_read_config_byte(hwif->pci_dev, 0x55, &reg55h);
+
+ ata66 = (reg54h & mask) ? 1 : 0;
+
+ return ata66;
+}
+
+void __init ide_init_piix (ide_hwif_t *hwif)
+{
+#ifndef CONFIG_IA64
+ if (!hwif->irq)
+ hwif->irq = hwif->channel ? 15 : 14;
+#endif /* CONFIG_IA64 */
+
+ if (hwif->pci_dev->device == PCI_DEVICE_ID_INTEL_82371MX) {
+ /* This is a painful system; best to let it self-tune for now */
+ return;
+ }
+
+ hwif->tuneproc = &piix_tune_drive;
+ hwif->drives[0].autotune = 1;
+ hwif->drives[1].autotune = 1;
+
+ if (!hwif->dma_base)
+ return;
+
+#ifndef CONFIG_BLK_DEV_IDEDMA
+ hwif->autodma = 0;
+#else /* CONFIG_BLK_DEV_IDEDMA */
+#ifdef CONFIG_PIIX_TUNING
+ if (!noautodma)
+ hwif->autodma = 1;
+ hwif->dmaproc = &piix_dmaproc;
+ hwif->speedproc = &piix_tune_chipset;
+#endif /* CONFIG_PIIX_TUNING */
+#endif /* !CONFIG_BLK_DEV_IDEDMA */
+}
diff --git a/xen/drivers/net/3c59x.c b/xen/drivers/net/3c59x.c
new file mode 100644
index 0000000000..9478920e5b
--- /dev/null
+++ b/xen/drivers/net/3c59x.c
@@ -0,0 +1,2989 @@
+/* EtherLinkXL.c: A 3Com EtherLink PCI III/XL ethernet driver for linux. */
+/*
+ Written 1996-1999 by Donald Becker.
+
+ This software may be used and distributed according to the terms
+ of the GNU General Public License, incorporated herein by reference.
+
+ This driver is for the 3Com "Vortex" and "Boomerang" series ethercards.
+ Members of the series include Fast EtherLink 3c590/3c592/3c595/3c597
+ and the EtherLink XL 3c900 and 3c905 cards.
+
+ Problem reports and questions should be directed to
+ vortex@scyld.com
+
+ The author may be reached as becker@scyld.com, or C/O
+ Scyld Computing Corporation
+ 410 Severn Ave., Suite 210
+ Annapolis MD 21403
+
+ Linux Kernel Additions:
+
+ 0.99H+lk0.9 - David S. Miller - softnet, PCI DMA updates
+ 0.99H+lk1.0 - Jeff Garzik <jgarzik@mandrakesoft.com>
+ Remove compatibility defines for kernel versions < 2.2.x.
+ Update for new 2.3.x module interface
+ LK1.1.2 (March 19, 2000)
+ * New PCI interface (jgarzik)
+
+ LK1.1.3 25 April 2000, Andrew Morton <andrewm@uow.edu.au>
+ - Merged with 3c575_cb.c
+ - Don't set RxComplete in boomerang interrupt enable reg
+ - spinlock in vortex_timer to protect mdio functions
+ - disable local interrupts around call to vortex_interrupt in
+ vortex_tx_timeout() (So vortex_interrupt can use spin_lock())
+ - Select window 3 in vortex_timer()'s write to Wn3_MAC_Ctrl
+ - In vortex_start_xmit(), move the lock to _after_ we've altered
+ vp->cur_tx and vp->tx_full. This defeats the race between
+ vortex_start_xmit() and vortex_interrupt which was identified
+ by Bogdan Costescu.
+ - Merged back support for six new cards from various sources
+ - Set vortex_have_pci if pci_module_init returns zero (fixes cardbus
+ insertion oops)
+ - Tell it that 3c905C has NWAY for 100bT autoneg
+ - Fix handling of SetStatusEnd in 'Too much work..' code, as
+ per 2.3.99's 3c575_cb (Dave Hinds).
+ - Split ISR into two for vortex & boomerang
+ - Fix MOD_INC/DEC races
+ - Handle resource allocation failures.
+ - Fix 3CCFE575CT LED polarity
+ - Make tx_interrupt_mitigation the default
+
+ LK1.1.4 25 April 2000, Andrew Morton <andrewm@uow.edu.au>
+ - Add extra TxReset to vortex_up() to fix 575_cb hotplug initialisation probs.
+ - Put vortex_info_tbl into __devinitdata
+ - In the vortex_error StatsFull HACK, disable stats in vp->intr_enable as well
+ as in the hardware.
+ - Increased the loop counter in issue_and_wait from 2,000 to 4,000.
+
+ LK1.1.5 28 April 2000, andrewm
+ - Added powerpc defines (John Daniel <jdaniel@etresoft.com> said these work...)
+ - Some extra diagnostics
+ - In vortex_error(), reset the Tx on maxCollisions. Otherwise most
+ chips usually get a Tx timeout.
+ - Added extra_reset module parm
+ - Replaced some inline timer manip with mod_timer
+ (François Romieu <Francois.Romieu@nic.fr>)
+ - In vortex_up(), don't make Wn3_config initialisation dependent upon has_nway
+ (this came across from 3c575_cb).
+
+ LK1.1.6 06 Jun 2000, andrewm
+ - Backed out the PPC defines.
+ - Use del_timer_sync(), mod_timer().
+ - Fix wrapped ulong comparison in boomerang_rx()
+ - Add IS_TORNADO, use it to suppress 3c905C checksum error msg
+ (Donald Becker, I Lee Hetherington <ilh@sls.lcs.mit.edu>)
+ - Replace union wn3_config with BFINS/BFEXT manipulation for
+ sparc64 (Pete Zaitcev, Peter Jones)
+ - In vortex_error, do_tx_reset and vortex_tx_timeout(Vortex):
+ do a netif_wake_queue() to better recover from errors. (Anders Pedersen,
+ Donald Becker)
+ - Print a warning on out-of-memory (rate limited to 1 per 10 secs)
+ - Added two more Cardbus 575 NICs: 5b57 and 6564 (Paul Wagland)
+
+ LK1.1.7 2 Jul 2000 andrewm
+ - Better handling of shared IRQs
+ - Reset the transmitter on a Tx reclaim error
+ - Fixed crash under OOM during vortex_open() (Mark Hemment)
+ - Fix Rx cessation problem during OOM (help from Mark Hemment)
+ - The spinlocks around the mdio access were blocking interrupts for 300uS.
+ Fix all this to use spin_lock_bh() within mdio_read/write
+ - Only write to TxFreeThreshold if it's a boomerang - other NICs don't
+ have one.
+ - Added 802.3x MAC-layer flow control support
+
+ LK1.1.8 13 Aug 2000 andrewm
+ - Ignore request_region() return value - already reserved if Cardbus.
+ - Merged some additional Cardbus flags from Don's 0.99Qk
+ - Some fixes for 3c556 (Fred Maciel)
+ - Fix for EISA initialisation (Jan Rekorajski)
+ - Renamed MII_XCVR_PWR and EEPROM_230 to align with 3c575_cb and D. Becker's drivers
+ - Fixed MII_XCVR_PWR for 3CCFE575CT
+ - Added INVERT_LED_PWR, used it.
+ - Backed out the extra_reset stuff
+
+ LK1.1.9 12 Sep 2000 andrewm
+ - Backed out the tx_reset_resume flags. It was a no-op.
+ - In vortex_error, don't reset the Tx on txReclaim errors
+ - In vortex_error, don't reset the Tx on maxCollisions errors.
+ Hence backed out all the DownListPtr logic here.
+ - In vortex_error, give Tornado cards a partial TxReset on
+ maxCollisions (David Hinds). Defined MAX_COLLISION_RESET for this.
+ - Redid some driver flags and device names based on pcmcia_cs-3.1.20.
+ - Fixed a bug where, if vp->tx_full is set when the interface
+ is downed, it remains set when the interface is upped. Bad
+ things happen.
+
+ LK1.1.10 17 Sep 2000 andrewm
+ - Added EEPROM_8BIT for 3c555 (Fred Maciel)
+ - Added experimental support for the 3c556B Laptop Hurricane (Louis Gerbarg)
+ - Add HAS_NWAY to "3c900 Cyclone 10Mbps TPO"
+
+ LK1.1.11 13 Nov 2000 andrewm
+ - Dump MOD_INC/DEC_USE_COUNT, use SET_MODULE_OWNER
+
+ LK1.1.12 1 Jan 2001 andrewm (2.4.0-pre1)
+ - Call pci_enable_device before we request our IRQ (Tobias Ringstrom)
+ - Add 3c590 PCI latency timer hack to vortex_probe1 (from 0.99Ra)
+ - Added extended issue_and_wait for the 3c905CX.
+ - Look for an MII on PHY index 24 first (3c905CX oddity).
+ - Add HAS_NWAY to 3cSOHO100-TX (Brett Frankenberger)
+ - Don't free skbs we don't own on oom path in vortex_open().
+
+ LK1.1.13 27 Jan 2001
+ - Added explicit `medialock' flag so we can truly
+ lock the media type down with `options'.
+ - "check ioremap return and some tidbits" (Arnaldo Carvalho de Melo <acme@conectiva.com.br>)
+ - Added and used EEPROM_NORESET for 3c556B PM resumes.
+ - Fixed leakage of vp->rx_ring.
+ - Break out separate HAS_HWCKSM device capability flag.
+ - Kill vp->tx_full (ANK)
+ - Merge zerocopy fragment handling (ANK?)
+
+ LK1.1.14 15 Feb 2001
+ - Enable WOL. Can be turned on with `enable_wol' module option.
+ - EISA and PCI initialisation fixes (jgarzik, Manfred Spraul)
+ - If a device's internalconfig register reports it has NWAY,
+ use it, even if autoselect is enabled.
+
+ LK1.1.15 6 June 2001 akpm
+ - Prevent double counting of received bytes (Lars Christensen)
+ - Add ethtool support (jgarzik)
+ - Add module parm descriptions (Andrzej M. Krzysztofowicz)
+ - Implemented alloc_etherdev() API
+ - Special-case the 'Tx error 82' message.
+
+ LK1.1.16 18 July 2001 akpm
+ - Make NETIF_F_SG dependent upon nr_free_highpages(), not on CONFIG_HIGHMEM
+ - Lessen verbosity of bootup messages
+ - Fix WOL - use new PM API functions.
+ - Use netif_running() instead of vp->open in suspend/resume.
+ - Don't reset the interface logic on open/close/rmmod. It upsets
+ autonegotiation, and hence DHCP (from 0.99T).
+ - Back out EEPROM_NORESET flag because of the above (we do it for all
+ NICs).
+ - Correct 3c982 identification string
+ - Rename wait_for_completion() to issue_and_wait() to avoid completion.h
+ clash.
+
+ - See http://www.uow.edu.au/~andrewm/linux/#3c59x-2.3 for more details.
+ - Also see Documentation/networking/vortex.txt
+*/
+
+/*
+ * FIXME: This driver _could_ support MTU changing, but doesn't. See Don's hamachi.c implementation
+ * as well as other drivers
+ *
+ * NOTE: If you make 'vortex_debug' a constant (#define vortex_debug 0) the driver shrinks by 2k
+ * due to dead code elimination. There will be some performance benefits from this due to
+ * elimination of all the tests and reduced cache footprint.
+ */
+
+
+#define DRV_NAME "3c59x"
+#define DRV_VERSION "LK1.1.16"
+#define DRV_RELDATE "19 July 2001"
+
+
+/* "Knobs" that adjust features and parameters. */
+/* Set the copy breakpoint for the copy-only-tiny-frames scheme.
+ Setting to > 1512 effectively disables this feature. */
+/*#ifndef __arm__
+static const int rx_copybreak = 200;
+#else*/
+/* ARM systems perform better by disregarding the bus-master
+ transfer capability of these cards. -- rmk */
+/*static const int rx_copybreak = 1513;
+#endif*/
+static const int rx_copybreak = 0; /* Xen doesn't copybreak in drivers. */
+
+/* Allow setting MTU to a larger size, bypassing the normal ethernet setup. */
+static const int mtu = 1500;
+/* Maximum events (Rx packets, etc.) to handle at each interrupt. */
+static int max_interrupt_work = 32;
+/* Tx timeout interval (millisecs) */
+static int watchdog = 5000;
+
+/* Allow aggregation of Tx interrupts. Saves CPU load at the cost
+ * of possible Tx stalls if the system is blocking interrupts
+ * somewhere else. Undefine this to disable.
+ */
+#define tx_interrupt_mitigation 1
+
+/* Put out somewhat more debugging messages. (0: no msg, 1 minimal .. 6). */
+#define vortex_debug debug
+#ifdef VORTEX_DEBUG
+static int vortex_debug = VORTEX_DEBUG;
+#else
+static int vortex_debug = 1;
+#endif
+
+#ifndef __OPTIMIZE__
+#error You must compile this file with the correct options!
+#error See the last lines of the source file.
+#error You must compile this driver with "-O".
+#endif
+
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <linux/module.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+//#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/errno.h>
+//#include <linux/in.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/mii.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/ethtool.h>
+//#include <linux/highmem.h>
+#include <asm/irq.h> /* For NR_IRQS only. */
+#include <asm/bitops.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+/* A few values that may be tweaked. */
+/* Keep the ring sizes a power of two for efficiency. */
+#undef TX_RING_SIZE
+#undef RX_RING_SIZE
+#define TX_RING_SIZE 16
+#define RX_RING_SIZE 32
+#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/
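+/* A power-of-two ring size lets the index wrap with a cheap mask: e.g. an
+ entry computed as cur_tx % TX_RING_SIZE compiles down to an AND. */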
+
+/* Kernel compatibility defines, some common to David Hinds' PCMCIA package.
+ This is only in the support-all-kernels source code. */
+
+#define RUN_AT(x) (jiffies + (x))
+
+#include <linux/delay.h>
+
+
+static char version[] __devinitdata =
+DRV_NAME ": Donald Becker and others. www.scyld.com/network/vortex.html\n";
+
+MODULE_AUTHOR("Donald Becker <becker@scyld.com>");
+MODULE_DESCRIPTION("3Com 3c59x/3c9xx ethernet driver "
+ DRV_VERSION " " DRV_RELDATE);
+MODULE_LICENSE("GPL");
+
+MODULE_PARM(debug, "i");
+MODULE_PARM(options, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(full_duplex, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(hw_checksums, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(flow_ctrl, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(enable_wol, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(rx_copybreak, "i");
+MODULE_PARM(max_interrupt_work, "i");
+MODULE_PARM(compaq_ioaddr, "i");
+MODULE_PARM(compaq_irq, "i");
+MODULE_PARM(compaq_device_id, "i");
+MODULE_PARM(watchdog, "i");
+MODULE_PARM_DESC(debug, "3c59x debug level (0-6)");
+MODULE_PARM_DESC(options, "3c59x: Bits 0-3: media type, bit 4: bus mastering, bit 9: full duplex");
+MODULE_PARM_DESC(full_duplex, "3c59x full duplex setting(s) (1)");
+MODULE_PARM_DESC(hw_checksums, "3c59x Hardware checksum checking by adapter(s) (0-1)");
+MODULE_PARM_DESC(flow_ctrl, "3c59x 802.3x flow control usage (PAUSE only) (0-1)");
+MODULE_PARM_DESC(enable_wol, "3c59x: Turn on Wake-on-LAN for adapter(s) (0-1)");
+MODULE_PARM_DESC(rx_copybreak, "3c59x copy breakpoint for copy-only-tiny-frames");
+MODULE_PARM_DESC(max_interrupt_work, "3c59x maximum events handled per interrupt");
+MODULE_PARM_DESC(compaq_ioaddr, "3c59x PCI I/O base address (Compaq BIOS problem workaround)");
+MODULE_PARM_DESC(compaq_irq, "3c59x PCI IRQ number (Compaq BIOS problem workaround)");
+MODULE_PARM_DESC(compaq_device_id, "3c59x PCI device ID (Compaq BIOS problem workaround)");
+MODULE_PARM_DESC(watchdog, "3c59x transmit timeout in milliseconds");
+
+/* Operational parameters that are not usually changed. */
+
+/* The Vortex size is twice that of the original EtherLinkIII series: the
+ runtime register window, window 1, is now always mapped in.
+ The Boomerang size is twice as large as the Vortex -- it has additional
+ bus master control registers. */
+#define VORTEX_TOTAL_SIZE 0x20
+#define BOOMERANG_TOTAL_SIZE 0x40
+
+/* Set iff a MII transceiver on any interface requires mdio preamble.
+ This is only set with the original DP83840 on older 3c905 boards, so the
+ extra code size of a per-interface flag is not worthwhile. */
+static char mii_preamble_required;
+
+#define PFX DRV_NAME ": "
+
+
+
+/*
+ Theory of Operation
+
+I. Board Compatibility
+
+This device driver is designed for the 3Com FastEtherLink and FastEtherLink
+XL, 3Com's PCI to 10/100baseT adapters. It also works with the 10Mbps
+versions of the FastEtherLink cards. The supported product IDs are
+ 3c590, 3c592, 3c595, 3c597, 3c900, 3c905
+
+The related ISA 3c515 is supported with a separate driver, 3c515.c, included
+with the kernel source or available from
+ cesdis.gsfc.nasa.gov:/pub/linux/drivers/3c515.html
+
+II. Board-specific settings
+
+PCI bus devices are configured by the system at boot time, so no jumpers
+need to be set on the board. The system BIOS should be set to assign the
+PCI INTA signal to an otherwise unused system IRQ line.
+
+The EEPROM settings for media type and forced-full-duplex are observed.
+The EEPROM media type should be left at the default "autoselect" unless using
+10base2 or AUI connections which cannot be reliably detected.
+
+III. Driver operation
+
+The 3c59x series uses an interface that's very similar to the previous 3c5x9
+series. The primary interface is two programmed-I/O FIFOs, with an
+alternate single-contiguous-region bus-master transfer (see next).
+
+The 3c900 "Boomerang" series uses a full-bus-master interface with separate
+lists of transmit and receive descriptors, similar to the AMD LANCE/PCnet,
+DEC Tulip and Intel Speedo3. The first chip version retains a compatible
+programmed-I/O interface that has been removed in 'B' and subsequent board
+revisions.
+
+One extension that is advertised in a very large font is that the adapters
+are capable of being bus masters. On the Vortex chip this capability was
+only for a single contiguous region making it far less useful than the full
+bus master capability. There is a significant performance impact of taking
+an extra interrupt or polling for the completion of each transfer, as well
+as difficulty sharing the single transfer engine between the transmit and
+receive threads. Using DMA transfers is a win only with large blocks or
+with the flawed versions of the Intel Orion motherboard PCI controller.
+
+The Boomerang chip's full-bus-master interface is useful, and has the
+currently-unused advantages over other similar chips that queued transmit
+packets may be reordered and receive buffer groups are associated with a
+single frame.
+
+With full-bus-master support, this driver uses a "RX_COPYBREAK" scheme.
+Rather than a fixed intermediate receive buffer, this scheme allocates
+full-sized skbuffs as receive buffers. The value RX_COPYBREAK is used as
+the copying breakpoint: it is chosen to trade off the memory wasted by
+passing the full-sized skbuff to the queue layer for all frames against
+the cost of copying a frame to a correctly-sized skbuff.
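+
+As an illustrative sketch only (names simplified), the receive path
+roughly does:
+
+ if (pkt_len < rx_copybreak) {
+  skb = dev_alloc_skb(pkt_len + 2); // small, right-sized copy
+  memcpy(skb->data, rx_buf, pkt_len); // the ring keeps its buffer
+ } else {
+  skb = ring_skb;    // pass the big skbuff upstream
+  ring_skb = dev_alloc_skb(PKT_BUF_SZ); // refill the ring slot
+ }
+
+(Note: this Xen port sets rx_copybreak = 0, so the copy path is never taken.)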
+
+IIIC. Synchronization
+The driver runs as two independent, single-threaded flows of control. One
+is the send-packet routine, which enforces single-threaded use by the
+dev->tbusy flag. The other thread is the interrupt handler, which is single
+threaded by the hardware and other software.
+
+IV. Notes
+
+Thanks to Cameron Spitzer and Terry Murphy of 3Com for providing development
+3c590, 3c595, and 3c900 boards.
+The name "Vortex" is the internal 3Com project name for the PCI ASIC, and
+the EISA version is called "Demon". According to Terry these names come
+from rides at the local amusement park.
+
+The new chips support both ethernet (1.5K) and FDDI (4.5K) packet sizes!
+This driver only supports ethernet packets because of the skbuff allocation
+limit of 4K.
+*/
+
+/* This table drives the PCI probe routines. It's mostly boilerplate in all
+ of the drivers, and will likely be provided by some future kernel.
+*/
+enum pci_flags_bit {
+ PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4,
+ PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3,
+};
+
+enum { IS_VORTEX=1, IS_BOOMERANG=2, IS_CYCLONE=4, IS_TORNADO=8,
+ EEPROM_8BIT=0x10, /* AKPM: Uses 0x230 as the base bitmaps for EEPROM reads */
+ HAS_PWR_CTRL=0x20, HAS_MII=0x40, HAS_NWAY=0x80, HAS_CB_FNS=0x100,
+ INVERT_MII_PWR=0x200, INVERT_LED_PWR=0x400, MAX_COLLISION_RESET=0x800,
+ EEPROM_OFFSET=0x1000, HAS_HWCKSM=0x2000 };
+
+enum vortex_chips {
+ CH_3C590 = 0,
+ CH_3C592,
+ CH_3C597,
+ CH_3C595_1,
+ CH_3C595_2,
+
+ CH_3C595_3,
+ CH_3C900_1,
+ CH_3C900_2,
+ CH_3C900_3,
+ CH_3C900_4,
+
+ CH_3C900_5,
+ CH_3C900B_FL,
+ CH_3C905_1,
+ CH_3C905_2,
+ CH_3C905B_1,
+
+ CH_3C905B_2,
+ CH_3C905B_FX,
+ CH_3C905C,
+ CH_3C980,
+ CH_3C9805,
+
+ CH_3CSOHO100_TX,
+ CH_3C555,
+ CH_3C556,
+ CH_3C556B,
+ CH_3C575,
+
+ CH_3C575_1,
+ CH_3CCFE575,
+ CH_3CCFE575CT,
+ CH_3CCFE656,
+ CH_3CCFEM656,
+
+ CH_3CCFEM656_1,
+ CH_3C450,
+};
+
+
+/* note: this array directly indexed by above enums, and MUST
+ * be kept in sync with both the enums above, and the PCI device
+ * table below
+ */
+static struct vortex_chip_info {
+ const char *name;
+ int flags;
+ int drv_flags;
+ int io_size;
+} vortex_info_tbl[] __devinitdata = {
+#define EISA_TBL_OFFSET 0 /* Offset of this entry for vortex_eisa_init */
+ {"3c590 Vortex 10Mbps",
+ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+ {"3c592 EISA 10Mbps Demon/Vortex", /* AKPM: from Don's 3c59x_cb.c 0.49H */
+ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+ {"3c597 EISA Fast Demon/Vortex", /* AKPM: from Don's 3c59x_cb.c 0.49H */
+ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+ {"3c595 Vortex 100baseTx",
+ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+ {"3c595 Vortex 100baseT4",
+ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+
+ {"3c595 Vortex 100base-MII",
+ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+ {"3c900 Boomerang 10baseT",
+ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG, 64, },
+ {"3c900 Boomerang 10Mbps Combo",
+ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG, 64, },
+ {"3c900 Cyclone 10Mbps TPO", /* AKPM: from Don's 0.99M */
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, },
+ {"3c900 Cyclone 10Mbps Combo",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+
+ {"3c900 Cyclone 10Mbps TPC", /* AKPM: from Don's 0.99M */
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+ {"3c900B-FL Cyclone 10base-FL",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+ {"3c905 Boomerang 100baseTx",
+ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII, 64, },
+ {"3c905 Boomerang 100baseT4",
+ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII, 64, },
+ {"3c905B Cyclone 100baseTx",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, },
+
+ {"3c905B Cyclone 10/100/BNC",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, },
+ {"3c905B-FX Cyclone 100baseFx",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+ {"3c905C Tornado",
+ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, },
+ {"3c980 Cyclone",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+ {"3c982 Dual Port Server Cyclone",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+
+ {"3cSOHO100-TX Hurricane",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, },
+ {"3c555 Laptop Hurricane",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|EEPROM_8BIT|HAS_HWCKSM, 128, },
+ {"3c556 Laptop Tornado",
+ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|EEPROM_8BIT|HAS_CB_FNS|INVERT_MII_PWR|
+ HAS_HWCKSM, 128, },
+ {"3c556B Laptop Hurricane",
+ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|EEPROM_OFFSET|HAS_CB_FNS|INVERT_MII_PWR|
+ HAS_HWCKSM, 128, },
+ {"3c575 [Megahertz] 10/100 LAN CardBus",
+ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, },
+
+ {"3c575 Boomerang CardBus",
+ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, },
+ {"3CCFE575BT Cyclone CardBus",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|
+ INVERT_LED_PWR|HAS_HWCKSM, 128, },
+ {"3CCFE575CT Tornado CardBus",
+ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|
+ MAX_COLLISION_RESET|HAS_HWCKSM, 128, },
+ {"3CCFE656 Cyclone CardBus",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|
+ INVERT_LED_PWR|HAS_HWCKSM, 128, },
+ {"3CCFEM656B Cyclone+Winmodem CardBus",
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|
+ INVERT_LED_PWR|HAS_HWCKSM, 128, },
+
+ {"3CXFEM656C Tornado+Winmodem CardBus", /* From pcmcia-cs-3.1.5 */
+ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|
+ MAX_COLLISION_RESET|HAS_HWCKSM, 128, },
+ {"3c450 HomePNA Tornado", /* AKPM: from Don's 0.99Q */
+ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, },
+ {0,}, /* 0 terminated list. */
+};
+
+
+static struct pci_device_id vortex_pci_tbl[] __devinitdata = {
+ { 0x10B7, 0x5900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C590 },
+ { 0x10B7, 0x5920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C592 },
+ { 0x10B7, 0x5970, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C597 },
+ { 0x10B7, 0x5950, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_1 },
+ { 0x10B7, 0x5951, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_2 },
+
+ { 0x10B7, 0x5952, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_3 },
+ { 0x10B7, 0x9000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_1 },
+ { 0x10B7, 0x9001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_2 },
+ { 0x10B7, 0x9004, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_3 },
+ { 0x10B7, 0x9005, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_4 },
+
+ { 0x10B7, 0x9006, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_5 },
+ { 0x10B7, 0x900A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900B_FL },
+ { 0x10B7, 0x9050, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905_1 },
+ { 0x10B7, 0x9051, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905_2 },
+ { 0x10B7, 0x9055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_1 },
+
+ { 0x10B7, 0x9058, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_2 },
+ { 0x10B7, 0x905A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_FX },
+ { 0x10B7, 0x9200, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905C },
+ { 0x10B7, 0x9800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C980 },
+ { 0x10B7, 0x9805, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C9805 },
+
+ { 0x10B7, 0x7646, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CSOHO100_TX },
+ { 0x10B7, 0x5055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C555 },
+ { 0x10B7, 0x6055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C556 },
+ { 0x10B7, 0x6056, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C556B },
+ { 0x10B7, 0x5b57, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C575 },
+
+ { 0x10B7, 0x5057, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C575_1 },
+ { 0x10B7, 0x5157, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE575 },
+ { 0x10B7, 0x5257, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE575CT },
+ { 0x10B7, 0x6560, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE656 },
+ { 0x10B7, 0x6562, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFEM656 },
+
+ { 0x10B7, 0x6564, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFEM656_1 },
+ { 0x10B7, 0x4500, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C450 },
+ {0,} /* 0 terminated list. */
+};
+MODULE_DEVICE_TABLE(pci, vortex_pci_tbl);
+
+
+/* Operational definitions.
+ These are not used by other compilation units and thus are not
+ exported in a ".h" file.
+
+ First the windows. There are eight register windows, with the command
+ and status registers available in each.
+ */
+#define EL3WINDOW(win_num) outw(SelectWindow + (win_num), ioaddr + EL3_CMD)
+#define EL3_CMD 0x0e
+#define EL3_STATUS 0x0e
+
+/* The top five bits written to EL3_CMD are a command, the lower
+ 11 bits are the parameter, if applicable.
+ Note that 11 parameter bits were fine for ethernet, but the new chip
+ can handle FDDI-length frames (~4500 octets) and parameters now count
+ 32-bit 'Dwords' rather than octets. */
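+
+/* For example, EL3WINDOW(3) above expands to
+ outw(SelectWindow + 3, ioaddr + EL3_CMD);
+ i.e. command bits 15:11 = 1 (SelectWindow, defined just below) and
+ parameter bits 10:0 = 3. */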
+
+enum vortex_cmd {
+ TotalReset = 0<<11, SelectWindow = 1<<11, StartCoax = 2<<11,
+ RxDisable = 3<<11, RxEnable = 4<<11, RxReset = 5<<11,
+ UpStall = 6<<11, UpUnstall = (6<<11)+1,
+ DownStall = (6<<11)+2, DownUnstall = (6<<11)+3,
+ RxDiscard = 8<<11, TxEnable = 9<<11, TxDisable = 10<<11, TxReset = 11<<11,
+ FakeIntr = 12<<11, AckIntr = 13<<11, SetIntrEnb = 14<<11,
+ SetStatusEnb = 15<<11, SetRxFilter = 16<<11, SetRxThreshold = 17<<11,
+ SetTxThreshold = 18<<11, SetTxStart = 19<<11,
+ StartDMAUp = 20<<11, StartDMADown = (20<<11)+1, StatsEnable = 21<<11,
+ StatsDisable = 22<<11, StopCoax = 23<<11, SetFilterBit = 25<<11,};
+
+/* The SetRxFilter command accepts the following classes: */
+enum RxFilter {
+ RxStation = 1, RxMulticast = 2, RxBroadcast = 4, RxProm = 8 };
+
+/* Bits in the general status register. */
+enum vortex_status {
+ IntLatch = 0x0001, HostError = 0x0002, TxComplete = 0x0004,
+ TxAvailable = 0x0008, RxComplete = 0x0010, RxEarly = 0x0020,
+ IntReq = 0x0040, StatsFull = 0x0080,
+ DMADone = 1<<8, DownComplete = 1<<9, UpComplete = 1<<10,
+ DMAInProgress = 1<<11, /* DMA controller is still busy.*/
+ CmdInProgress = 1<<12, /* EL3_CMD is still busy.*/
+};
+
+/* Register window 1 offsets, the window used in normal operation.
+ On the Vortex this window is always mapped at offsets 0x10-0x1f. */
+enum Window1 {
+ TX_FIFO = 0x10, RX_FIFO = 0x10, RxErrors = 0x14,
+ RxStatus = 0x18, Timer=0x1A, TxStatus = 0x1B,
+ TxFree = 0x1C, /* Remaining free bytes in Tx buffer. */
+};
+enum Window0 {
+ Wn0EepromCmd = 10, /* Window 0: EEPROM command register. */
+ Wn0EepromData = 12, /* Window 0: EEPROM results register. */
+ IntrStatus=0x0E, /* Valid in all windows. */
+};
+enum Win0_EEPROM_bits {
+ EEPROM_Read = 0x80, EEPROM_WRITE = 0x40, EEPROM_ERASE = 0xC0,
+ EEPROM_EWENB = 0x30, /* Enable erasing/writing for 10 msec. */
+ EEPROM_EWDIS = 0x00, /* Disable EWENB before 10 msec timeout. */
+};
+/* EEPROM locations. */
+enum eeprom_offset {
+ PhysAddr01=0, PhysAddr23=1, PhysAddr45=2, ModelID=3,
+ EtherLink3ID=7, IFXcvrIO=8, IRQLine=9,
+ NodeAddr01=10, NodeAddr23=11, NodeAddr45=12,
+ DriverTune=13, Checksum=15};
+
+enum Window2 { /* Window 2. */
+ Wn2_ResetOptions=12,
+};
+enum Window3 { /* Window 3: MAC/config bits. */
+ Wn3_Config=0, Wn3_MAC_Ctrl=6, Wn3_Options=8,
+};
+
+#define BFEXT(value, offset, bitcount) \
+ ((((unsigned long)(value)) >> (offset)) & ((1 << (bitcount)) - 1))
+
+#define BFINS(lhs, rhs, offset, bitcount) \
+ (((lhs) & ~((((1 << (bitcount)) - 1)) << (offset))) | \
+ (((rhs) & ((1 << (bitcount)) - 1)) << (offset)))
+
+#define RAM_SIZE(v) BFEXT(v, 0, 3)
+#define RAM_WIDTH(v) BFEXT(v, 3, 1)
+#define RAM_SPEED(v) BFEXT(v, 4, 2)
+#define ROM_SIZE(v) BFEXT(v, 6, 2)
+#define RAM_SPLIT(v) BFEXT(v, 16, 2)
+#define XCVR(v) BFEXT(v, 20, 4)
+#define AUTOSELECT(v) BFEXT(v, 24, 1)
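+
+/* Illustration only: BFINS(config, XCVR_MII, 20, 4) would write transceiver
+ type XCVR_MII into the same 4-bit field that XCVR() above extracts. */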
+
+enum Window4 { /* Window 4: Xcvr/media bits. */
+ Wn4_FIFODiag = 4, Wn4_NetDiag = 6, Wn4_PhysicalMgmt=8, Wn4_Media = 10,
+};
+enum Win4_Media_bits {
+ Media_SQE = 0x0008, /* Enable SQE error counting for AUI. */
+ Media_10TP = 0x00C0, /* Enable link beat and jabber for 10baseT. */
+ Media_Lnk = 0x0080, /* Enable just link beat for 100TX/100FX. */
+ Media_LnkBeat = 0x0800,
+};
+enum Window7 { /* Window 7: Bus Master control. */
+ Wn7_MasterAddr = 0, Wn7_MasterLen = 6, Wn7_MasterStatus = 12,
+};
+/* Boomerang bus master control registers. */
+enum MasterCtrl {
+ PktStatus = 0x20, DownListPtr = 0x24, FragAddr = 0x28, FragLen = 0x2c,
+ TxFreeThreshold = 0x2f, UpPktStatus = 0x30, UpListPtr = 0x38,
+};
+
+/* The Rx and Tx descriptor lists.
+ Caution, Alpha hackers: these types are 32 bits! Note also the 8-byte
+ alignment constraint on tx_ring[] and rx_ring[]. */
+#define LAST_FRAG 0x80000000 /* Last Addr/Len pair in descriptor. */
+#define DN_COMPLETE 0x00010000 /* This packet has been downloaded */
+struct boom_rx_desc {
+ u32 next; /* Last entry points to 0. */
+ s32 status;
+ u32 addr; /* Up to 63 addr/len pairs possible. */
+ s32 length; /* Set LAST_FRAG to indicate last pair. */
+};
+/* Values for the Rx status entry. */
+enum rx_desc_status {
+ RxDComplete=0x00008000, RxDError=0x4000,
+ /* See boomerang_rx() for actual error bits */
+ IPChksumErr=1<<25, TCPChksumErr=1<<26, UDPChksumErr=1<<27,
+ IPChksumValid=1<<29, TCPChksumValid=1<<30, UDPChksumValid=1<<31,
+};
+
+#ifdef MAX_SKB_FRAGS
+#define DO_ZEROCOPY 1
+#else
+#define DO_ZEROCOPY 0
+#endif
+
+struct boom_tx_desc {
+ u32 next; /* Last entry points to 0. */
+ s32 status; /* bits 0:12 length, others see below. */
+#if DO_ZEROCOPY
+ struct {
+ u32 addr;
+ s32 length;
+ } frag[1+MAX_SKB_FRAGS];
+#else
+ u32 addr;
+ s32 length;
+#endif
+};
+
+/* Values for the Tx status entry. */
+enum tx_desc_status {
+ CRCDisable=0x2000, TxDComplete=0x8000,
+ AddIPChksum=0x02000000, AddTCPChksum=0x04000000, AddUDPChksum=0x08000000,
+ TxIntrUploaded=0x80000000, /* IRQ when in FIFO, but maybe not sent. */
+};
+
+/* Chip features we care about in vp->capabilities, read from the EEPROM. */
+enum ChipCaps { CapBusMaster=0x20, CapPwrMgmt=0x2000 };
+
+struct vortex_private {
+ /* The Rx and Tx rings should be quad-word-aligned. */
+ struct boom_rx_desc* rx_ring;
+ struct boom_tx_desc* tx_ring;
+ dma_addr_t rx_ring_dma;
+ dma_addr_t tx_ring_dma;
+ /* The addresses of transmit- and receive-in-place skbuffs. */
+ struct sk_buff* rx_skbuff[RX_RING_SIZE];
+ struct sk_buff* tx_skbuff[TX_RING_SIZE];
+ struct net_device *next_module; /* NULL if PCI device */
+ unsigned int cur_rx, cur_tx; /* The next free ring entry */
+ unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */
+ struct net_device_stats stats;
+ struct sk_buff *tx_skb; /* Packet being eaten by bus master ctrl. */
+ dma_addr_t tx_skb_dma; /* Allocated DMA address for bus master ctrl DMA. */
+
+ /* PCI configuration space information. */
+ struct pci_dev *pdev;
+ char *cb_fn_base; /* CardBus function status addr space. */
+
+ /* Some values here only for performance evaluation and path-coverage */
+ int rx_nocopy, rx_copy, queued_packet, rx_csumhits;
+ int card_idx;
+
+ /* The remainder are related to chip state, mostly media selection. */
+ struct timer_list timer; /* Media selection timer. */
+ struct timer_list rx_oom_timer; /* Rx skb allocation retry timer */
+ int options; /* User-settable misc. driver options. */
+ unsigned int media_override:4, /* Passed-in media type. */
+ default_media:4, /* Read from the EEPROM/Wn3_Config. */
+ full_duplex:1, force_fd:1, autoselect:1,
+ bus_master:1, /* Vortex can only do a fragment bus-m. */
+ full_bus_master_tx:1, full_bus_master_rx:2, /* Boomerang */
+ flow_ctrl:1, /* Use 802.3x flow control (PAUSE only) */
+ partner_flow_ctrl:1, /* Partner supports flow control */
+ has_nway:1,
+ enable_wol:1, /* Wake-on-LAN is enabled */
+ pm_state_valid:1, /* power_state[] has sane contents */
+ open:1,
+ medialock:1,
+ must_free_region:1; /* Flag: if zero, Cardbus owns the I/O region */
+ int drv_flags;
+ u16 status_enable;
+ u16 intr_enable;
+ u16 available_media; /* From Wn3_Options. */
+ u16 capabilities, info1, info2; /* Various, from EEPROM. */
+ u16 advertising; /* NWay media advertisement */
+ unsigned char phys[2]; /* MII device addresses. */
+ u16 deferred; /* Resend these interrupts when we
+ * bail out of the ISR */
+ u16 io_size; /* Size of PCI region (for release_region) */
+ spinlock_t lock; /* Serialise access to device & its vortex_private */
+ spinlock_t mdio_lock; /* Serialise access to mdio hardware */
+ u32 power_state[16];
+};
+
+/* The action to take with a media selection timer tick.
+ Note that we deviate from the 3Com order by checking 10base2 before AUI.
+ */
+enum xcvr_types {
+ XCVR_10baseT=0, XCVR_AUI, XCVR_10baseTOnly, XCVR_10base2, XCVR_100baseTx,
+ XCVR_100baseFx, XCVR_MII=6, XCVR_NWAY=8, XCVR_ExtMII=9, XCVR_Default=10,
+};
+
+static struct media_table {
+ char *name;
+ unsigned int media_bits:16, /* Bits to set in Wn4_Media register. */
+ mask:8, /* The transceiver-present bit in Wn3_Config.*/
+ next:8; /* The media type to try next. */
+ int wait; /* Time before we check media status. */
+} media_tbl[] = {
+ { "10baseT", Media_10TP,0x08, XCVR_10base2, (14*HZ)/10},
+ { "10Mbs AUI", Media_SQE, 0x20, XCVR_Default, (1*HZ)/10},
+ { "undefined", 0, 0x80, XCVR_10baseT, 10000},
+ { "10base2", 0, 0x10, XCVR_AUI, (1*HZ)/10},
+ { "100baseTX", Media_Lnk, 0x02, XCVR_100baseFx, (14*HZ)/10},
+ { "100baseFX", Media_Lnk, 0x04, XCVR_MII, (14*HZ)/10},
+ { "MII", 0, 0x41, XCVR_10baseT, 3*HZ },
+ { "undefined", 0, 0x01, XCVR_10baseT, 10000},
+ { "Autonegotiate", 0, 0x41, XCVR_10baseT, 3*HZ},
+ { "MII-External", 0, 0x41, XCVR_10baseT, 3*HZ },
+ { "Default", 0, 0xFF, XCVR_10baseT, 10000},
+};
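+
+/*
+ * Illustrative sketch (not part of the driver): the entries above chain
+ * together through their 'next' fields, so autoselection is just a walk
+ * along that chain, skipping transceivers whose 'mask' bit is absent from
+ * the Wn3_Options capability word:
+ *
+ *   int port = XCVR_100baseTx;
+ *   while (!(available_media & media_tbl[port].mask))
+ *       port = media_tbl[port].next;
+ *
+ * This is exactly the loop vortex_up() and vortex_timer() run below.
+ */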
+
+static int vortex_probe1(struct pci_dev *pdev, long ioaddr, int irq,
+ int chip_idx, int card_idx);
+static void vortex_up(struct net_device *dev);
+static void vortex_down(struct net_device *dev);
+static int vortex_open(struct net_device *dev);
+static void mdio_sync(long ioaddr, int bits);
+static int mdio_read(struct net_device *dev, int phy_id, int location);
+static void mdio_write(struct net_device *dev, int phy_id, int location, int value);
+static void vortex_timer(unsigned long arg);
+static void rx_oom_timer(unsigned long arg);
+static int vortex_start_xmit(struct sk_buff *skb, struct net_device *dev);
+static int boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev);
+static int vortex_rx(struct net_device *dev);
+static int boomerang_rx(struct net_device *dev);
+static void vortex_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+static void boomerang_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+static int vortex_close(struct net_device *dev);
+static void dump_tx_ring(struct net_device *dev);
+static void update_stats(long ioaddr, struct net_device *dev);
+static struct net_device_stats *vortex_get_stats(struct net_device *dev);
+static void set_rx_mode(struct net_device *dev);
+static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static void vortex_tx_timeout(struct net_device *dev);
+static void acpi_set_WOL(struct net_device *dev);
+
+/* This driver uses 'options' to pass the media type, full-duplex flag, etc. */
+/* Option count limit only -- unlimited interfaces are supported. */
+#define MAX_UNITS 8
+static int options[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1,};
+static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int hw_checksums[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int flow_ctrl[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int enable_wol[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
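+
+/*
+ * Usage sketch (an assumption -- based only on the arrays above, not on
+ * anything shown here): each array is indexed by card, so a hypothetical
+ * two-card load might look like
+ *
+ *   insmod 3c59x options=4,0 full_duplex=1,-1 enable_wol=1,-1
+ *
+ * with -1, the default, meaning "leave that card's setting alone".
+ */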
+
+/* #define dev_alloc_skb dev_alloc_skb_debug */
+
+/* A list of all installed Vortex EISA devices, for removing the driver module. */
+static struct net_device *root_vortex_eisa_dev;
+
+/* Variables to work-around the Compaq PCI BIOS32 problem. */
+static int compaq_ioaddr, compaq_irq, compaq_device_id = 0x5900;
+
+static int vortex_cards_found;
+
+#ifdef CONFIG_PM
+
+static int vortex_suspend (struct pci_dev *pdev, u32 state)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+
+ if (dev && dev->priv) {
+ if (netif_running(dev)) {
+ netif_device_detach(dev);
+ vortex_down(dev);
+ }
+ }
+ return 0;
+}
+
+static int vortex_resume (struct pci_dev *pdev)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+
+ if (dev && dev->priv) {
+ if (netif_running(dev)) {
+ vortex_up(dev);
+ netif_device_attach(dev);
+ }
+ }
+ return 0;
+}
+
+#endif /* CONFIG_PM */
+
+/* returns count found (>= 0), or negative on error */
+static int __init vortex_eisa_init (void)
+{
+ long ioaddr;
+ int rc;
+ int orig_cards_found = vortex_cards_found;
+
+ /* Now check all slots of the EISA bus. */
+ if (!EISA_bus)
+ return 0;
+
+ for (ioaddr = 0x1000; ioaddr < 0x9000; ioaddr += 0x1000) {
+ int device_id;
+
+ if (request_region(ioaddr, VORTEX_TOTAL_SIZE, DRV_NAME) == NULL)
+ continue;
+
+ /* Check the standard EISA ID register for an encoded '3Com'. */
+ if (inw(ioaddr + 0xC80) != 0x6d50) {
+ release_region (ioaddr, VORTEX_TOTAL_SIZE);
+ continue;
+ }
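+ /*
+ * Aside (an assumption, for illustration): 0x6d50 is the byte-swapped
+ * compressed-ASCII form of 3Com's EISA manufacturer ID "TCM". Each
+ * letter packs into 5 bits as (ch - 'A' + 1), so 'T','C','M' gives
+ * (20 << 10) | (3 << 5) | 13 == 0x506d, which inw() reads low byte
+ * first.
+ */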
+
+ /* Check for a product that we support, 3c59{2,7} any rev. */
+ device_id = (inb(ioaddr + 0xC82)<<8) + inb(ioaddr + 0xC83);
+ if ((device_id & 0xFF00) != 0x5900) {
+ release_region (ioaddr, VORTEX_TOTAL_SIZE);
+ continue;
+ }
+
+ rc = vortex_probe1(NULL, ioaddr, inw(ioaddr + 0xC88) >> 12,
+ EISA_TBL_OFFSET, vortex_cards_found);
+ if (rc == 0)
+ vortex_cards_found++;
+ else
+ release_region (ioaddr, VORTEX_TOTAL_SIZE);
+ }
+
+ /* Special code to work-around the Compaq PCI BIOS32 problem. */
+ if (compaq_ioaddr) {
+ vortex_probe1(NULL, compaq_ioaddr, compaq_irq,
+ compaq_device_id, vortex_cards_found++);
+ }
+
+ return vortex_cards_found - orig_cards_found;
+}
+
+/* returns count (>= 0), or negative on error */
+static int __devinit vortex_init_one (struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ int rc;
+
+ /* wake up and enable device */
+ if (pci_enable_device (pdev)) {
+ rc = -EIO;
+ } else {
+ rc = vortex_probe1 (pdev, pci_resource_start (pdev, 0), pdev->irq,
+ ent->driver_data, vortex_cards_found);
+ if (rc == 0)
+ vortex_cards_found++;
+ }
+ return rc;
+}
+
+/*
+ * Start up the PCI device which is described by *pdev.
+ * Return 0 on success.
+ *
+ * NOTE: pdev can be NULL, for the case of an EISA driver
+ */
+static int __devinit vortex_probe1(struct pci_dev *pdev,
+ long ioaddr, int irq,
+ int chip_idx, int card_idx)
+{
+ struct vortex_private *vp;
+ int option;
+ unsigned int eeprom[0x40], checksum = 0; /* EEPROM contents */
+ int i, step;
+ struct net_device *dev;
+ static int printed_version;
+ int retval, print_info;
+ struct vortex_chip_info * const vci = &vortex_info_tbl[chip_idx];
+ char *print_name;
+
+ if (!printed_version) {
+ printk (version);
+ printed_version = 1;
+ }
+
+ print_name = pdev ? pdev->slot_name : "3c59x";
+
+ dev = alloc_etherdev(sizeof(*vp));
+ retval = -ENOMEM;
+ if (!dev) {
+ printk (KERN_ERR PFX "unable to allocate etherdev, aborting\n");
+ goto out;
+ }
+ SET_MODULE_OWNER(dev);
+ vp = dev->priv;
+
+ /* The lower four bits are the media type. */
+ if (dev->mem_start) {
+ /*
+ * For non-modular use, the 'options' param is passed in as the
+ * third numeric argument of the LILO 'ether=' boot option.
+ */
+ option = dev->mem_start;
+ }
+ else if (card_idx < MAX_UNITS)
+ option = options[card_idx];
+ else
+ option = -1;
+
+ if (option > 0) {
+ if (option & 0x8000)
+ vortex_debug = 7;
+ if (option & 0x4000)
+ vortex_debug = 2;
+ if (option & 0x0400)
+ vp->enable_wol = 1;
+ }
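+
+ /*
+ * Summary of the 'options' word, as decoded here and in the media
+ * handling further down (derived from this function only): bits 3:0
+ * media type, bit 4 bus-master, bit 9 full duplex, bit 10 enable WOL,
+ * bit 14 debug level 2, bit 15 debug level 7.
+ */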
+
+ print_info = (vortex_debug > 1);
+ if (print_info)
+ printk (KERN_INFO "See Documentation/networking/vortex.txt\n");
+
+ printk(KERN_INFO "%s: 3Com %s %s at 0x%lx. Vers " DRV_VERSION "\n",
+ print_name,
+ pdev ? "PCI" : "EISA",
+ vci->name,
+ ioaddr);
+
+ dev->base_addr = ioaddr;
+ dev->irq = irq;
+ dev->mtu = mtu;
+ vp->drv_flags = vci->drv_flags;
+ vp->has_nway = (vci->drv_flags & HAS_NWAY) ? 1 : 0;
+ vp->io_size = vci->io_size;
+ vp->card_idx = card_idx;
+
+ /* module list only for EISA devices */
+ if (pdev == NULL) {
+ vp->next_module = root_vortex_eisa_dev;
+ root_vortex_eisa_dev = dev;
+ }
+
+ /* PCI-only startup logic */
+ if (pdev) {
+ /* EISA resources already marked, so only PCI needs to do this here */
+ /* Ignore return value, because Cardbus drivers already allocate for us */
+ if (request_region(ioaddr, vci->io_size, print_name) != NULL)
+ vp->must_free_region = 1;
+
+ /* enable bus-mastering if necessary */
+ if (vci->flags & PCI_USES_MASTER)
+ pci_set_master (pdev);
+
+ if (vci->drv_flags & IS_VORTEX) {
+ u8 pci_latency;
+ u8 new_latency = 248;
+
+ /* Check the PCI latency value. On the 3c590 series the latency timer
+ must be set to the maximum value to avoid data corruption that occurs
+ when the timer expires during a transfer. This bug exists in the
+ Vortex chip only. */
+ pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &pci_latency);
+ if (pci_latency < new_latency) {
+ printk(KERN_INFO "%s: Overriding PCI latency"
+ " timer (CFLT) setting of %d, new value is %d.\n",
+ print_name, pci_latency, new_latency);
+ pci_write_config_byte(pdev, PCI_LATENCY_TIMER, new_latency);
+ }
+ }
+ }
+
+ spin_lock_init(&vp->lock);
+ spin_lock_init(&vp->mdio_lock);
+ vp->pdev = pdev;
+
+ /* Makes sure rings are at least 16 byte aligned. */
+ vp->rx_ring = pci_alloc_consistent(pdev, sizeof(struct boom_rx_desc) * RX_RING_SIZE
+ + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+ &vp->rx_ring_dma);
+ retval = -ENOMEM;
+ if (vp->rx_ring == 0)
+ goto free_region;
+
+ vp->tx_ring = (struct boom_tx_desc *)(vp->rx_ring + RX_RING_SIZE);
+ vp->tx_ring_dma = vp->rx_ring_dma + sizeof(struct boom_rx_desc) * RX_RING_SIZE;
+
+ /* if we are a PCI driver, we store info in pdev->driver_data
+ * instead of a module list */
+ if (pdev)
+ pci_set_drvdata(pdev, dev);
+
+ vp->media_override = 7;
+ if (option >= 0) {
+ vp->media_override = ((option & 7) == 2) ? 0 : option & 15;
+ if (vp->media_override != 7)
+ vp->medialock = 1;
+ vp->full_duplex = (option & 0x200) ? 1 : 0;
+ vp->bus_master = (option & 16) ? 1 : 0;
+ }
+
+ if (card_idx < MAX_UNITS) {
+ if (full_duplex[card_idx] > 0)
+ vp->full_duplex = 1;
+ if (flow_ctrl[card_idx] > 0)
+ vp->flow_ctrl = 1;
+ if (enable_wol[card_idx] > 0)
+ vp->enable_wol = 1;
+ }
+
+ vp->force_fd = vp->full_duplex;
+ vp->options = option;
+ /* Read the station address from the EEPROM. */
+ EL3WINDOW(0);
+ {
+ int base;
+
+ if (vci->drv_flags & EEPROM_8BIT)
+ base = 0x230;
+ else if (vci->drv_flags & EEPROM_OFFSET)
+ base = EEPROM_Read + 0x30;
+ else
+ base = EEPROM_Read;
+
+ for (i = 0; i < 0x40; i++) {
+ int timer;
+ outw(base + i, ioaddr + Wn0EepromCmd);
+ /* Pause for at least 162 us for the read to take place. */
+ for (timer = 10; timer >= 0; timer--) {
+ udelay(162);
+ if ((inw(ioaddr + Wn0EepromCmd) & 0x8000) == 0)
+ break;
+ }
+ eeprom[i] = inw(ioaddr + Wn0EepromData);
+ }
+ }
+ for (i = 0; i < 0x18; i++)
+ checksum ^= eeprom[i];
+ checksum = (checksum ^ (checksum >> 8)) & 0xff;
+ if (checksum != 0x00) { /* Grrr, needless incompatible change, 3Com. */
+ while (i < 0x21)
+ checksum ^= eeprom[i++];
+ checksum = (checksum ^ (checksum >> 8)) & 0xff;
+ }
+ if ((checksum != 0x00) && !(vci->drv_flags & IS_TORNADO))
+ printk(" ***INVALID CHECKSUM %4.4x*** ", checksum);
+ for (i = 0; i < 3; i++)
+ ((u16 *)dev->dev_addr)[i] = htons(eeprom[i + 10]);
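+ /*
+ * EEPROM words 10-12 hold the station address in network (big-endian)
+ * byte order, hence the htons() above.
+ */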
+ if (print_info) {
+ for (i = 0; i < 6; i++)
+ printk("%c%2.2x", i ? ':' : ' ', dev->dev_addr[i]);
+ }
+ EL3WINDOW(2);
+ for (i = 0; i < 6; i++)
+ outb(dev->dev_addr[i], ioaddr + i);
+
+#ifdef __sparc__
+ if (print_info)
+ printk(", IRQ %s\n", __irq_itoa(dev->irq));
+#else
+ if (print_info)
+ printk(", IRQ %d\n", dev->irq);
+ /* Tell them about an invalid IRQ. */
+ if (dev->irq <= 0 || dev->irq >= NR_IRQS)
+ printk(KERN_WARNING " *** Warning: IRQ %d is unlikely to work! ***\n",
+ dev->irq);
+#endif
+
+ EL3WINDOW(4);
+ step = (inb(ioaddr + Wn4_NetDiag) & 0x1e) >> 1;
+ if (print_info) {
+ printk(KERN_INFO " product code %02x%02x rev %02x.%d date %02d-"
+ "%02d-%02d\n", eeprom[6]&0xff, eeprom[6]>>8, eeprom[0x14],
+ step, (eeprom[4]>>5) & 15, eeprom[4] & 31, eeprom[4]>>9);
+ }
+
+
+ if (pdev && vci->drv_flags & HAS_CB_FNS) {
+ unsigned long fn_st_addr; /* Cardbus function status space */
+ unsigned short n;
+
+ fn_st_addr = pci_resource_start (pdev, 2);
+ if (fn_st_addr) {
+ vp->cb_fn_base = ioremap(fn_st_addr, 128);
+ retval = -ENOMEM;
+ if (!vp->cb_fn_base)
+ goto free_ring;
+ }
+ if (print_info) {
+ printk(KERN_INFO "%s: CardBus functions mapped %8.8lx->%p\n",
+ print_name, fn_st_addr, vp->cb_fn_base);
+ }
+ EL3WINDOW(2);
+
+ n = inw(ioaddr + Wn2_ResetOptions) & ~0x4010;
+ if (vp->drv_flags & INVERT_LED_PWR)
+ n |= 0x10;
+ if (vp->drv_flags & INVERT_MII_PWR)
+ n |= 0x4000;
+ outw(n, ioaddr + Wn2_ResetOptions);
+ }
+
+ /* Extract our information from the EEPROM data. */
+ vp->info1 = eeprom[13];
+ vp->info2 = eeprom[15];
+ vp->capabilities = eeprom[16];
+
+ if (vp->info1 & 0x8000) {
+ vp->full_duplex = 1;
+ if (print_info)
+ printk(KERN_INFO "Full duplex capable\n");
+ }
+
+ {
+ static const char * ram_split[] = {"5:3", "3:1", "1:1", "3:5"};
+ unsigned int config;
+ EL3WINDOW(3);
+ vp->available_media = inw(ioaddr + Wn3_Options);
+ if ((vp->available_media & 0xff) == 0) /* Broken 3c916 */
+ vp->available_media = 0x40;
+ config = inl(ioaddr + Wn3_Config);
+ if (print_info) {
+ printk(KERN_DEBUG " Internal config register is %4.4x, "
+ "transceivers %#x.\n", config, inw(ioaddr + Wn3_Options));
+ printk(KERN_INFO " %dK %s-wide RAM %s Rx:Tx split, %s%s interface.\n",
+ 8 << RAM_SIZE(config),
+ RAM_WIDTH(config) ? "word" : "byte",
+ ram_split[RAM_SPLIT(config)],
+ AUTOSELECT(config) ? "autoselect/" : "",
+ XCVR(config) > XCVR_ExtMII ? "<invalid transceiver>" :
+ media_tbl[XCVR(config)].name);
+ }
+ vp->default_media = XCVR(config);
+ if (vp->default_media == XCVR_NWAY)
+ vp->has_nway = 1;
+ vp->autoselect = AUTOSELECT(config);
+ }
+
+ if (vp->media_override != 7) {
+ printk(KERN_INFO "%s: Media override to transceiver type %d (%s).\n",
+ print_name, vp->media_override,
+ media_tbl[vp->media_override].name);
+ dev->if_port = vp->media_override;
+ } else
+ dev->if_port = vp->default_media;
+
+ if (dev->if_port == XCVR_MII || dev->if_port == XCVR_NWAY) {
+ int phy, phy_idx = 0;
+ EL3WINDOW(4);
+ mii_preamble_required++;
+ mii_preamble_required++;
+ mdio_read(dev, 24, 1);
+ for (phy = 0; phy < 32 && phy_idx < 1; phy++) {
+ int mii_status, phyx;
+
+ /*
+ * For the 3c905CX we look at index 24 first, because it bogusly
+ * reports an external PHY at all indices
+ */
+ if (phy == 0)
+ phyx = 24;
+ else if (phy <= 24)
+ phyx = phy - 1;
+ else
+ phyx = phy;
+ mii_status = mdio_read(dev, phyx, 1);
+ if (mii_status && mii_status != 0xffff) {
+ vp->phys[phy_idx++] = phyx;
+ if (print_info) {
+ printk(KERN_INFO " MII transceiver found at address %d,"
+ " status %4x.\n", phyx, mii_status);
+ }
+ if ((mii_status & 0x0040) == 0)
+ mii_preamble_required++;
+ }
+ }
+ mii_preamble_required--;
+ if (phy_idx == 0) {
+ printk(KERN_WARNING" ***WARNING*** No MII transceivers found!\n");
+ vp->phys[0] = 24;
+ } else {
+ vp->advertising = mdio_read(dev, vp->phys[0], 4);
+ if (vp->full_duplex) {
+ /* Only advertise the FD media types. */
+ vp->advertising &= ~0x02A0;
+ mdio_write(dev, vp->phys[0], 4, vp->advertising);
+ }
+ }
+ }
+
+ if (vp->capabilities & CapBusMaster) {
+ vp->full_bus_master_tx = 1;
+ if (print_info) {
+ printk(KERN_INFO " Enabling bus-master transmits and %s receives.\n",
+ (vp->info2 & 1) ? "early" : "whole-frame" );
+ }
+ vp->full_bus_master_rx = (vp->info2 & 1) ? 1 : 2;
+ vp->bus_master = 0; /* AKPM: vortex only */
+ }
+
+ /* The 3c59x-specific entries in the device structure. */
+ dev->open = vortex_open;
+ if (vp->full_bus_master_tx) {
+ dev->hard_start_xmit = boomerang_start_xmit;
+ /* Actually, it still should work with iommu. */
+ dev->features |= NETIF_F_SG;
+ if (((hw_checksums[card_idx] == -1) && (vp->drv_flags & HAS_HWCKSM)) ||
+ (hw_checksums[card_idx] == 1)) {
+ dev->features |= NETIF_F_IP_CSUM;
+ }
+ } else {
+ dev->hard_start_xmit = vortex_start_xmit;
+ }
+
+ if (print_info) {
+ printk(KERN_INFO "%s: scatter/gather %sabled. h/w checksums %sabled\n",
+ print_name,
+ (dev->features & NETIF_F_SG) ? "en":"dis",
+ (dev->features & NETIF_F_IP_CSUM) ? "en":"dis");
+ }
+
+ dev->stop = vortex_close;
+ dev->get_stats = vortex_get_stats;
+ dev->do_ioctl = vortex_ioctl;
+ dev->set_multicast_list = set_rx_mode;
+ dev->tx_timeout = vortex_tx_timeout;
+ dev->watchdog_timeo = (watchdog * HZ) / 1000;
+ if (pdev && vp->enable_wol) {
+ vp->pm_state_valid = 1;
+ pci_save_state(vp->pdev, vp->power_state);
+ acpi_set_WOL(dev);
+ }
+ retval = register_netdev(dev);
+ if (retval == 0)
+ return 0;
+
+free_ring:
+ pci_free_consistent(pdev,
+ sizeof(struct boom_rx_desc) * RX_RING_SIZE
+ + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+ vp->rx_ring,
+ vp->rx_ring_dma);
+free_region:
+ if (vp->must_free_region)
+ release_region(ioaddr, vci->io_size);
+ kfree (dev);
+ printk(KERN_ERR PFX "vortex_probe1 fails. Returns %d\n", retval);
+out:
+ return retval;
+}
+
+static void
+issue_and_wait(struct net_device *dev, int cmd)
+{
+ int i;
+
+ outw(cmd, dev->base_addr + EL3_CMD);
+ for (i = 0; i < 2000; i++) {
+ if (!(inw(dev->base_addr + EL3_STATUS) & CmdInProgress))
+ return;
+ }
+
+ /* OK, that didn't work. Do it the slow way. One second. */
+ for (i = 0; i < 100000; i++) {
+ if (!(inw(dev->base_addr + EL3_STATUS) & CmdInProgress)) {
+ if (vortex_debug > 1)
+ printk(KERN_INFO "%s: command 0x%04x took %d usecs\n",
+ dev->name, cmd, i * 10);
+ return;
+ }
+ udelay(10);
+ }
+ printk(KERN_ERR "%s: command 0x%04x did not complete! Status=0x%x\n",
+ dev->name, cmd, inw(dev->base_addr + EL3_STATUS));
+}
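+
+/*
+ * Timing note: the first loop above is a fast spin with no delay, for
+ * commands that complete almost immediately; the fallback polls every
+ * 10 us for 100000 iterations -- roughly the promised one second --
+ * before declaring the command stuck.
+ */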
+
+static void
+vortex_up(struct net_device *dev)
+{
+ long ioaddr = dev->base_addr;
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ unsigned int config;
+ int i;
+
+ if (vp->pdev && vp->enable_wol) {
+ pci_set_power_state(vp->pdev, 0); /* Go active */
+ pci_restore_state(vp->pdev, vp->power_state);
+ }
+
+ /* Before initializing select the active media port. */
+ EL3WINDOW(3);
+ config = inl(ioaddr + Wn3_Config);
+
+ if (vp->media_override != 7) {
+ printk(KERN_INFO "%s: Media override to transceiver %d (%s).\n",
+ dev->name, vp->media_override,
+ media_tbl[vp->media_override].name);
+ dev->if_port = vp->media_override;
+ } else if (vp->autoselect) {
+ if (vp->has_nway) {
+ if (vortex_debug > 1)
+ printk(KERN_INFO "%s: using NWAY device table, not %d\n",
+ dev->name, dev->if_port);
+ dev->if_port = XCVR_NWAY;
+ } else {
+ /* Find first available media type, starting with 100baseTx. */
+ dev->if_port = XCVR_100baseTx;
+ while (! (vp->available_media & media_tbl[dev->if_port].mask))
+ dev->if_port = media_tbl[dev->if_port].next;
+ if (vortex_debug > 1)
+ printk(KERN_INFO "%s: first available media type: %s\n",
+ dev->name, media_tbl[dev->if_port].name);
+ }
+ } else {
+ dev->if_port = vp->default_media;
+ if (vortex_debug > 1)
+ printk(KERN_INFO "%s: using default media %s\n",
+ dev->name, media_tbl[dev->if_port].name);
+ }
+
+ init_timer(&vp->timer);
+ vp->timer.expires = RUN_AT(media_tbl[dev->if_port].wait);
+ vp->timer.data = (unsigned long)dev;
+ vp->timer.function = vortex_timer; /* timer handler */
+ add_timer(&vp->timer);
+
+ init_timer(&vp->rx_oom_timer);
+ vp->rx_oom_timer.data = (unsigned long)dev;
+ vp->rx_oom_timer.function = rx_oom_timer;
+
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "%s: Initial media type %s.\n",
+ dev->name, media_tbl[dev->if_port].name);
+
+ vp->full_duplex = vp->force_fd;
+ config = BFINS(config, dev->if_port, 20, 4);
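+ /*
+ * Sketch, assuming BFINS() is the usual bit-field-insert helper defined
+ * elsewhere in this file: the line above behaves roughly like
+ *
+ *   config = (config & ~(0xf << 20)) | (dev->if_port << 20);
+ *
+ * i.e. it replaces the 4-bit transceiver-select field at bit 20 of
+ * InternalConfig.
+ */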
+ if (vortex_debug > 6)
+ printk(KERN_DEBUG "vortex_up(): writing 0x%x to InternalConfig\n", config);
+ outl(config, ioaddr + Wn3_Config);
+
+ if (dev->if_port == XCVR_MII || dev->if_port == XCVR_NWAY) {
+ int mii_reg1, mii_reg5;
+ EL3WINDOW(4);
+ /* Read BMSR (reg1) only to clear old status. */
+ mii_reg1 = mdio_read(dev, vp->phys[0], 1);
+ mii_reg5 = mdio_read(dev, vp->phys[0], 5);
+ if (mii_reg5 == 0xffff || mii_reg5 == 0x0000)
+ ; /* No MII device or no link partner report */
+ else if ((mii_reg5 & 0x0100) != 0 /* 100baseTx-FD */
+ || (mii_reg5 & 0x00C0) == 0x0040) /* 10T-FD, but not 100-HD */
+ vp->full_duplex = 1;
+ vp->partner_flow_ctrl = ((mii_reg5 & 0x0400) != 0);
+ if (vortex_debug > 1)
+ printk(KERN_INFO "%s: MII #%d status %4.4x, link partner capability %4.4x,"
+ " info1 %04x, setting %s-duplex.\n",
+ dev->name, vp->phys[0],
+ mii_reg1, mii_reg5,
+ vp->info1, ((vp->info1 & 0x8000) || vp->full_duplex) ? "full" : "half");
+ EL3WINDOW(3);
+ }
+
+ /* Set the full-duplex bit. */
+ outw( ((vp->info1 & 0x8000) || vp->full_duplex ? 0x20 : 0) |
+ (dev->mtu > 1500 ? 0x40 : 0) |
+ ((vp->full_duplex && vp->flow_ctrl && vp->partner_flow_ctrl) ? 0x100 : 0),
+ ioaddr + Wn3_MAC_Ctrl);
+
+ if (vortex_debug > 1) {
+ printk(KERN_DEBUG "%s: vortex_up() InternalConfig %8.8x.\n",
+ dev->name, config);
+ }
+
+ issue_and_wait(dev, TxReset);
+ /*
+ * Don't reset the PHY - that upsets autonegotiation during DHCP operations.
+ */
+ issue_and_wait(dev, RxReset|0x04);
+
+ outw(SetStatusEnb | 0x00, ioaddr + EL3_CMD);
+
+ if (vortex_debug > 1) {
+ EL3WINDOW(4);
+ printk(KERN_DEBUG "%s: vortex_up() irq %d media status %4.4x.\n",
+ dev->name, dev->irq, inw(ioaddr + Wn4_Media));
+ }
+
+ /* Set the station address and mask in window 2 each time opened. */
+ EL3WINDOW(2);
+ for (i = 0; i < 6; i++)
+ outb(dev->dev_addr[i], ioaddr + i);
+ for (; i < 12; i+=2)
+ outw(0, ioaddr + i);
+
+ if (vp->cb_fn_base) {
+ unsigned short n = inw(ioaddr + Wn2_ResetOptions) & ~0x4010;
+ if (vp->drv_flags & INVERT_LED_PWR)
+ n |= 0x10;
+ if (vp->drv_flags & INVERT_MII_PWR)
+ n |= 0x4000;
+ outw(n, ioaddr + Wn2_ResetOptions);
+ }
+
+ if (dev->if_port == XCVR_10base2)
+ /* Start the thinnet transceiver. We should really wait 50ms...*/
+ outw(StartCoax, ioaddr + EL3_CMD);
+ if (dev->if_port != XCVR_NWAY) {
+ EL3WINDOW(4);
+ outw((inw(ioaddr + Wn4_Media) & ~(Media_10TP|Media_SQE)) |
+ media_tbl[dev->if_port].media_bits, ioaddr + Wn4_Media);
+ }
+
+ /* Switch to the stats window, and clear all stats by reading. */
+ outw(StatsDisable, ioaddr + EL3_CMD);
+ EL3WINDOW(6);
+ for (i = 0; i < 10; i++)
+ inb(ioaddr + i);
+ inw(ioaddr + 10);
+ inw(ioaddr + 12);
+ /* New: On the Vortex we must also clear the BadSSD counter. */
+ EL3WINDOW(4);
+ inb(ioaddr + 12);
+ /* ..and on the Boomerang we enable the extra statistics bits. */
+ outw(0x0040, ioaddr + Wn4_NetDiag);
+
+ /* Switch to register set 7 for normal use. */
+ EL3WINDOW(7);
+
+ if (vp->full_bus_master_rx) { /* Boomerang bus master. */
+ vp->cur_rx = vp->dirty_rx = 0;
+ /* Initialize the RxEarly register as recommended. */
+ outw(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD);
+ outl(0x0020, ioaddr + PktStatus);
+ outl(vp->rx_ring_dma, ioaddr + UpListPtr);
+ }
+ if (vp->full_bus_master_tx) { /* Boomerang bus master Tx. */
+ vp->cur_tx = vp->dirty_tx = 0;
+ if (vp->drv_flags & IS_BOOMERANG)
+ outb(PKT_BUF_SZ>>8, ioaddr + TxFreeThreshold); /* Room for a packet. */
+ /* Clear the Rx, Tx rings. */
+ for (i = 0; i < RX_RING_SIZE; i++) /* AKPM: this is done in vortex_open, too */
+ vp->rx_ring[i].status = 0;
+ for (i = 0; i < TX_RING_SIZE; i++)
+ vp->tx_skbuff[i] = 0;
+ outl(0, ioaddr + DownListPtr);
+ }
+ /* Set receiver mode: presumably accept broadcast and physical addr only. */
+ set_rx_mode(dev);
+ outw(StatsEnable, ioaddr + EL3_CMD); /* Turn on statistics. */
+
+// issue_and_wait(dev, SetTxStart|0x07ff);
+ outw(RxEnable, ioaddr + EL3_CMD); /* Enable the receiver. */
+ outw(TxEnable, ioaddr + EL3_CMD); /* Enable transmitter. */
+ /* Allow status bits to be seen. */
+ vp->status_enable = SetStatusEnb | HostError|IntReq|StatsFull|TxComplete|
+ (vp->full_bus_master_tx ? DownComplete : TxAvailable) |
+ (vp->full_bus_master_rx ? UpComplete : RxComplete) |
+ (vp->bus_master ? DMADone : 0);
+ vp->intr_enable = SetIntrEnb | IntLatch | TxAvailable |
+ (vp->full_bus_master_rx ? 0 : RxComplete) |
+ StatsFull | HostError | TxComplete | IntReq
+ | (vp->bus_master ? DMADone : 0) | UpComplete | DownComplete;
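+ /*
+ * Two separate masks (an interpretation, from the command names):
+ * status_enable (SetStatusEnb) selects which events may appear in the
+ * status register at all, while intr_enable (SetIntrEnb) selects which
+ * of those actually assert the interrupt line.
+ */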
+ outw(vp->status_enable, ioaddr + EL3_CMD);
+ /* Ack all pending events, and set active indicator mask. */
+ outw(AckIntr | IntLatch | TxAvailable | RxEarly | IntReq,
+ ioaddr + EL3_CMD);
+ outw(vp->intr_enable, ioaddr + EL3_CMD);
+ if (vp->cb_fn_base) /* The PCMCIA people are idiots. */
+ writel(0x8000, vp->cb_fn_base + 4);
+ netif_start_queue (dev);
+}
+
+static int
+vortex_open(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ int i;
+ int retval;
+
+ /* Use the now-standard shared IRQ implementation. */
+ if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ?
+ &boomerang_interrupt : &vortex_interrupt, SA_SHIRQ, dev->name, dev))) {
+ printk(KERN_ERR "%s: Could not reserve IRQ %d\n", dev->name, dev->irq);
+ goto out;
+ }
+
+ if (vp->full_bus_master_rx) { /* Boomerang bus master. */
+ if (vortex_debug > 2)
+ printk(KERN_DEBUG "%s: Filling in the Rx ring.\n", dev->name);
+ for (i = 0; i < RX_RING_SIZE; i++) {
+ struct sk_buff *skb;
+ vp->rx_ring[i].next = cpu_to_le32(vp->rx_ring_dma + sizeof(struct boom_rx_desc) * (i+1));
+ vp->rx_ring[i].status = 0; /* Clear complete bit. */
+ vp->rx_ring[i].length = cpu_to_le32(PKT_BUF_SZ | LAST_FRAG);
+ skb = dev_alloc_skb(PKT_BUF_SZ);
+ vp->rx_skbuff[i] = skb;
+ if (skb == NULL)
+ break; /* Bad news! */
+ skb->dev = dev; /* Mark as being used by this device. */
+ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
+ vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->tail, PKT_BUF_SZ, PCI_DMA_FROMDEVICE));
+ }
+ if (i != RX_RING_SIZE) {
+ int j;
+ printk(KERN_EMERG "%s: no memory for rx ring\n", dev->name);
+ for (j = 0; j < i; j++) {
+ if (vp->rx_skbuff[j]) {
+ dev_kfree_skb(vp->rx_skbuff[j]);
+ vp->rx_skbuff[j] = 0;
+ }
+ }
+ retval = -ENOMEM;
+ goto out_free_irq;
+ }
+ /* Wrap the ring. */
+ vp->rx_ring[i-1].next = cpu_to_le32(vp->rx_ring_dma);
+ }
+
+ vortex_up(dev);
+ return 0;
+
+out_free_irq:
+ free_irq(dev->irq, dev);
+out:
+ if (vortex_debug > 1)
+ printk(KERN_ERR "%s: vortex_open() fails: returning %d\n", dev->name, retval);
+ return retval;
+}
+
+static void
+vortex_timer(unsigned long data)
+{
+ struct net_device *dev = (struct net_device *)data;
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ int next_tick = 60*HZ;
+ int ok = 0;
+ int media_status, mii_status, old_window;
+
+ if (vortex_debug > 2) {
+ printk(KERN_DEBUG "%s: Media selection timer tick happened, %s.\n",
+ dev->name, media_tbl[dev->if_port].name);
+ printk(KERN_DEBUG "dev->watchdog_timeo=%d\n", dev->watchdog_timeo);
+ }
+
+ if (vp->medialock)
+ goto leave_media_alone;
+ disable_irq(dev->irq);
+ old_window = inw(ioaddr + EL3_CMD) >> 13;
+ EL3WINDOW(4);
+ media_status = inw(ioaddr + Wn4_Media);
+ switch (dev->if_port) {
+ case XCVR_10baseT: case XCVR_100baseTx: case XCVR_100baseFx:
+ if (media_status & Media_LnkBeat) {
+ ok = 1;
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "%s: Media %s has link beat, %x.\n",
+ dev->name, media_tbl[dev->if_port].name, media_status);
+ } else if (vortex_debug > 1)
+ printk(KERN_DEBUG "%s: Media %s has no link beat, %x.\n",
+ dev->name, media_tbl[dev->if_port].name, media_status);
+ break;
+ case XCVR_MII: case XCVR_NWAY:
+ {
+ mii_status = mdio_read(dev, vp->phys[0], 1);
+ ok = 1;
+ if (vortex_debug > 2)
+ printk(KERN_DEBUG "%s: MII transceiver has status %4.4x.\n",
+ dev->name, mii_status);
+ if (mii_status & 0x0004) {
+ int mii_reg5 = mdio_read(dev, vp->phys[0], 5);
+ if (! vp->force_fd && mii_reg5 != 0xffff) {
+ int duplex = (mii_reg5&0x0100) ||
+ (mii_reg5 & 0x01C0) == 0x0040;
+ if (vp->full_duplex != duplex) {
+ vp->full_duplex = duplex;
+ printk(KERN_INFO "%s: Setting %s-duplex based on MII "
+ "#%d link partner capability of %4.4x.\n",
+ dev->name, vp->full_duplex ? "full" : "half",
+ vp->phys[0], mii_reg5);
+ /* Set the full-duplex bit. */
+ EL3WINDOW(3);
+ outw( (vp->full_duplex ? 0x20 : 0) |
+ (dev->mtu > 1500 ? 0x40 : 0) |
+ ((vp->full_duplex && vp->flow_ctrl && vp->partner_flow_ctrl) ? 0x100 : 0),
+ ioaddr + Wn3_MAC_Ctrl);
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "Setting duplex in Wn3_MAC_Ctrl\n");
+ /* AKPM: bug: should reset Tx and Rx after setting Duplex. Page 180 */
+ }
+ }
+ }
+ }
+ break;
+ default: /* Other media types handled by Tx timeouts. */
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "%s: Media %s has no indication, %x.\n",
+ dev->name, media_tbl[dev->if_port].name, media_status);
+ ok = 1;
+ }
+ if ( ! ok) {
+ unsigned int config;
+
+ do {
+ dev->if_port = media_tbl[dev->if_port].next;
+ } while ( ! (vp->available_media & media_tbl[dev->if_port].mask));
+ if (dev->if_port == XCVR_Default) { /* Go back to default. */
+ dev->if_port = vp->default_media;
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "%s: Media selection failing, using default "
+ "%s port.\n",
+ dev->name, media_tbl[dev->if_port].name);
+ } else {
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "%s: Media selection failed, now trying "
+ "%s port.\n",
+ dev->name, media_tbl[dev->if_port].name);
+ next_tick = media_tbl[dev->if_port].wait;
+ }
+ outw((media_status & ~(Media_10TP|Media_SQE)) |
+ media_tbl[dev->if_port].media_bits, ioaddr + Wn4_Media);
+
+ EL3WINDOW(3);
+ config = inl(ioaddr + Wn3_Config);
+ config = BFINS(config, dev->if_port, 20, 4);
+ outl(config, ioaddr + Wn3_Config);
+
+ outw(dev->if_port == XCVR_10base2 ? StartCoax : StopCoax,
+ ioaddr + EL3_CMD);
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "wrote 0x%08x to Wn3_Config\n", config);
+ /* AKPM: FIXME: Should reset Rx & Tx here. P60 of 3c90xc.pdf */
+ }
+ EL3WINDOW(old_window);
+ enable_irq(dev->irq);
+
+leave_media_alone:
+ if (vortex_debug > 2)
+ printk(KERN_DEBUG "%s: Media selection timer finished, %s.\n",
+ dev->name, media_tbl[dev->if_port].name);
+
+ mod_timer(&vp->timer, RUN_AT(next_tick));
+ if (vp->deferred)
+ outw(FakeIntr, ioaddr + EL3_CMD);
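+ /*
+ * vp->deferred is set by the ISR's "Too much work" path, which masks
+ * the offending sources; issuing FakeIntr here re-enters the ISR so
+ * the deferred events are folded back into 'status' and handled.
+ */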
+ return;
+}
+
+static void vortex_tx_timeout(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+
+ printk(KERN_ERR "%s: transmit timed out, tx_status %2.2x status %4.4x.\n",
+ dev->name, inb(ioaddr + TxStatus),
+ inw(ioaddr + EL3_STATUS));
+ EL3WINDOW(4);
+ printk(KERN_ERR " diagnostics: net %04x media %04x dma %8.8x.\n",
+ inw(ioaddr + Wn4_NetDiag), inw(ioaddr + Wn4_Media),
+ inl(ioaddr + PktStatus));
+ /* Slight code bloat to be user friendly. */
+ if ((inb(ioaddr + TxStatus) & 0x88) == 0x88)
+ printk(KERN_ERR "%s: Transmitter encountered 16 collisions --"
+ " network cable problem?\n", dev->name);
+ if (inw(ioaddr + EL3_STATUS) & IntLatch) {
+ printk(KERN_ERR "%s: Interrupt posted but not delivered --"
+ " IRQ blocked by another device?\n", dev->name);
+ /* Bad idea here.. but we might as well handle a few events. */
+ {
+ /*
+ * Block interrupts because vortex_interrupt does a bare spin_lock()
+ */
+ unsigned long flags;
+ local_irq_save(flags);
+ if (vp->full_bus_master_tx)
+ boomerang_interrupt(dev->irq, dev, 0);
+ else
+ vortex_interrupt(dev->irq, dev, 0);
+ local_irq_restore(flags);
+ }
+ }
+
+ if (vortex_debug > 0)
+ dump_tx_ring(dev);
+
+ issue_and_wait(dev, TxReset);
+
+ vp->stats.tx_errors++;
+ if (vp->full_bus_master_tx) {
+ printk(KERN_DEBUG "%s: Resetting the Tx ring pointer.\n", dev->name);
+ if (vp->cur_tx - vp->dirty_tx > 0 && inl(ioaddr + DownListPtr) == 0)
+ outl(vp->tx_ring_dma + (vp->dirty_tx % TX_RING_SIZE) * sizeof(struct boom_tx_desc),
+ ioaddr + DownListPtr);
+ if (vp->cur_tx - vp->dirty_tx < TX_RING_SIZE)
+ netif_wake_queue (dev);
+ if (vp->drv_flags & IS_BOOMERANG)
+ outb(PKT_BUF_SZ>>8, ioaddr + TxFreeThreshold);
+ outw(DownUnstall, ioaddr + EL3_CMD);
+ } else {
+ vp->stats.tx_dropped++;
+ netif_wake_queue(dev);
+ }
+
+ /* Issue Tx Enable */
+ outw(TxEnable, ioaddr + EL3_CMD);
+ dev->trans_start = jiffies;
+
+ /* Switch to register set 7 for normal use. */
+ EL3WINDOW(7);
+}
+
+/*
+ * Handle uncommon interrupt sources. This is a separate routine to minimize
+ * the cache impact.
+ */
+static void
+vortex_error(struct net_device *dev, int status)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ int do_tx_reset = 0, reset_mask = 0;
+ unsigned char tx_status = 0;
+
+ if (vortex_debug > 2) {
+ printk(KERN_ERR "%s: vortex_error(), status=0x%x\n", dev->name, status);
+ }
+
+ if (status & TxComplete) { /* Really "TxError" for us. */
+ tx_status = inb(ioaddr + TxStatus);
+ /* Presumably a tx-timeout. We must merely re-enable. */
+ if (vortex_debug > 2
+ || (tx_status != 0x88 && vortex_debug > 0)) {
+ printk(KERN_ERR "%s: Transmit error, Tx status register %2.2x.\n",
+ dev->name, tx_status);
+ if (tx_status == 0x82) {
+ printk(KERN_ERR "Probably a duplex mismatch. See "
+ "Documentation/networking/vortex.txt\n");
+ }
+ dump_tx_ring(dev);
+ }
+ if (tx_status & 0x14) vp->stats.tx_fifo_errors++;
+ if (tx_status & 0x38) vp->stats.tx_aborted_errors++;
+ outb(0, ioaddr + TxStatus);
+ if (tx_status & 0x30) { /* txJabber or txUnderrun */
+ do_tx_reset = 1;
+ } else if ((tx_status & 0x08) && (vp->drv_flags & MAX_COLLISION_RESET)) { /* maxCollisions */
+ do_tx_reset = 1;
+ reset_mask = 0x0108; /* Reset interface logic, but not download logic */
+ } else { /* Merely re-enable the transmitter. */
+ outw(TxEnable, ioaddr + EL3_CMD);
+ }
+ }
+
+ if (status & RxEarly) { /* Rx early is unused. */
+ vortex_rx(dev);
+ outw(AckIntr | RxEarly, ioaddr + EL3_CMD);
+ }
+ if (status & StatsFull) { /* Empty statistics. */
+ static int DoneDidThat;
+ if (vortex_debug > 4)
+ printk(KERN_DEBUG "%s: Updating stats.\n", dev->name);
+ update_stats(ioaddr, dev);
+ /* HACK: Disable statistics as an interrupt source. */
+ /* This occurs when we have the wrong media type! */
+ if (DoneDidThat == 0 &&
+ inw(ioaddr + EL3_STATUS) & StatsFull) {
+ printk(KERN_WARNING "%s: Updating statistics failed, disabling "
+ "stats as an interrupt source.\n", dev->name);
+ EL3WINDOW(5);
+ outw(SetIntrEnb | (inw(ioaddr + 10) & ~StatsFull), ioaddr + EL3_CMD);
+ vp->intr_enable &= ~StatsFull;
+ EL3WINDOW(7);
+ DoneDidThat++;
+ }
+ }
+ if (status & IntReq) { /* Restore all interrupt sources. */
+ outw(vp->status_enable, ioaddr + EL3_CMD);
+ outw(vp->intr_enable, ioaddr + EL3_CMD);
+ }
+ if (status & HostError) {
+ u16 fifo_diag;
+ EL3WINDOW(4);
+ fifo_diag = inw(ioaddr + Wn4_FIFODiag);
+ printk(KERN_ERR "%s: Host error, FIFO diagnostic register %4.4x.\n",
+ dev->name, fifo_diag);
+ /* Adapter failure requires Tx/Rx reset and reinit. */
+ if (vp->full_bus_master_tx) {
+ int bus_status = inl(ioaddr + PktStatus);
+ /* 0x80000000 PCI master abort. */
+ /* 0x40000000 PCI target abort. */
+ if (vortex_debug)
+ printk(KERN_ERR "%s: PCI bus error, bus status %8.8x\n", dev->name, bus_status);
+
+ /* In this case, blow the card away */
+ vortex_down(dev);
+ issue_and_wait(dev, TotalReset | 0xff);
+ vortex_up(dev); /* AKPM: bug. vortex_up() assumes that the rx ring is full. It may not be. */
+ } else if (fifo_diag & 0x0400)
+ do_tx_reset = 1;
+ if (fifo_diag & 0x3000) {
+ /* Reset Rx fifo and upload logic */
+ issue_and_wait(dev, RxReset|0x07);
+ /* Set the Rx filter to the current state. */
+ set_rx_mode(dev);
+ outw(RxEnable, ioaddr + EL3_CMD); /* Re-enable the receiver. */
+ outw(AckIntr | HostError, ioaddr + EL3_CMD);
+ }
+ }
+
+ if (do_tx_reset) {
+ issue_and_wait(dev, TxReset|reset_mask);
+ outw(TxEnable, ioaddr + EL3_CMD);
+ if (!vp->full_bus_master_tx)
+ netif_wake_queue(dev);
+ }
+}
+
+static int
+vortex_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+
+ /* Put out the doubleword header... */
+ outl(skb->len, ioaddr + TX_FIFO);
+ if (vp->bus_master) {
+ /* Set the bus-master controller to transfer the packet. */
+ int len = (skb->len + 3) & ~3;
+ outl( vp->tx_skb_dma = pci_map_single(vp->pdev, skb->data, len, PCI_DMA_TODEVICE),
+ ioaddr + Wn7_MasterAddr);
+ outw(len, ioaddr + Wn7_MasterLen);
+ vp->tx_skb = skb;
+ outw(StartDMADown, ioaddr + EL3_CMD);
+ /* netif_wake_queue() will be called at the DMADone interrupt. */
+ } else {
+ /* ... and the packet rounded to a doubleword. */
+ outsl(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2);
+ dev_kfree_skb (skb);
+ if (inw(ioaddr + TxFree) > 1536) {
+ netif_start_queue (dev); /* AKPM: redundant? */
+ } else {
+ /* Interrupt us when the FIFO has room for max-sized packet. */
+ netif_stop_queue(dev);
+ outw(SetTxThreshold + (1536>>2), ioaddr + EL3_CMD);
+ }
+ }
+
+ dev->trans_start = jiffies;
+
+ /* Clear the Tx status stack. */
+ {
+ int tx_status;
+ int i = 32;
+
+ while (--i > 0 && (tx_status = inb(ioaddr + TxStatus)) > 0) {
+ if (tx_status & 0x3C) { /* A Tx-disabling error occurred. */
+ if (vortex_debug > 2)
+ printk(KERN_DEBUG "%s: Tx error, status %2.2x.\n",
+ dev->name, tx_status);
+ if (tx_status & 0x04) vp->stats.tx_fifo_errors++;
+ if (tx_status & 0x38) vp->stats.tx_aborted_errors++;
+ if (tx_status & 0x30) {
+ issue_and_wait(dev, TxReset);
+ }
+ outw(TxEnable, ioaddr + EL3_CMD);
+ }
+ outb(0x00, ioaddr + TxStatus); /* Pop the status stack. */
+ }
+ }
+ return 0;
+}
+
+static int
+boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ /* Calculate the next Tx descriptor entry. */
+ int entry = vp->cur_tx % TX_RING_SIZE;
+ struct boom_tx_desc *prev_entry = &vp->tx_ring[(vp->cur_tx-1) % TX_RING_SIZE];
+ unsigned long flags;
+
+ if (vortex_debug > 6) {
+ printk(KERN_DEBUG "boomerang_start_xmit()\n");
+ if (vortex_debug > 3)
+ printk(KERN_DEBUG "%s: Trying to send a packet, Tx index %d.\n",
+ dev->name, vp->cur_tx);
+ }
+
+ if (vp->cur_tx - vp->dirty_tx >= TX_RING_SIZE) {
+ if (vortex_debug > 0)
+ printk(KERN_WARNING "%s: BUG! Tx Ring full, refusing to send buffer.\n",
+ dev->name);
+ netif_stop_queue(dev);
+ return 1;
+ }
+
+ vp->tx_skbuff[entry] = skb;
+
+ vp->tx_ring[entry].next = 0;
+#if DO_ZEROCOPY
+ if (skb->ip_summed != CHECKSUM_HW)
+ vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded);
+ else
+ vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum);
+
+ if (!skb_shinfo(skb)->nr_frags) {
+ vp->tx_ring[entry].frag[0].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->data,
+ skb->len, PCI_DMA_TODEVICE));
+ vp->tx_ring[entry].frag[0].length = cpu_to_le32(skb->len | LAST_FRAG);
+ } else {
+ int i;
+
+ vp->tx_ring[entry].frag[0].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->data,
+ skb->len-skb->data_len, PCI_DMA_TODEVICE));
+ vp->tx_ring[entry].frag[0].length = cpu_to_le32(skb->len-skb->data_len);
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ vp->tx_ring[entry].frag[i+1].addr =
+ cpu_to_le32(pci_map_single(vp->pdev,
+ (void*)page_address(frag->page) + frag->page_offset,
+ frag->size, PCI_DMA_TODEVICE));
+
+ if (i == skb_shinfo(skb)->nr_frags-1)
+ vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(frag->size|LAST_FRAG);
+ else
+ vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(frag->size);
+ }
+ }
+#else
+ vp->tx_ring[entry].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->data, skb->len, PCI_DMA_TODEVICE));
+ vp->tx_ring[entry].length = cpu_to_le32(skb->len | LAST_FRAG);
+ vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded);
+#endif
+
+ spin_lock_irqsave(&vp->lock, flags);
+ /* Wait for the stall to complete. */
+ issue_and_wait(dev, DownStall);
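+ /*
+ * The download (Tx DMA) engine must be stalled before touching a list
+ * it may be fetching from; DownUnstall below restarts it once the new
+ * descriptor is linked in.
+ */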
+ prev_entry->next = cpu_to_le32(vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc));
+ if (inl(ioaddr + DownListPtr) == 0) {
+ outl(vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc), ioaddr + DownListPtr);
+ vp->queued_packet++;
+ }
+
+ vp->cur_tx++;
+ if (vp->cur_tx - vp->dirty_tx > TX_RING_SIZE - 1) {
+ netif_stop_queue (dev);
+ } else { /* Clear previous interrupt enable. */
+#if defined(tx_interrupt_mitigation)
+ /* Dubious. If the "faster" cyclone-only test in
+ * boomerang_interrupt were selected by its ifdef, this
+ * would corrupt DN_COMPLETE. No?
+ */
+ prev_entry->status &= cpu_to_le32(~TxIntrUploaded);
+#endif
+ }
+ outw(DownUnstall, ioaddr + EL3_CMD);
+ spin_unlock_irqrestore(&vp->lock, flags);
+ dev->trans_start = jiffies;
+ return 0;
+}
+
+/* The interrupt handler does all of the Rx thread work and cleans up
+ after the Tx thread. */
+
+/*
+ * This is the ISR for the vortex series chips.
+ * full_bus_master_tx == 0 && full_bus_master_rx == 0
+ */
+
+static void vortex_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct net_device *dev = dev_id;
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr;
+ int status;
+ int work_done = max_interrupt_work;
+
+ ioaddr = dev->base_addr;
+ spin_lock(&vp->lock);
+
+ status = inw(ioaddr + EL3_STATUS);
+
+ if (vortex_debug > 6)
+ printk("vortex_interrupt(). status=0x%4x\n", status);
+
+ if ((status & IntLatch) == 0)
+ goto handler_exit; /* No interrupt: shared IRQs cause this */
+
+ if (status & IntReq) {
+ status |= vp->deferred;
+ vp->deferred = 0;
+ }
+
+ if (status == 0xffff) /* h/w no longer present (hotplug)? */
+ goto handler_exit;
+
+ if (vortex_debug > 4)
+ printk(KERN_DEBUG "%s: interrupt, status %4.4x, latency %d ticks.\n",
+ dev->name, status, inb(ioaddr + Timer));
+
+ do {
+ if (vortex_debug > 5)
+ printk(KERN_DEBUG "%s: In interrupt loop, status %4.4x.\n",
+ dev->name, status);
+ if (status & RxComplete)
+ vortex_rx(dev);
+
+ if (status & TxAvailable) {
+ if (vortex_debug > 5)
+ printk(KERN_DEBUG " TX room bit was handled.\n");
+ /* There's room in the FIFO for a full-sized packet. */
+ outw(AckIntr | TxAvailable, ioaddr + EL3_CMD);
+ netif_wake_queue (dev);
+ }
+
+ if (status & DMADone) {
+ if (inw(ioaddr + Wn7_MasterStatus) & 0x1000) {
+ outw(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */
+ pci_unmap_single(vp->pdev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, PCI_DMA_TODEVICE);
+ dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */
+ if (inw(ioaddr + TxFree) > 1536) {
+ /*
+ * AKPM: FIXME: I don't think we need this. If the queue was stopped due to
+ * insufficient FIFO room, the TxAvailable test will succeed and call
+ * netif_wake_queue()
+ */
+ netif_wake_queue(dev);
+ } else { /* Interrupt when FIFO has room for max-sized packet. */
+ outw(SetTxThreshold + (1536>>2), ioaddr + EL3_CMD);
+ netif_stop_queue(dev);
+ }
+ }
+ }
+ /* Check for all uncommon interrupts at once. */
+ if (status & (HostError | RxEarly | StatsFull | TxComplete | IntReq)) {
+ if (status == 0xffff)
+ break;
+ vortex_error(dev, status);
+ }
+
+ if (--work_done < 0) {
+ printk(KERN_WARNING "%s: Too much work in interrupt, status "
+ "%4.4x.\n", dev->name, status);
+ /* Disable all pending interrupts. */
+ do {
+ vp->deferred |= status;
+ outw(SetStatusEnb | (~vp->deferred & vp->status_enable),
+ ioaddr + EL3_CMD);
+ outw(AckIntr | (vp->deferred & 0x7ff), ioaddr + EL3_CMD);
+ } while ((status = inw(ioaddr + EL3_CMD)) & IntLatch);
+ /* The timer will reenable interrupts. */
+ mod_timer(&vp->timer, jiffies + 1*HZ);
+ break;
+ }
+ /* Acknowledge the IRQ. */
+ outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD);
+ } while ((status = inw(ioaddr + EL3_STATUS)) & (IntLatch | RxComplete));
+
+ if (vortex_debug > 4)
+ printk(KERN_DEBUG "%s: exiting interrupt, status %4.4x.\n",
+ dev->name, status);
+handler_exit:
+ spin_unlock(&vp->lock);
+}
+
+/*
+ * This is the ISR for the boomerang series chips.
+ * full_bus_master_tx == 1 && full_bus_master_rx == 1
+ */
+
+static void boomerang_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct net_device *dev = dev_id;
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr;
+ int status;
+ int work_done = max_interrupt_work;
+
+ ioaddr = dev->base_addr;
+
+ /*
+ * It seems dopey to put the spinlock this early, but we could race against vortex_tx_timeout
+ * and boomerang_start_xmit
+ */
+ spin_lock(&vp->lock);
+
+ status = inw(ioaddr + EL3_STATUS);
+
+ if (vortex_debug > 6)
+ printk(KERN_DEBUG "boomerang_interrupt. status=0x%4x\n", status);
+
+ if ((status & IntLatch) == 0)
+ goto handler_exit; /* No interrupt: shared IRQs can cause this */
+
+ if (status == 0xffff) { /* h/w no longer present (hotplug)? */
+ if (vortex_debug > 1)
+ printk(KERN_DEBUG "boomerang_interrupt(1): status = 0xffff\n");
+ goto handler_exit;
+ }
+
+ if (status & IntReq) {
+ status |= vp->deferred;
+ vp->deferred = 0;
+ }
+
+ if (vortex_debug > 4)
+ printk(KERN_DEBUG "%s: interrupt, status %4.4x, latency %d ticks.\n",
+ dev->name, status, inb(ioaddr + Timer));
+ do {
+ if (vortex_debug > 5)
+ printk(KERN_DEBUG "%s: In interrupt loop, status %4.4x.\n",
+ dev->name, status);
+ if (status & UpComplete) {
+ outw(AckIntr | UpComplete, ioaddr + EL3_CMD);
+ if (vortex_debug > 5)
+ printk(KERN_DEBUG "boomerang_interrupt->boomerang_rx\n");
+ boomerang_rx(dev);
+ }
+
+ if (status & DownComplete) {
+ unsigned int dirty_tx = vp->dirty_tx;
+
+ outw(AckIntr | DownComplete, ioaddr + EL3_CMD);
+ while (vp->cur_tx - dirty_tx > 0) {
+ int entry = dirty_tx % TX_RING_SIZE;
+#if 1 /* AKPM: the latter is faster, but cyclone-only */
+ if (inl(ioaddr + DownListPtr) ==
+ vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc))
+ break; /* It still hasn't been processed. */
+#else
+ if ((vp->tx_ring[entry].status & DN_COMPLETE) == 0)
+ break; /* It still hasn't been processed. */
+#endif
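+ /*
+ * Two completion tests: comparing the NIC's DownListPtr with this
+ * entry's bus address works on all boomerang-class chips but costs a
+ * PIO read; testing the DN_COMPLETE bit written back into the
+ * descriptor is faster but cyclone-only, per the AKPM note above.
+ */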
+
+ if (vp->tx_skbuff[entry]) {
+ struct sk_buff *skb = vp->tx_skbuff[entry];
+#if DO_ZEROCOPY
+ int i;
+ for (i=0; i<=skb_shinfo(skb)->nr_frags; i++)
+ pci_unmap_single(vp->pdev,
+ le32_to_cpu(vp->tx_ring[entry].frag[i].addr),
+ le32_to_cpu(vp->tx_ring[entry].frag[i].length)&0xFFF,
+ PCI_DMA_TODEVICE);
+#else
+ pci_unmap_single(vp->pdev,
+ le32_to_cpu(vp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE);
+#endif
+ dev_kfree_skb_irq(skb);
+ vp->tx_skbuff[entry] = 0;
+ } else {
+ printk(KERN_DEBUG "boomerang_interrupt: no skb!\n");
+ }
+ /* vp->stats.tx_packets++; Counted below. */
+ dirty_tx++;
+ }
+ vp->dirty_tx = dirty_tx;
+ if (vp->cur_tx - dirty_tx <= TX_RING_SIZE - 1) {
+ if (vortex_debug > 6)
+ printk(KERN_DEBUG "boomerang_interrupt: wake queue\n");
+ netif_wake_queue (dev);
+ }
+ }
+
+ /* Check for all uncommon interrupts at once. */
+ if (status & (HostError | RxEarly | StatsFull | TxComplete | IntReq))
+ vortex_error(dev, status);
+
+ if (--work_done < 0) {
+ printk(KERN_WARNING "%s: Too much work in interrupt, status "
+ "%4.4x.\n", dev->name, status);
+ /* Disable all pending interrupts. */
+ do {
+ vp->deferred |= status;
+ outw(SetStatusEnb | (~vp->deferred & vp->status_enable),
+ ioaddr + EL3_CMD);
+ outw(AckIntr | (vp->deferred & 0x7ff), ioaddr + EL3_CMD);
+ } while ((status = inw(ioaddr + EL3_CMD)) & IntLatch);
+ /* The timer will reenable interrupts. */
+ mod_timer(&vp->timer, jiffies + 1*HZ);
+ break;
+ }
+ /* Acknowledge the IRQ. */
+ outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD);
+ if (vp->cb_fn_base) /* The PCMCIA people are idiots. */
+ writel(0x8000, vp->cb_fn_base + 4);
+
+ } while ((status = inw(ioaddr + EL3_STATUS)) & IntLatch);
+
+ if (vortex_debug > 4)
+ printk(KERN_DEBUG "%s: exiting interrupt, status %4.4x.\n",
+ dev->name, status);
+handler_exit:
+ spin_unlock(&vp->lock);
+}
+
+static int vortex_rx(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ int i;
+ short rx_status;
+
+ if (vortex_debug > 5)
+ printk(KERN_DEBUG "vortex_rx(): status %4.4x, rx_status %4.4x.\n",
+ inw(ioaddr+EL3_STATUS), inw(ioaddr+RxStatus));
+ while ((rx_status = inw(ioaddr + RxStatus)) > 0) {
+ if (rx_status & 0x4000) { /* Error, update stats. */
+ unsigned char rx_error = inb(ioaddr + RxErrors);
+ if (vortex_debug > 2)
+ printk(KERN_DEBUG " Rx error: status %2.2x.\n", rx_error);
+ vp->stats.rx_errors++;
+ if (rx_error & 0x01) vp->stats.rx_over_errors++;
+ if (rx_error & 0x02) vp->stats.rx_length_errors++;
+ if (rx_error & 0x04) vp->stats.rx_frame_errors++;
+ if (rx_error & 0x08) vp->stats.rx_crc_errors++;
+ if (rx_error & 0x10) vp->stats.rx_length_errors++;
+ } else {
+ /* The packet length: up to 4.5K! */
+ int pkt_len = rx_status & 0x1fff;
+ struct sk_buff *skb;
+
+ skb = dev_alloc_skb(pkt_len + 5);
+ if (vortex_debug > 4)
+ printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n",
+ pkt_len, rx_status);
+ if (skb != NULL) {
+ skb->dev = dev;
+ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
+ /* 'skb_put()' points to the start of sk_buff data area. */
+ if (vp->bus_master &&
+ ! (inw(ioaddr + Wn7_MasterStatus) & 0x8000)) {
+ dma_addr_t dma = pci_map_single(vp->pdev, skb_put(skb, pkt_len),
+ pkt_len, PCI_DMA_FROMDEVICE);
+ outl(dma, ioaddr + Wn7_MasterAddr);
+ outw((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen);
+ outw(StartDMAUp, ioaddr + EL3_CMD);
+ while (inw(ioaddr + Wn7_MasterStatus) & 0x8000)
+ ;
+ pci_unmap_single(vp->pdev, dma, pkt_len, PCI_DMA_FROMDEVICE);
+ } else {
+ insl(ioaddr + RX_FIFO, skb_put(skb, pkt_len),
+ (pkt_len + 3) >> 2);
+ }
+ outw(RxDiscard, ioaddr + EL3_CMD); /* Pop top Rx packet. */
+ skb->protocol = eth_type_trans(skb, dev);
+ netif_rx(skb);
+ dev->last_rx = jiffies;
+ vp->stats.rx_packets++;
+ /* Wait a limited time to go to next packet. */
+ for (i = 200; i >= 0; i--)
+ if ( ! (inw(ioaddr + EL3_STATUS) & CmdInProgress))
+ break;
+ continue;
+ } else if (vortex_debug > 0)
+ printk(KERN_NOTICE "%s: No memory to allocate a sk_buff of "
+ "size %d.\n", dev->name, pkt_len);
+ }
+ vp->stats.rx_dropped++;
+ issue_and_wait(dev, RxDiscard);
+ }
+
+ return 0;
+}
+
+static int
+boomerang_rx(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ int entry = vp->cur_rx % RX_RING_SIZE;
+ long ioaddr = dev->base_addr;
+ int rx_status;
+ int rx_work_limit = vp->dirty_rx + RX_RING_SIZE - vp->cur_rx;
+
+ if (vortex_debug > 5)
+ printk(KERN_DEBUG "boomerang_rx(): status %4.4x\n", inw(ioaddr+EL3_STATUS));
+
+ while ((rx_status = le32_to_cpu(vp->rx_ring[entry].status)) & RxDComplete){
+ if (--rx_work_limit < 0)
+ break;
+ if (rx_status & RxDError) { /* Error, update stats. */
+ unsigned char rx_error = rx_status >> 16;
+ if (vortex_debug > 2)
+ printk(KERN_DEBUG " Rx error: status %2.2x.\n", rx_error);
+ vp->stats.rx_errors++;
+ if (rx_error & 0x01) vp->stats.rx_over_errors++;
+ if (rx_error & 0x02) vp->stats.rx_length_errors++;
+ if (rx_error & 0x04) vp->stats.rx_frame_errors++;
+ if (rx_error & 0x08) vp->stats.rx_crc_errors++;
+ if (rx_error & 0x10) vp->stats.rx_length_errors++;
+ } else {
+ /* The packet length: up to 4.5K! */
+ int pkt_len = rx_status & 0x1fff;
+ struct sk_buff *skb;
+ dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr);
+
+ if (vortex_debug > 4)
+ printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n",
+ pkt_len, rx_status);
+
+ /* Check if the packet is long enough to just accept without
+ copying to a properly sized skbuff. */
+ if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != 0) {
+ skb->dev = dev;
+ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
+ pci_dma_sync_single(vp->pdev, dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ /* 'skb_put()' points to the start of sk_buff data area. */
+ memcpy(skb_put(skb, pkt_len),
+ vp->rx_skbuff[entry]->tail,
+ pkt_len);
+ vp->rx_copy++;
+ } else {
+ /* Pass up the skbuff already on the Rx ring. */
+ skb = vp->rx_skbuff[entry];
+ vp->rx_skbuff[entry] = NULL;
+ skb_put(skb, pkt_len);
+ pci_unmap_single(vp->pdev, dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ vp->rx_nocopy++;
+ }
+ skb->protocol = eth_type_trans(skb, dev);
+ { /* Use hardware checksum info. */
+ int csum_bits = rx_status & 0xee000000;
+ if (csum_bits &&
+ (csum_bits == (IPChksumValid | TCPChksumValid) ||
+ csum_bits == (IPChksumValid | UDPChksumValid))) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ vp->rx_csumhits++;
+ }
+ }
+ netif_rx(skb);
+ dev->last_rx = jiffies;
+ vp->stats.rx_packets++;
+ }
+ entry = (++vp->cur_rx) % RX_RING_SIZE;
+ }
+ /* Refill the Rx ring buffers. */
+ for (; vp->cur_rx - vp->dirty_rx > 0; vp->dirty_rx++) {
+ struct sk_buff *skb;
+ entry = vp->dirty_rx % RX_RING_SIZE;
+ if (vp->rx_skbuff[entry] == NULL) {
+ skb = dev_alloc_skb(PKT_BUF_SZ);
+ if (skb == NULL) {
+ static unsigned long last_jif;
+ if ((jiffies - last_jif) > 10 * HZ) {
+ printk(KERN_WARNING "%s: memory shortage\n", dev->name);
+ last_jif = jiffies;
+ }
+ if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE)
+ mod_timer(&vp->rx_oom_timer, RUN_AT(HZ * 1));
+ break; /* Bad news! */
+ }
+ skb->dev = dev; /* Mark as being used by this device. */
+ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
+ vp->rx_ring[entry].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->tail, PKT_BUF_SZ, PCI_DMA_FROMDEVICE));
+ vp->rx_skbuff[entry] = skb;
+ }
+ vp->rx_ring[entry].status = 0; /* Clear complete bit. */
+ outw(UpUnstall, ioaddr + EL3_CMD);
+ }
+ return 0;
+}
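+
+/*
+ * Note on the copybreak strategy above: frames shorter than rx_copybreak
+ * are copied into a freshly allocated skb so the full-sized ring buffer
+ * stays in place; larger frames are handed up directly and the ring slot
+ * is refilled afterwards, with the rx_oom_timer as the fallback when
+ * allocation fails.
+ */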
+
+/*
+ * If we've hit a total OOM refilling the Rx ring we poll once a second
+ * for some memory. Otherwise there is no way to restart the rx process.
+ */
+static void
+rx_oom_timer(unsigned long arg)
+{
+ struct net_device *dev = (struct net_device *)arg;
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+
+ spin_lock_irq(&vp->lock);
+ if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) /* This test is redundant, but makes me feel good */
+ boomerang_rx(dev);
+ if (vortex_debug > 1) {
+ printk(KERN_DEBUG "%s: rx_oom_timer %s\n", dev->name,
+ ((vp->cur_rx - vp->dirty_rx) != RX_RING_SIZE) ? "succeeded" : "retrying");
+ }
+ spin_unlock_irq(&vp->lock);
+}
+
+static void
+vortex_down(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+
+ netif_stop_queue (dev);
+
+ del_timer_sync(&vp->rx_oom_timer);
+ del_timer_sync(&vp->timer);
+
+ /* Turn off statistics ASAP. We update vp->stats below. */
+ outw(StatsDisable, ioaddr + EL3_CMD);
+
+ /* Disable the receiver and transmitter. */
+ outw(RxDisable, ioaddr + EL3_CMD);
+ outw(TxDisable, ioaddr + EL3_CMD);
+
+ if (dev->if_port == XCVR_10base2)
+ /* Turn off thinnet power. Green! */
+ outw(StopCoax, ioaddr + EL3_CMD);
+
+ outw(SetIntrEnb | 0x0000, ioaddr + EL3_CMD);
+
+ update_stats(ioaddr, dev);
+ if (vp->full_bus_master_rx)
+ outl(0, ioaddr + UpListPtr);
+ if (vp->full_bus_master_tx)
+ outl(0, ioaddr + DownListPtr);
+
+ if (vp->pdev && vp->enable_wol) {
+ pci_save_state(vp->pdev, vp->power_state);
+ acpi_set_WOL(dev);
+ }
+}
+
+static int
+vortex_close(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ int i;
+
+ if (netif_device_present(dev))
+ vortex_down(dev);
+
+ if (vortex_debug > 1) {
+ printk(KERN_DEBUG"%s: vortex_close() status %4.4x, Tx status %2.2x.\n",
+ dev->name, inw(ioaddr + EL3_STATUS), inb(ioaddr + TxStatus));
+ printk(KERN_DEBUG "%s: vortex close stats: rx_nocopy %d rx_copy %d"
+ " tx_queued %d Rx pre-checksummed %d.\n",
+ dev->name, vp->rx_nocopy, vp->rx_copy, vp->queued_packet, vp->rx_csumhits);
+ }
+
+#if DO_ZEROCOPY
+ if ( vp->rx_csumhits &&
+ ((vp->drv_flags & HAS_HWCKSM) == 0) &&
+ (hw_checksums[vp->card_idx] == -1)) {
+ printk(KERN_WARNING "%s supports hardware checksums, and we're not using them!\n", dev->name);
+ printk(KERN_WARNING "Please see http://www.uow.edu.au/~andrewm/zerocopy.html\n");
+ }
+#endif
+
+ free_irq(dev->irq, dev);
+
+ if (vp->full_bus_master_rx) { /* Free Boomerang bus master Rx buffers. */
+ for (i = 0; i < RX_RING_SIZE; i++)
+ if (vp->rx_skbuff[i]) {
+ pci_unmap_single( vp->pdev, le32_to_cpu(vp->rx_ring[i].addr),
+ PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ dev_kfree_skb(vp->rx_skbuff[i]);
+ vp->rx_skbuff[i] = 0;
+ }
+ }
+ if (vp->full_bus_master_tx) { /* Free Boomerang bus master Tx buffers. */
+ for (i = 0; i < TX_RING_SIZE; i++) {
+ if (vp->tx_skbuff[i]) {
+ struct sk_buff *skb = vp->tx_skbuff[i];
+#if DO_ZEROCOPY
+ int k;
+
+ for (k=0; k<=skb_shinfo(skb)->nr_frags; k++)
+ pci_unmap_single(vp->pdev,
+ le32_to_cpu(vp->tx_ring[i].frag[k].addr),
+ le32_to_cpu(vp->tx_ring[i].frag[k].length)&0xFFF,
+ PCI_DMA_TODEVICE);
+#else
+ pci_unmap_single(vp->pdev, le32_to_cpu(vp->tx_ring[i].addr), skb->len, PCI_DMA_TODEVICE);
+#endif
+ dev_kfree_skb(skb);
+ vp->tx_skbuff[i] = 0;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static void
+dump_tx_ring(struct net_device *dev)
+{
+ if (vortex_debug > 0) {
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+
+ if (vp->full_bus_master_tx) {
+ int i;
+ int stalled = inl(ioaddr + PktStatus) & 0x04; /* Possibly racy, but it's only debug stuff. */
+
+ printk(KERN_ERR " Flags; bus-master %d, dirty %d(%d) current %d(%d)\n",
+ vp->full_bus_master_tx,
+ vp->dirty_tx, vp->dirty_tx % TX_RING_SIZE,
+ vp->cur_tx, vp->cur_tx % TX_RING_SIZE);
+ printk(KERN_ERR " Transmit list %8.8x vs. %p.\n",
+ inl(ioaddr + DownListPtr),
+ &vp->tx_ring[vp->dirty_tx % TX_RING_SIZE]);
+ issue_and_wait(dev, DownStall);
+ for (i = 0; i < TX_RING_SIZE; i++) {
+ printk(KERN_ERR " %d: @%p length %8.8x status %8.8x\n", i,
+ &vp->tx_ring[i],
+#if DO_ZEROCOPY
+ le32_to_cpu(vp->tx_ring[i].frag[0].length),
+#else
+ le32_to_cpu(vp->tx_ring[i].length),
+#endif
+ le32_to_cpu(vp->tx_ring[i].status));
+ }
+ if (!stalled)
+ outw(DownUnstall, ioaddr + EL3_CMD);
+ }
+ }
+}
+
+static struct net_device_stats *vortex_get_stats(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ unsigned long flags;
+
+ if (netif_device_present(dev)) { /* AKPM: Used to be netif_running */
+ spin_lock_irqsave (&vp->lock, flags);
+ update_stats(dev->base_addr, dev);
+ spin_unlock_irqrestore (&vp->lock, flags);
+ }
+ return &vp->stats;
+}
+
+/* Update statistics.
+ Unlike with the EL3 we need not worry about interrupts changing
+ the window setting from underneath us, but we must still guard
+ against a race condition with a StatsUpdate interrupt updating the
+ table. This is done by checking that the ASM (!) code generated uses
+ atomic updates with '+='.
+ */
+static void update_stats(long ioaddr, struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ int old_window = inw(ioaddr + EL3_CMD);
+
+ if (old_window == 0xffff) /* Chip suspended or ejected. */
+ return;
+ /* Unlike the 3c5x9 we need not turn off stats updates while reading. */
+ /* Switch to the stats window, and read everything. */
+ EL3WINDOW(6);
+ vp->stats.tx_carrier_errors += inb(ioaddr + 0);
+ vp->stats.tx_heartbeat_errors += inb(ioaddr + 1);
+ /* Multiple collisions. */ inb(ioaddr + 2);
+ vp->stats.collisions += inb(ioaddr + 3);
+ vp->stats.tx_window_errors += inb(ioaddr + 4);
+ vp->stats.rx_fifo_errors += inb(ioaddr + 5);
+ vp->stats.tx_packets += inb(ioaddr + 6);
+ vp->stats.tx_packets += (inb(ioaddr + 9)&0x30) << 4;
+ /* Rx packets */ inb(ioaddr + 7); /* Must read to clear */
+ /* Tx deferrals */ inb(ioaddr + 8);
+ /* Don't bother with register 9, an extension of registers 6&7.
+ If we do use the 6&7 values the atomic update assumption above
+ is invalid. */
+ vp->stats.rx_bytes += inw(ioaddr + 10);
+ vp->stats.tx_bytes += inw(ioaddr + 12);
+ /* New: On the Vortex we must also clear the BadSSD counter. */
+ EL3WINDOW(4);
+ inb(ioaddr + 12);
+
+ {
+ u8 up = inb(ioaddr + 13);
+ vp->stats.rx_bytes += (up & 0x0f) << 16;
+ vp->stats.tx_bytes += (up & 0xf0) << 12;
+ }
+
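+ /* The window in use is reported in the top three bits of the
+ status word read at entry, hence the shift by 13. */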
+ EL3WINDOW(old_window >> 13);
+ return;
+}
+
+
+static int netdev_ethtool_ioctl(struct net_device *dev, void *useraddr)
+{
+ struct vortex_private *vp = dev->priv;
+ u32 ethcmd;
+
+ if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
+ return -EFAULT;
+
+ switch (ethcmd) {
+ case ETHTOOL_GDRVINFO: {
+ struct ethtool_drvinfo info = {ETHTOOL_GDRVINFO};
+ strcpy(info.driver, DRV_NAME);
+ strcpy(info.version, DRV_VERSION);
+ if (vp->pdev)
+ strcpy(info.bus_info, vp->pdev->slot_name);
+ else
+ sprintf(info.bus_info, "EISA 0x%lx %d",
+ dev->base_addr, dev->irq);
+ if (copy_to_user(useraddr, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
+ }
+
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ struct mii_ioctl_data *data = (struct mii_ioctl_data *)&rq->ifr_data;
+ int phy = vp->phys[0] & 0x1f;
+ int retval;
+
+ switch(cmd) {
+ case SIOCETHTOOL:
+ return netdev_ethtool_ioctl(dev, (void *) rq->ifr_data);
+
+ case SIOCGMIIPHY: /* Get address of MII PHY in use. */
+ case SIOCDEVPRIVATE: /* for binary compat, remove in 2.5 */
+ data->phy_id = phy;
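+ /* Fall through: also read the requested register. */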
+
+ case SIOCGMIIREG: /* Read MII PHY register. */
+ case SIOCDEVPRIVATE+1: /* for binary compat, remove in 2.5 */
+ EL3WINDOW(4);
+ data->val_out = mdio_read(dev, data->phy_id & 0x1f, data->reg_num & 0x1f);
+ retval = 0;
+ break;
+
+ case SIOCSMIIREG: /* Write MII PHY register. */
+ case SIOCDEVPRIVATE+2: /* for binary compat, remove in 2.5 */
+ if (!capable(CAP_NET_ADMIN)) {
+ retval = -EPERM;
+ } else {
+ EL3WINDOW(4);
+ mdio_write(dev, data->phy_id & 0x1f, data->reg_num & 0x1f, data->val_in);
+ retval = 0;
+ }
+ break;
+ default:
+ retval = -EOPNOTSUPP;
+ break;
+ }
+
+ return retval;
+}
+
+/* Pre-Cyclone chips have no documented multicast filter, so the only
+ multicast setting is to receive all multicast frames. At least
+ the chip has a very clean way to set the mode, unlike many others. */
+static void set_rx_mode(struct net_device *dev)
+{
+ long ioaddr = dev->base_addr;
+ int new_mode;
+
+ if (dev->flags & IFF_PROMISC) {
+ if (vortex_debug > 0)
+ printk(KERN_NOTICE "%s: Setting promiscuous mode.\n", dev->name);
+ new_mode = SetRxFilter|RxStation|RxMulticast|RxBroadcast|RxProm;
+ } else if ((dev->mc_list) || (dev->flags & IFF_ALLMULTI)) {
+ new_mode = SetRxFilter|RxStation|RxMulticast|RxBroadcast;
+ } else
+ new_mode = SetRxFilter | RxStation | RxBroadcast;
+
+ outw(new_mode, ioaddr + EL3_CMD);
+}
+
+/* MII transceiver control section.
+ Read and write the MII registers using software-generated serial
+ MDIO protocol. See the MII specifications or DP83840A data sheet
+ for details. */
+
+/* The maximum data clock rate is 2.5 MHz. The minimum timing is usually
+ met by back-to-back PCI I/O cycles, but we insert a delay to avoid
+ "overclocking" issues. */
+#define mdio_delay() inl(mdio_addr)
+
+#define MDIO_SHIFT_CLK 0x01
+#define MDIO_DIR_WRITE 0x04
+#define MDIO_DATA_WRITE0 (0x00 | MDIO_DIR_WRITE)
+#define MDIO_DATA_WRITE1 (0x02 | MDIO_DIR_WRITE)
+#define MDIO_DATA_READ 0x02
+#define MDIO_ENB_IN 0x00
+
+/* Generate the preamble required for initial synchronization and
+ a few older transceivers. */
+static void mdio_sync(long ioaddr, int bits)
+{
+ long mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+
+ /* Establish sync by sending at least 32 logic ones. */
+ while (-- bits >= 0) {
+ outw(MDIO_DATA_WRITE1, mdio_addr);
+ mdio_delay();
+ outw(MDIO_DATA_WRITE1 | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+}
+
+static int mdio_read(struct net_device *dev, int phy_id, int location)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ int i;
+ long ioaddr = dev->base_addr;
+ int read_cmd = (0xf6 << 10) | (phy_id << 5) | location;
+ unsigned int retval = 0;
+ long mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+
+ spin_lock_bh(&vp->mdio_lock);
+
+ if (mii_preamble_required)
+ mdio_sync(ioaddr, 32);
+
+ /* Shift the read command bits out. */
+ for (i = 14; i >= 0; i--) {
+ int dataval = (read_cmd&(1<<i)) ? MDIO_DATA_WRITE1 : MDIO_DATA_WRITE0;
+ outw(dataval, mdio_addr);
+ mdio_delay();
+ outw(dataval | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+ /* Read the two turnaround bits, 16 data bits, and the wire-idle bit. */
+ for (i = 19; i > 0; i--) {
+ outw(MDIO_ENB_IN, mdio_addr);
+ mdio_delay();
+ retval = (retval << 1) | ((inw(mdio_addr) & MDIO_DATA_READ) ? 1 : 0);
+ outw(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+ spin_unlock_bh(&vp->mdio_lock);
+ return (retval & 0x20000) ? 0xffff : (retval >> 1) & 0xffff;
+}
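+
+/* Illustrative sketch (not part of the original driver): mdio_read()
+ * can be used to poll link state through the standard MII BMSR
+ * (register 1), with window 4 selected first, as vortex_ioctl() does:
+ *
+ * EL3WINDOW(4);
+ * bmsr = mdio_read(dev, vp->phys[0] & 0x1f, 1);
+ * link_up = (bmsr & 0x0004) != 0; (bit 2 is the BMSR link-status bit)
+ */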
+
+static void mdio_write(struct net_device *dev, int phy_id, int location, int value)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ int write_cmd = 0x50020000 | (phy_id << 23) | (location << 18) | value;
+ long mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+ int i;
+
+ spin_lock_bh(&vp->mdio_lock);
+
+ if (mii_preamble_required)
+ mdio_sync(ioaddr, 32);
+
+ /* Shift the command bits out. */
+ for (i = 31; i >= 0; i--) {
+ int dataval = (write_cmd&(1<<i)) ? MDIO_DATA_WRITE1 : MDIO_DATA_WRITE0;
+ outw(dataval, mdio_addr);
+ mdio_delay();
+ outw(dataval | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+ /* Leave the interface idle. */
+ for (i = 1; i >= 0; i--) {
+ outw(MDIO_ENB_IN, mdio_addr);
+ mdio_delay();
+ outw(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+ spin_unlock_bh(&vp->mdio_lock);
+ return;
+}
+
+/* ACPI: Advanced Configuration and Power Interface. */
+/* Set Wake-On-LAN mode and put the board into D3 (power-down) state. */
+static void acpi_set_WOL(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+
+ /* Power up on: 1==Downloaded Filter, 2==Magic Packets, 4==Link Status. */
+ EL3WINDOW(7);
+ outw(2, ioaddr + 0x0c);
+ /* The RxFilter must accept the WOL frames. */
+ outw(SetRxFilter|RxStation|RxMulticast|RxBroadcast, ioaddr + EL3_CMD);
+ outw(RxEnable, ioaddr + EL3_CMD);
+
+ /* Change the power state to D3; RxEnable doesn't take effect. */
+ pci_enable_wake(vp->pdev, 0, 1);
+ pci_set_power_state(vp->pdev, 3);
+}
+
+
+static void __devexit vortex_remove_one (struct pci_dev *pdev)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct vortex_private *vp;
+
+ if (!dev) {
+ printk("vortex_remove_one called for EISA device!\n");
+ BUG();
+ }
+
+ vp = dev->priv;
+
+ /* AKPM: FIXME: we should have
+ * if (vp->cb_fn_base) iounmap(vp->cb_fn_base);
+ * here
+ */
+ unregister_netdev(dev);
+ /* Should really use issue_and_wait() here */
+ outw(TotalReset|0x14, dev->base_addr + EL3_CMD);
+
+ if (vp->pdev && vp->enable_wol) {
+ pci_set_power_state(vp->pdev, 0); /* Go active */
+ if (vp->pm_state_valid)
+ pci_restore_state(vp->pdev, vp->power_state);
+ }
+
+ pci_free_consistent(pdev,
+ sizeof(struct boom_rx_desc) * RX_RING_SIZE
+ + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+ vp->rx_ring,
+ vp->rx_ring_dma);
+ if (vp->must_free_region)
+ release_region(dev->base_addr, vp->io_size);
+ kfree(dev);
+}
+
+
+static struct pci_driver vortex_driver = {
+ name: "3c59x",
+ probe: vortex_init_one,
+ remove: __devexit_p(vortex_remove_one),
+ id_table: vortex_pci_tbl,
+#ifdef CONFIG_PM
+ suspend: vortex_suspend,
+ resume: vortex_resume,
+#endif
+};
+
+
+static int vortex_have_pci;
+static int vortex_have_eisa;
+
+
+static int __init vortex_init (void)
+{
+ int pci_rc, eisa_rc;
+
+ pci_rc = pci_module_init(&vortex_driver);
+ eisa_rc = vortex_eisa_init();
+
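+ /* pci_module_init() returns 0 on successful registration, while
+ vortex_eisa_init() is assumed to return the number of EISA boards
+ it found, hence the differing tests below. */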
+ if (pci_rc == 0)
+ vortex_have_pci = 1;
+ if (eisa_rc > 0)
+ vortex_have_eisa = 1;
+
+ return (vortex_have_pci + vortex_have_eisa) ? 0 : -ENODEV;
+}
+
+
+static void __exit vortex_eisa_cleanup (void)
+{
+ struct net_device *dev, *tmp;
+ struct vortex_private *vp;
+ long ioaddr;
+
+ dev = root_vortex_eisa_dev;
+
+ while (dev) {
+ vp = dev->priv;
+ ioaddr = dev->base_addr;
+
+ unregister_netdev (dev);
+ outw (TotalReset, ioaddr + EL3_CMD);
+ release_region (ioaddr, VORTEX_TOTAL_SIZE);
+
+ tmp = dev;
+ dev = vp->next_module;
+
+ kfree (tmp);
+ }
+}
+
+
+static void __exit vortex_cleanup (void)
+{
+ if (vortex_have_pci)
+ pci_unregister_driver (&vortex_driver);
+ if (vortex_have_eisa)
+ vortex_eisa_cleanup ();
+}
+
+
+module_init(vortex_init);
+module_exit(vortex_cleanup);
diff --git a/xen/drivers/net/Makefile b/xen/drivers/net/Makefile
new file mode 100644
index 0000000000..34954de493
--- /dev/null
+++ b/xen/drivers/net/Makefile
@@ -0,0 +1,13 @@
+
+include $(BASEDIR)/Rules.mk
+
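+# ld -r emits one relocatable driver.o combining the generic objects
+# with the ne and e1000 sub-drivers for the Xen link.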
+default: $(OBJS)
+ $(MAKE) -C ne
+ $(MAKE) -C e1000
+ $(LD) -r -o driver.o e1000/e1000.o $(OBJS) ne/ne_drv.o
+
+clean:
+ $(MAKE) -C ne clean
+ $(MAKE) -C e1000 clean
+ rm -f *.o *~ core
+
+.PHONY: default clean
diff --git a/xen/drivers/net/Space.c b/xen/drivers/net/Space.c
new file mode 100644
index 0000000000..5724837106
--- /dev/null
+++ b/xen/drivers/net/Space.c
@@ -0,0 +1,44 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Holds initial configuration information for devices.
+ *
+ * Version: @(#)Space.c 1.0.7 08/12/93
+ *
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Donald J. Becker, <becker@scyld.com>
+ */
+#include <linux/config.h>
+#include <linux/netdevice.h>
+
+/*
+ * KAF (23/7/02): All the probe code is gone from here -- each network
+ * driver should probe as part of its setup, and dynamically append
+ * to dev_base when it finds a NIC.
+ */
+
+/*
+ * The @dev_base list is protected by @dev_base_lock and the rtnl
+ * semaphore.
+ *
+ * Pure readers hold dev_base_lock for reading.
+ *
+ * Writers must hold the rtnl semaphore while they loop through the
+ * dev_base list, and hold dev_base_lock for writing when they do the
+ * actual updates. This allows pure readers to access the list even
+ * while a writer is preparing to update it.
+ *
+ * To put it another way, dev_base_lock is held for writing only to
+ * protect against pure readers; the rtnl semaphore provides the
+ * protection against other writers.
+ *
+ * See, for example usages, register_netdevice() and
+ * unregister_netdevice(), which must be called with the rtnl
+ * semaphore held.
+ */
+struct net_device *dev_base = NULL;
+rwlock_t dev_base_lock = RW_LOCK_UNLOCKED;
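+
+/* Illustrative sketch of the rules above (not part of this file).
+ * A pure reader needs only dev_base_lock:
+ *
+ * read_lock(&dev_base_lock);
+ * for (dev = dev_base; dev; dev = dev->next)
+ * ... inspect dev ...;
+ * read_unlock(&dev_base_lock);
+ *
+ * A writer holds the rtnl semaphore across the whole operation and
+ * takes write_lock(&dev_base_lock) only around the list update, as
+ * register_netdevice() and unregister_netdevice() do.
+ */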
+
diff --git a/xen/drivers/net/e1000/LICENSE b/xen/drivers/net/e1000/LICENSE
new file mode 100644
index 0000000000..5f297e5bb4
--- /dev/null
+++ b/xen/drivers/net/e1000/LICENSE
@@ -0,0 +1,339 @@
+
+"This software program is licensed subject to the GNU General Public License
+(GPL). Version 2, June 1991, available at
+<http://www.fsf.org/copyleft/gpl.html>"
+
+GNU General Public License
+
+Version 2, June 1991
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+
+Everyone is permitted to copy and distribute verbatim copies of this license
+document, but changing it is not allowed.
+
+Preamble
+
+The licenses for most software are designed to take away your freedom to
+share and change it. By contrast, the GNU General Public License is intended
+to guarantee your freedom to share and change free software--to make sure
+the software is free for all its users. This General Public License applies
+to most of the Free Software Foundation's software and to any other program
+whose authors commit to using it. (Some other Free Software Foundation
+software is covered by the GNU Library General Public License instead.) You
+can apply it to your programs, too.
+
+When we speak of free software, we are referring to freedom, not price. Our
+General Public Licenses are designed to make sure that you have the freedom
+to distribute copies of free software (and charge for this service if you
+wish), that you receive source code or can get it if you want it, that you
+can change the software or use pieces of it in new free programs; and that
+you know you can do these things.
+
+To protect your rights, we need to make restrictions that forbid anyone to
+deny you these rights or to ask you to surrender the rights. These
+restrictions translate to certain responsibilities for you if you distribute
+copies of the software, or if you modify it.
+
+For example, if you distribute copies of such a program, whether gratis or
+for a fee, you must give the recipients all the rights that you have. You
+must make sure that they, too, receive or can get the source code. And you
+must show them these terms so they know their rights.
+
+We protect your rights with two steps: (1) copyright the software, and (2)
+offer you this license which gives you legal permission to copy, distribute
+and/or modify the software.
+
+Also, for each author's protection and ours, we want to make certain that
+everyone understands that there is no warranty for this free software. If
+the software is modified by someone else and passed on, we want its
+recipients to know that what they have is not the original, so that any
+problems introduced by others will not reflect on the original authors'
+reputations.
+
+Finally, any free program is threatened constantly by software patents. We
+wish to avoid the danger that redistributors of a free program will
+individually obtain patent licenses, in effect making the program
+proprietary. To prevent this, we have made it clear that any patent must be
+licensed for everyone's free use or not licensed at all.
+
+The precise terms and conditions for copying, distribution and modification
+follow.
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+0. This License applies to any program or other work which contains a notice
+ placed by the copyright holder saying it may be distributed under the
+ terms of this General Public License. The "Program", below, refers to any
+ such program or work, and a "work based on the Program" means either the
+ Program or any derivative work under copyright law: that is to say, a
+ work containing the Program or a portion of it, either verbatim or with
+ modifications and/or translated into another language. (Hereinafter,
+ translation is included without limitation in the term "modification".)
+ Each licensee is addressed as "you".
+
+ Activities other than copying, distribution and modification are not
+ covered by this License; they are outside its scope. The act of running
+ the Program is not restricted, and the output from the Program is covered
+ only if its contents constitute a work based on the Program (independent
+ of having been made by running the Program). Whether that is true depends
+ on what the Program does.
+
+1. You may copy and distribute verbatim copies of the Program's source code
+ as you receive it, in any medium, provided that you conspicuously and
+ appropriately publish on each copy an appropriate copyright notice and
+ disclaimer of warranty; keep intact all the notices that refer to this
+ License and to the absence of any warranty; and give any other recipients
+ of the Program a copy of this License along with the Program.
+
+ You may charge a fee for the physical act of transferring a copy, and you
+ may at your option offer warranty protection in exchange for a fee.
+
+2. You may modify your copy or copies of the Program or any portion of it,
+ thus forming a work based on the Program, and copy and distribute such
+ modifications or work under the terms of Section 1 above, provided that
+ you also meet all of these conditions:
+
+ * a) You must cause the modified files to carry prominent notices stating
+ that you changed the files and the date of any change.
+
+ * b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any part
+ thereof, to be licensed as a whole at no charge to all third parties
+ under the terms of this License.
+
+ * c) If the modified program normally reads commands interactively when
+ run, you must cause it, when started running for such interactive
+ use in the most ordinary way, to print or display an announcement
+ including an appropriate copyright notice and a notice that there is
+ no warranty (or else, saying that you provide a warranty) and that
+ users may redistribute the program under these conditions, and
+ telling the user how to view a copy of this License. (Exception: if
+ the Program itself is interactive but does not normally print such
+ an announcement, your work based on the Program is not required to
+ print an announcement.)
+
+ These requirements apply to the modified work as a whole. If identifiable
+ sections of that work are not derived from the Program, and can be
+ reasonably considered independent and separate works in themselves, then
+ this License, and its terms, do not apply to those sections when you
+ distribute them as separate works. But when you distribute the same
+ sections as part of a whole which is a work based on the Program, the
+ distribution of the whole must be on the terms of this License, whose
+ permissions for other licensees extend to the entire whole, and thus to
+ each and every part regardless of who wrote it.
+
+ Thus, it is not the intent of this section to claim rights or contest
+ your rights to work written entirely by you; rather, the intent is to
+ exercise the right to control the distribution of derivative or
+ collective works based on the Program.
+
+ In addition, mere aggregation of another work not based on the Program
+ with the Program (or with a work based on the Program) on a volume of a
+ storage or distribution medium does not bring the other work under the
+ scope of this License.
+
+3. You may copy and distribute the Program (or a work based on it, under
+ Section 2) in object code or executable form under the terms of Sections
+ 1 and 2 above provided that you also do one of the following:
+
+ * a) Accompany it with the complete corresponding machine-readable source
+ code, which must be distributed under the terms of Sections 1 and 2
+ above on a medium customarily used for software interchange; or,
+
+ * b) Accompany it with a written offer, valid for at least three years,
+ to give any third party, for a charge no more than your cost of
+ physically performing source distribution, a complete machine-
+ readable copy of the corresponding source code, to be distributed
+ under the terms of Sections 1 and 2 above on a medium customarily
+ used for software interchange; or,
+
+ * c) Accompany it with the information you received as to the offer to
+ distribute corresponding source code. (This alternative is allowed
+ only for noncommercial distribution and only if you received the
+ program in object code or executable form with such an offer, in
+ accord with Subsection b above.)
+
+ The source code for a work means the preferred form of the work for
+ making modifications to it. For an executable work, complete source code
+ means all the source code for all modules it contains, plus any
+ associated interface definition files, plus the scripts used to control
+ compilation and installation of the executable. However, as a special
+ exception, the source code distributed need not include anything that is
+ normally distributed (in either source or binary form) with the major
+ components (compiler, kernel, and so on) of the operating system on which
+ the executable runs, unless that component itself accompanies the
+ executable.
+
+ If distribution of executable or object code is made by offering access
+ to copy from a designated place, then offering equivalent access to copy
+ the source code from the same place counts as distribution of the source
+ code, even though third parties are not compelled to copy the source
+ along with the object code.
+
+4. You may not copy, modify, sublicense, or distribute the Program except as
+ expressly provided under this License. Any attempt otherwise to copy,
+ modify, sublicense or distribute the Program is void, and will
+ automatically terminate your rights under this License. However, parties
+ who have received copies, or rights, from you under this License will not
+ have their licenses terminated so long as such parties remain in full
+ compliance.
+
+5. You are not required to accept this License, since you have not signed
+ it. However, nothing else grants you permission to modify or distribute
+ the Program or its derivative works. These actions are prohibited by law
+ if you do not accept this License. Therefore, by modifying or
+ distributing the Program (or any work based on the Program), you
+ indicate your acceptance of this License to do so, and all its terms and
+ conditions for copying, distributing or modifying the Program or works
+ based on it.
+
+6. Each time you redistribute the Program (or any work based on the
+ Program), the recipient automatically receives a license from the
+ original licensor to copy, distribute or modify the Program subject to
+ these terms and conditions. You may not impose any further restrictions
+ on the recipients' exercise of the rights granted herein. You are not
+ responsible for enforcing compliance by third parties to this License.
+
+7. If, as a consequence of a court judgment or allegation of patent
+ infringement or for any other reason (not limited to patent issues),
+ conditions are imposed on you (whether by court order, agreement or
+ otherwise) that contradict the conditions of this License, they do not
+ excuse you from the conditions of this License. If you cannot distribute
+ so as to satisfy simultaneously your obligations under this License and
+ any other pertinent obligations, then as a consequence you may not
+ distribute the Program at all. For example, if a patent license would
+ not permit royalty-free redistribution of the Program by all those who
+ receive copies directly or indirectly through you, then the only way you
+ could satisfy both it and this License would be to refrain entirely from
+ distribution of the Program.
+
+ If any portion of this section is held invalid or unenforceable under any
+ particular circumstance, the balance of the section is intended to apply
+ and the section as a whole is intended to apply in other circumstances.
+
+ It is not the purpose of this section to induce you to infringe any
+ patents or other property right claims or to contest validity of any
+ such claims; this section has the sole purpose of protecting the
+ integrity of the free software distribution system, which is implemented
+ by public license practices. Many people have made generous contributions
+ to the wide range of software distributed through that system in
+ reliance on consistent application of that system; it is up to the
+ author/donor to decide if he or she is willing to distribute software
+ through any other system and a licensee cannot impose that choice.
+
+ This section is intended to make thoroughly clear what is believed to be
+ a consequence of the rest of this License.
+
+8. If the distribution and/or use of the Program is restricted in certain
+ countries either by patents or by copyrighted interfaces, the original
+ copyright holder who places the Program under this License may add an
+ explicit geographical distribution limitation excluding those countries,
+ so that distribution is permitted only in or among countries not thus
+ excluded. In such case, this License incorporates the limitation as if
+ written in the body of this License.
+
+9. The Free Software Foundation may publish revised and/or new versions of
+ the General Public License from time to time. Such new versions will be
+ similar in spirit to the present version, but may differ in detail to
+ address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the Program
+ specifies a version number of this License which applies to it and "any
+ later version", you have the option of following the terms and
+ conditions either of that version or of any later version published by
+ the Free Software Foundation. If the Program does not specify a version
+ number of this License, you may choose any version ever published by the
+ Free Software Foundation.
+
+10. If you wish to incorporate parts of the Program into other free programs
+ whose distribution conditions are different, write to the author to ask
+ for permission. For software which is copyrighted by the Free Software
+ Foundation, write to the Free Software Foundation; we sometimes make
+ exceptions for this. Our decision will be guided by the two goals of
+ preserving the free status of all derivatives of our free software and
+ of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+ FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+ PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
+ EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH
+ YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
+ NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+ REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR
+ DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL
+ DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM
+ (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED
+ INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
+ THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR
+ OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Programs
+
+If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it free
+software which everyone can redistribute and change under these terms.
+
+To do so, attach the following notices to the program. It is safest to
+attach them to the start of each source file to most effectively convey the
+exclusion of warranty; and each file should have at least the "copyright"
+line and a pointer to where the full notice is found.
+
+one line to give the program's name and an idea of what it does.
+Copyright (C) yyyy name of author
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59
+Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this when
+it starts in an interactive mode:
+
+Gnomovision version 69, Copyright (C) year name of author Gnomovision comes
+with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free
+software, and you are welcome to redistribute it under certain conditions;
+type 'show c' for details.
+
+The hypothetical commands 'show w' and 'show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may be
+called something other than 'show w' and 'show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+'Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+signature of Ty Coon, 1 April 1989
+Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General Public
+License instead of this License.
diff --git a/xen/drivers/net/e1000/Makefile b/xen/drivers/net/e1000/Makefile
new file mode 100644
index 0000000000..f262fcf32b
--- /dev/null
+++ b/xen/drivers/net/e1000/Makefile
@@ -0,0 +1,39 @@
+################################################################################
+#
+#
+# Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# The full GNU General Public License is included in this distribution in the
+# file called LICENSE.
+#
+# Contact Information:
+# Linux NICS <linux.nics@intel.com>
+# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+#
+################################################################################
+
+#
+# Makefile for the Intel(R) PRO/1000 ethernet driver
+#
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o e1000.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen/drivers/net/e1000/e1000.h b/xen/drivers/net/e1000/e1000.h
new file mode 100644
index 0000000000..d94e390ba3
--- /dev/null
+++ b/xen/drivers/net/e1000/e1000.h
@@ -0,0 +1,208 @@
+/*******************************************************************************
+
+
+ Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+/* Linux PRO/1000 Ethernet Driver main header file */
+
+#ifndef _E1000_H_
+#define _E1000_H_
+
+//#include <linux/stddef.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/delay.h>
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+//#include <linux/string.h>
+//#include <linux/pagemap.h>
+#include <asm/bitops.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+//#include <linux/capability.h>
+#include <linux/in.h>
+//#include <linux/ip.h>
+//#include <linux/tcp.h>
+//#include <linux/udp.h>
+//#include <net/pkt_sched.h>
+#include <linux/list.h>
+#include <linux/reboot.h>
+#include <linux/tqueue.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+
+#define BAR_0 0
+#define BAR_1 1
+#define BAR_5 5
+#define PCI_DMA_64BIT 0xffffffffffffffffULL
+#define PCI_DMA_32BIT 0x00000000ffffffffULL
+
+
+struct e1000_adapter;
+
+// XEN XXX
+// #define DBG 1
+
+#include "e1000_hw.h"
+
+#if DBG
+#define E1000_DBG(args...) printk(KERN_DEBUG "e1000: " args)
+#else
+#define E1000_DBG(args...)
+#endif
+
+#define E1000_ERR(args...) printk(KERN_ERR "e1000: " args)
+
+#define E1000_MAX_INTR 10
+
+/* Supported Rx Buffer Sizes */
+#define E1000_RXBUFFER_2048 2048
+#define E1000_RXBUFFER_4096 4096
+#define E1000_RXBUFFER_8192 8192
+#define E1000_RXBUFFER_16384 16384
+
+/* Flow Control High-Watermark: 43464 bytes */
+#define E1000_FC_HIGH_THRESH 0xA9C8
+
+/* Flow Control Low-Watermark: 43456 bytes */
+#define E1000_FC_LOW_THRESH 0xA9C0
+
+/* Flow Control Pause Time: 858 usec */
+#define E1000_FC_PAUSE_TIME 0x0680
+
+/* How many Tx Descriptors do we need to call netif_wake_queue ? */
+#define E1000_TX_QUEUE_WAKE 16
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define E1000_RX_BUFFER_WRITE 16
+
+#define E1000_JUMBO_PBA 0x00000028
+#define E1000_DEFAULT_PBA 0x00000030
+
+#define AUTO_ALL_MODES 0
+#define E1000_EEPROM_APME 4
+
+/* only works for sizes that are powers of 2 */
+#define E1000_ROUNDUP(i, size) ((i) = (((i) + (size) - 1) & ~((size) - 1)))
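+/* For example, E1000_ROUNDUP(i, 16) with i == 100 assigns
+ (100 + 15) & ~15 == 112; a size that is not a power of 2 would
+ make the mask trick round incorrectly. */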
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer */
+struct e1000_buffer {
+ struct sk_buff *skb;
+ uint64_t dma;
+ unsigned long length;
+ unsigned long time_stamp;
+};
+
+struct e1000_desc_ring {
+ /* pointer to the descriptor ring memory */
+ void *desc;
+ /* physical address of the descriptor ring */
+ dma_addr_t dma;
+ /* length of descriptor ring in bytes */
+ unsigned int size;
+ /* number of descriptors in the ring */
+ unsigned int count;
+ /* next descriptor to associate a buffer with */
+ unsigned int next_to_use;
+ /* next descriptor to check for DD status bit */
+ unsigned int next_to_clean;
+ /* array of buffer information structs */
+ struct e1000_buffer *buffer_info;
+};
+
+#define E1000_DESC_UNUSED(R) \
+((((R)->next_to_clean + (R)->count) - ((R)->next_to_use + 1)) % ((R)->count))
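+/* For example, with count == 256, next_to_use == 10 and
+ next_to_clean == 4 this gives ((4 + 256) - 11) % 256 == 249 unused
+ descriptors; the "+ count ... % count" keeps the result
+ non-negative across ring wrap-around. */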
+
+#define E1000_GET_DESC(R, i, type) (&(((struct type *)((R).desc))[i]))
+#define E1000_RX_DESC(R, i) E1000_GET_DESC(R, i, e1000_rx_desc)
+#define E1000_TX_DESC(R, i) E1000_GET_DESC(R, i, e1000_tx_desc)
+#define E1000_CONTEXT_DESC(R, i) E1000_GET_DESC(R, i, e1000_context_desc)
+
+/* board specific private data structure */
+
+struct e1000_adapter {
+ struct timer_list watchdog_timer;
+ struct timer_list phy_info_timer;
+ struct vlan_group *vlgrp;
+ char *id_string;
+ uint32_t bd_number;
+ uint32_t rx_buffer_len;
+ uint32_t part_num;
+ uint32_t wol;
+ uint16_t link_speed;
+ uint16_t link_duplex;
+ spinlock_t stats_lock;
+ atomic_t irq_sem;
+ struct tq_struct tx_timeout_task;
+
+ struct timer_list blink_timer;
+ unsigned long led_status;
+
+ /* TX */
+ struct e1000_desc_ring tx_ring;
+ uint32_t txd_cmd;
+ uint32_t tx_int_delay;
+ uint32_t tx_abs_int_delay;
+ int max_data_per_txd;
+
+ /* RX */
+ struct e1000_desc_ring rx_ring;
+ uint64_t hw_csum_err;
+ uint64_t hw_csum_good;
+ uint32_t rx_int_delay;
+ uint32_t rx_abs_int_delay;
+ boolean_t rx_csum;
+
+ /* OS defined structs */
+ struct net_device *netdev;
+ struct pci_dev *pdev;
+ struct net_device_stats net_stats;
+
+ /* structs defined in e1000_hw.h */
+ struct e1000_hw hw;
+ struct e1000_hw_stats stats;
+ struct e1000_phy_info phy_info;
+ struct e1000_phy_stats phy_stats;
+
+
+
+ uint32_t pci_state[16];
+ char ifname[IFNAMSIZ];
+};
+#endif /* _E1000_H_ */
diff --git a/xen/drivers/net/e1000/e1000_ethtool.c b/xen/drivers/net/e1000/e1000_ethtool.c
new file mode 100644
index 0000000000..d06ef79c6e
--- /dev/null
+++ b/xen/drivers/net/e1000/e1000_ethtool.c
@@ -0,0 +1,611 @@
+/*******************************************************************************
+
+
+ Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* ethtool support for e1000 */
+
+#include "e1000.h"
+
+#include <asm/uaccess.h>
+
+extern char e1000_driver_name[];
+extern char e1000_driver_version[];
+
+extern int e1000_up(struct e1000_adapter *adapter);
+extern void e1000_down(struct e1000_adapter *adapter);
+extern void e1000_reset(struct e1000_adapter *adapter);
+
+static char e1000_gstrings_stats[][ETH_GSTRING_LEN] = {
+ "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors",
+ "tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions",
+ "rx_length_errors", "rx_over_errors", "rx_crc_errors",
+ "rx_frame_errors", "rx_fifo_errors", "rx_missed_errors",
+ "tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors",
+ "tx_heartbeat_errors", "tx_window_errors",
+};
+#define E1000_STATS_LEN (sizeof(e1000_gstrings_stats) / ETH_GSTRING_LEN)
+
+static void
+e1000_ethtool_gset(struct e1000_adapter *adapter, struct ethtool_cmd *ecmd)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ if(hw->media_type == e1000_media_type_copper) {
+
+ ecmd->supported = (SUPPORTED_10baseT_Half |
+ SUPPORTED_10baseT_Full |
+ SUPPORTED_100baseT_Half |
+ SUPPORTED_100baseT_Full |
+ SUPPORTED_1000baseT_Full|
+ SUPPORTED_Autoneg |
+ SUPPORTED_TP);
+
+ ecmd->advertising = ADVERTISED_TP;
+
+ if(hw->autoneg == 1) {
+ ecmd->advertising |= ADVERTISED_Autoneg;
+
+ /* the e1000 autoneg seems to match ethtool nicely */
+
+ ecmd->advertising |= hw->autoneg_advertised;
+ }
+
+ ecmd->port = PORT_TP;
+ ecmd->phy_address = hw->phy_addr;
+
+ if(hw->mac_type == e1000_82543)
+ ecmd->transceiver = XCVR_EXTERNAL;
+ else
+ ecmd->transceiver = XCVR_INTERNAL;
+
+ } else {
+ ecmd->supported = (SUPPORTED_1000baseT_Full |
+ SUPPORTED_FIBRE |
+ SUPPORTED_Autoneg);
+
+ ecmd->advertising = (ADVERTISED_1000baseT_Full |
+ ADVERTISED_FIBRE |
+ ADVERTISED_Autoneg);
+
+ ecmd->port = PORT_FIBRE;
+
+ if(hw->mac_type >= e1000_82545)
+ ecmd->transceiver = XCVR_INTERNAL;
+ else
+ ecmd->transceiver = XCVR_EXTERNAL;
+ }
+
+ if(netif_carrier_ok(adapter->netdev)) {
+
+ e1000_get_speed_and_duplex(hw, &adapter->link_speed,
+ &adapter->link_duplex);
+ ecmd->speed = adapter->link_speed;
+
+ /* unfortunately FULL_DUPLEX != DUPLEX_FULL
+ * and HALF_DUPLEX != DUPLEX_HALF */
+
+ if(adapter->link_duplex == FULL_DUPLEX)
+ ecmd->duplex = DUPLEX_FULL;
+ else
+ ecmd->duplex = DUPLEX_HALF;
+ } else {
+ ecmd->speed = -1;
+ ecmd->duplex = -1;
+ }
+
+ ecmd->autoneg = (hw->autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE);
+}
+
+static int
+e1000_ethtool_sset(struct e1000_adapter *adapter, struct ethtool_cmd *ecmd)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ if(ecmd->autoneg == AUTONEG_ENABLE) {
+ hw->autoneg = 1;
+ hw->autoneg_advertised = 0x002F;
+ ecmd->advertising = 0x002F;
+ } else {
+ hw->autoneg = 0;
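+ /* SPEED_* are 10/100/1000 and DUPLEX_* are 0/1, so each
+ speed+duplex sum below is distinct. */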
+ switch(ecmd->speed + ecmd->duplex) {
+ case SPEED_10 + DUPLEX_HALF:
+ hw->forced_speed_duplex = e1000_10_half;
+ break;
+ case SPEED_10 + DUPLEX_FULL:
+ hw->forced_speed_duplex = e1000_10_full;
+ break;
+ case SPEED_100 + DUPLEX_HALF:
+ hw->forced_speed_duplex = e1000_100_half;
+ break;
+ case SPEED_100 + DUPLEX_FULL:
+ hw->forced_speed_duplex = e1000_100_full;
+ break;
+ case SPEED_1000 + DUPLEX_FULL:
+ hw->autoneg = 1;
+ hw->autoneg_advertised = ADVERTISE_1000_FULL;
+ break;
+ case SPEED_1000 + DUPLEX_HALF: /* not supported */
+ default:
+ return -EINVAL;
+ }
+ }
+
+ /* reset the link */
+
+ if(netif_running(adapter->netdev)) {
+ e1000_down(adapter);
+ e1000_up(adapter);
+ } else
+ e1000_reset(adapter);
+
+ return 0;
+}
+
+static inline int
+e1000_eeprom_size(struct e1000_hw *hw)
+{
+ if((hw->mac_type > e1000_82544) &&
+ (E1000_READ_REG(hw, EECD) & E1000_EECD_SIZE))
+ return 512;
+ else
+ return 128;
+}
+
+static void
+e1000_ethtool_gdrvinfo(struct e1000_adapter *adapter,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strncpy(drvinfo->driver, e1000_driver_name, 32);
+ strncpy(drvinfo->version, e1000_driver_version, 32);
+ strncpy(drvinfo->fw_version, "N/A", 32);
+ strncpy(drvinfo->bus_info, adapter->pdev->slot_name, 32);
+ drvinfo->n_stats = E1000_STATS_LEN;
+#define E1000_REGS_LEN 32
+ drvinfo->regdump_len = E1000_REGS_LEN * sizeof(uint32_t);
+ drvinfo->eedump_len = e1000_eeprom_size(&adapter->hw);
+}
+
+static void
+e1000_ethtool_gregs(struct e1000_adapter *adapter,
+ struct ethtool_regs *regs, uint32_t *regs_buff)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ regs->version = (1 << 24) | (hw->revision_id << 16) | hw->device_id;
+
+ regs_buff[0] = E1000_READ_REG(hw, CTRL);
+ regs_buff[1] = E1000_READ_REG(hw, STATUS);
+
+ regs_buff[2] = E1000_READ_REG(hw, RCTL);
+ regs_buff[3] = E1000_READ_REG(hw, RDLEN);
+ regs_buff[4] = E1000_READ_REG(hw, RDH);
+ regs_buff[5] = E1000_READ_REG(hw, RDT);
+ regs_buff[6] = E1000_READ_REG(hw, RDTR);
+
+ regs_buff[7] = E1000_READ_REG(hw, TCTL);
+ regs_buff[8] = E1000_READ_REG(hw, TDLEN);
+ regs_buff[9] = E1000_READ_REG(hw, TDH);
+ regs_buff[10] = E1000_READ_REG(hw, TDT);
+ regs_buff[11] = E1000_READ_REG(hw, TIDV);
+
+ return;
+}
+
+static int
+e1000_ethtool_geeprom(struct e1000_adapter *adapter,
+ struct ethtool_eeprom *eeprom, uint16_t *eeprom_buff)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int max_len, first_word, last_word;
+ int ret_val = 0;
+ int i;
+
+ if(eeprom->len == 0) {
+ ret_val = -EINVAL;
+ goto geeprom_error;
+ }
+
+ eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+ max_len = e1000_eeprom_size(hw);
+
+ if(eeprom->offset > eeprom->offset + eeprom->len) {
+ ret_val = -EINVAL;
+ goto geeprom_error;
+ }
+
+ if((eeprom->offset + eeprom->len) > max_len)
+ eeprom->len = (max_len - eeprom->offset);
+
+ first_word = eeprom->offset >> 1;
+ last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+
+ for(i = 0; i <= (last_word - first_word); i++)
+ e1000_read_eeprom(hw, first_word + i, &eeprom_buff[i]);
+
+geeprom_error:
+ return ret_val;
+}
+
+static int
+e1000_ethtool_seeprom(struct e1000_adapter *adapter,
+ struct ethtool_eeprom *eeprom, void *user_data)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ uint16_t *eeprom_buff;
+ int max_len, first_word, last_word;
+ void *ptr;
+ int i;
+
+ if(eeprom->len == 0)
+ return -EOPNOTSUPP;
+
+ if(eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+ return -EFAULT;
+
+ max_len = e1000_eeprom_size(hw);
+
+ if((eeprom->offset + eeprom->len) > max_len)
+ eeprom->len = (max_len - eeprom->offset);
+
+ first_word = eeprom->offset >> 1;
+ last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+ eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+ if(eeprom_buff == NULL)
+ return -ENOMEM;
+
+ ptr = (void *)eeprom_buff;
+
+ if(eeprom->offset & 1) {
+ /* need read/modify/write of first changed EEPROM word */
+ /* only the second byte of the word is being modified */
+ e1000_read_eeprom(hw, first_word, &eeprom_buff[0]);
+ ptr++;
+ }
+ if((eeprom->offset + eeprom->len) & 1) {
+ /* need read/modify/write of last changed EEPROM word */
+ /* only the first byte of the word is being modified */
+ e1000_read_eeprom(hw, last_word,
+ &eeprom_buff[last_word - first_word]);
+ }
+ if(copy_from_user(ptr, user_data, eeprom->len)) {
+ kfree(eeprom_buff);
+ return -EFAULT;
+ }
+
+ for(i = 0; i <= (last_word - first_word); i++)
+ e1000_write_eeprom(hw, first_word + i, eeprom_buff[i]);
+
+ /* Update the checksum over the first part of the EEPROM if needed */
+ if(first_word <= EEPROM_CHECKSUM_REG)
+ e1000_update_eeprom_checksum(hw);
+
+ kfree(eeprom_buff);
+
+ return 0;
+}
+
+static void
+e1000_ethtool_gwol(struct e1000_adapter *adapter, struct ethtool_wolinfo *wol)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ switch(adapter->hw.device_id) {
+ case E1000_DEV_ID_82542:
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
+ wol->supported = 0;
+ wol->wolopts = 0;
+ return;
+
+ case E1000_DEV_ID_82546EB_FIBER:
+ /* Wake events only supported on port A for dual fiber */
+ if(E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1) {
+ wol->supported = 0;
+ wol->wolopts = 0;
+ return;
+ }
+ /* Fall Through */
+
+ default:
+ wol->supported = WAKE_UCAST | WAKE_MCAST
+ | WAKE_BCAST | WAKE_MAGIC;
+
+ wol->wolopts = 0;
+ if(adapter->wol & E1000_WUFC_EX)
+ wol->wolopts |= WAKE_UCAST;
+ if(adapter->wol & E1000_WUFC_MC)
+ wol->wolopts |= WAKE_MCAST;
+ if(adapter->wol & E1000_WUFC_BC)
+ wol->wolopts |= WAKE_BCAST;
+ if(adapter->wol & E1000_WUFC_MAG)
+ wol->wolopts |= WAKE_MAGIC;
+ return;
+ }
+}
+
+static int
+e1000_ethtool_swol(struct e1000_adapter *adapter, struct ethtool_wolinfo *wol)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ switch(adapter->hw.device_id) {
+ case E1000_DEV_ID_82542:
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
+ return wol->wolopts ? -EOPNOTSUPP : 0;
+
+ case E1000_DEV_ID_82546EB_FIBER:
+ /* Wake events only supported on port A for dual fiber */
+ if(E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)
+ return wol->wolopts ? -EOPNOTSUPP : 0;
+ /* Fall Through */
+
+ default:
+ if(wol->wolopts & (WAKE_ARP | WAKE_MAGICSECURE | WAKE_PHY))
+ return -EOPNOTSUPP;
+
+ adapter->wol = 0;
+
+ if(wol->wolopts & WAKE_UCAST)
+ adapter->wol |= E1000_WUFC_EX;
+ if(wol->wolopts & WAKE_MCAST)
+ adapter->wol |= E1000_WUFC_MC;
+ if(wol->wolopts & WAKE_BCAST)
+ adapter->wol |= E1000_WUFC_BC;
+ if(wol->wolopts & WAKE_MAGIC)
+ adapter->wol |= E1000_WUFC_MAG;
+ }
+
+ return 0;
+}
+
+
+/* toggle LED 4 times per second = 2 "blinks" per second */
+#define E1000_ID_INTERVAL (HZ/4)
+
+/* bit defines for adapter->led_status */
+#define E1000_LED_ON 0
+
+static void
+e1000_led_blink_callback(unsigned long data)
+{
+ struct e1000_adapter *adapter = (struct e1000_adapter *) data;
+
+ if(test_and_change_bit(E1000_LED_ON, &adapter->led_status))
+ e1000_led_off(&adapter->hw);
+ else
+ e1000_led_on(&adapter->hw);
+
+ mod_timer(&adapter->blink_timer, jiffies + E1000_ID_INTERVAL);
+}
+
+static int
+e1000_ethtool_led_blink(struct e1000_adapter *adapter, struct ethtool_value *id)
+{
+ if(!adapter->blink_timer.function) {
+ init_timer(&adapter->blink_timer);
+ adapter->blink_timer.function = e1000_led_blink_callback;
+ adapter->blink_timer.data = (unsigned long) adapter;
+ }
+
+ e1000_setup_led(&adapter->hw);
+ mod_timer(&adapter->blink_timer, jiffies);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ if(id->data)
+ schedule_timeout(id->data * HZ);
+ else
+ schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+
+ del_timer_sync(&adapter->blink_timer);
+ e1000_led_off(&adapter->hw);
+ clear_bit(E1000_LED_ON, &adapter->led_status);
+ e1000_cleanup_led(&adapter->hw);
+
+ return 0;
+}
+
+int
+e1000_ethtool_ioctl(struct net_device *netdev, struct ifreq *ifr)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ void *addr = ifr->ifr_data;
+ uint32_t cmd;
+
+ if(get_user(cmd, (uint32_t *) addr))
+ return -EFAULT;
+
+ switch(cmd) {
+ case ETHTOOL_GSET: {
+ struct ethtool_cmd ecmd = {ETHTOOL_GSET};
+ e1000_ethtool_gset(adapter, &ecmd);
+ if(copy_to_user(addr, &ecmd, sizeof(ecmd)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SSET: {
+ struct ethtool_cmd ecmd;
+ if(!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ if(copy_from_user(&ecmd, addr, sizeof(ecmd)))
+ return -EFAULT;
+ return e1000_ethtool_sset(adapter, &ecmd);
+ }
+ case ETHTOOL_GDRVINFO: {
+ struct ethtool_drvinfo drvinfo = {ETHTOOL_GDRVINFO};
+ e1000_ethtool_gdrvinfo(adapter, &drvinfo);
+ if(copy_to_user(addr, &drvinfo, sizeof(drvinfo)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_GSTRINGS: {
+ struct ethtool_gstrings gstrings = { ETHTOOL_GSTRINGS };
+ char *strings = NULL;
+
+ if(copy_from_user(&gstrings, addr, sizeof(gstrings)))
+ return -EFAULT;
+ switch(gstrings.string_set) {
+ case ETH_SS_STATS:
+ gstrings.len = E1000_STATS_LEN;
+ strings = *e1000_gstrings_stats;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ if(copy_to_user(addr, &gstrings, sizeof(gstrings)))
+ return -EFAULT;
+ addr += offsetof(struct ethtool_gstrings, data);
+ if(copy_to_user(addr, strings,
+ gstrings.len * ETH_GSTRING_LEN))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_GREGS: {
+ struct ethtool_regs regs = {ETHTOOL_GREGS};
+ uint32_t regs_buff[E1000_REGS_LEN];
+
+ if(copy_from_user(&regs, addr, sizeof(regs)))
+ return -EFAULT;
+ e1000_ethtool_gregs(adapter, &regs, regs_buff);
+ if(copy_to_user(addr, &regs, sizeof(regs)))
+ return -EFAULT;
+
+ addr += offsetof(struct ethtool_regs, data);
+ if(copy_to_user(addr, regs_buff, regs.len))
+ return -EFAULT;
+
+ return 0;
+ }
+ case ETHTOOL_NWAY_RST: {
+ if(!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ if(netif_running(netdev)) {
+ e1000_down(adapter);
+ e1000_up(adapter);
+ }
+ return 0;
+ }
+ case ETHTOOL_PHYS_ID: {
+ struct ethtool_value id;
+ if(copy_from_user(&id, addr, sizeof(id)))
+ return -EFAULT;
+ return e1000_ethtool_led_blink(adapter, &id);
+ }
+ case ETHTOOL_GLINK: {
+ struct ethtool_value link = {ETHTOOL_GLINK};
+ link.data = netif_carrier_ok(netdev);
+ if(copy_to_user(addr, &link, sizeof(link)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_GWOL: {
+ struct ethtool_wolinfo wol = {ETHTOOL_GWOL};
+ e1000_ethtool_gwol(adapter, &wol);
+ if(copy_to_user(addr, &wol, sizeof(wol)) != 0)
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SWOL: {
+ struct ethtool_wolinfo wol;
+ if(!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ if(copy_from_user(&wol, addr, sizeof(wol)) != 0)
+ return -EFAULT;
+ return e1000_ethtool_swol(adapter, &wol);
+ }
+ case ETHTOOL_GEEPROM: {
+ struct ethtool_eeprom eeprom = {ETHTOOL_GEEPROM};
+ uint16_t *eeprom_buff;
+ void *ptr;
+ int max_len, err = 0;
+
+ max_len = e1000_eeprom_size(&adapter->hw);
+
+ eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+
+ if(eeprom_buff == NULL)
+ return -ENOMEM;
+
+ if(copy_from_user(&eeprom, addr, sizeof(eeprom))) {
+ err = -EFAULT;
+ goto err_geeprom_ioctl;
+ }
+
+ if((err = e1000_ethtool_geeprom(adapter, &eeprom,
+ eeprom_buff)))
+ goto err_geeprom_ioctl;
+
+ if(copy_to_user(addr, &eeprom, sizeof(eeprom))) {
+ err = -EFAULT;
+ goto err_geeprom_ioctl;
+ }
+
+ addr += offsetof(struct ethtool_eeprom, data);
+ ptr = ((void *)eeprom_buff) + (eeprom.offset & 1);
+
+ if(copy_to_user(addr, ptr, eeprom.len))
+ err = -EFAULT;
+
+err_geeprom_ioctl:
+ kfree(eeprom_buff);
+ return err;
+ }
+ case ETHTOOL_SEEPROM: {
+ struct ethtool_eeprom eeprom;
+
+ if(!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if(copy_from_user(&eeprom, addr, sizeof(eeprom)))
+ return -EFAULT;
+
+ addr += offsetof(struct ethtool_eeprom, data);
+ return e1000_ethtool_seeprom(adapter, &eeprom, addr);
+ }
+ case ETHTOOL_GSTATS: {
+ struct {
+ struct ethtool_stats cmd;
+ uint64_t data[E1000_STATS_LEN];
+ } stats = { {ETHTOOL_GSTATS, E1000_STATS_LEN} };
+ int i;
+
+ for(i = 0; i < E1000_STATS_LEN; i++)
+ stats.data[i] =
+ ((unsigned long *)&adapter->net_stats)[i];
+ if(copy_to_user(addr, &stats, sizeof(stats)))
+ return -EFAULT;
+ return 0;
+ }
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+
diff --git a/xen/drivers/net/e1000/e1000_hw.c b/xen/drivers/net/e1000/e1000_hw.c
new file mode 100644
index 0000000000..1d70dab937
--- /dev/null
+++ b/xen/drivers/net/e1000/e1000_hw.c
@@ -0,0 +1,3610 @@
+/*******************************************************************************
+
+
+ Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* e1000_hw.c
+ * Shared functions for accessing and configuring the MAC
+ */
+
+#include "e1000_hw.h"
+
+static int32_t e1000_setup_fiber_link(struct e1000_hw *hw);
+static int32_t e1000_setup_copper_link(struct e1000_hw *hw);
+static int32_t e1000_phy_force_speed_duplex(struct e1000_hw *hw);
+static int32_t e1000_config_mac_to_phy(struct e1000_hw *hw);
+static int32_t e1000_force_mac_fc(struct e1000_hw *hw);
+static void e1000_raise_mdi_clk(struct e1000_hw *hw, uint32_t *ctrl);
+static void e1000_lower_mdi_clk(struct e1000_hw *hw, uint32_t *ctrl);
+static void e1000_shift_out_mdi_bits(struct e1000_hw *hw, uint32_t data, uint16_t count);
+static uint16_t e1000_shift_in_mdi_bits(struct e1000_hw *hw);
+static int32_t e1000_phy_reset_dsp(struct e1000_hw *hw);
+static void e1000_raise_ee_clk(struct e1000_hw *hw, uint32_t *eecd);
+static void e1000_lower_ee_clk(struct e1000_hw *hw, uint32_t *eecd);
+static void e1000_shift_out_ee_bits(struct e1000_hw *hw, uint16_t data, uint16_t count);
+static uint16_t e1000_shift_in_ee_bits(struct e1000_hw *hw);
+static void e1000_setup_eeprom(struct e1000_hw *hw);
+static void e1000_clock_eeprom(struct e1000_hw *hw);
+static void e1000_cleanup_eeprom(struct e1000_hw *hw);
+static void e1000_standby_eeprom(struct e1000_hw *hw);
+static int32_t e1000_id_led_init(struct e1000_hw * hw);
+
+/******************************************************************************
+ * Set the mac type member in the hw struct.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+int32_t
+e1000_set_mac_type(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_set_mac_type");
+
+ switch (hw->device_id) {
+ case E1000_DEV_ID_82542:
+ switch (hw->revision_id) {
+ case E1000_82542_2_0_REV_ID:
+ hw->mac_type = e1000_82542_rev2_0;
+ break;
+ case E1000_82542_2_1_REV_ID:
+ hw->mac_type = e1000_82542_rev2_1;
+ break;
+ default:
+ /* Invalid 82542 revision ID */
+ return -E1000_ERR_MAC_TYPE;
+ }
+ break;
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ hw->mac_type = e1000_82543;
+ break;
+ case E1000_DEV_ID_82544EI_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
+ case E1000_DEV_ID_82544GC_COPPER:
+ case E1000_DEV_ID_82544GC_LOM:
+ hw->mac_type = e1000_82544;
+ break;
+ case E1000_DEV_ID_82540EM:
+ case E1000_DEV_ID_82540EM_LOM:
+ case E1000_DEV_ID_82540EP:
+ case E1000_DEV_ID_82540EP_LOM:
+ case E1000_DEV_ID_82540EP_LP:
+ hw->mac_type = e1000_82540;
+ break;
+ case E1000_DEV_ID_82545EM_COPPER:
+ case E1000_DEV_ID_82545EM_FIBER:
+ hw->mac_type = e1000_82545;
+ break;
+ case E1000_DEV_ID_82546EB_COPPER:
+ case E1000_DEV_ID_82546EB_FIBER:
+ hw->mac_type = e1000_82546;
+ break;
+ default:
+ /* Should never have loaded on this device */
+ return -E1000_ERR_MAC_TYPE;
+ }
+ return E1000_SUCCESS;
+}
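+
+/* Note: e1000_set_mac_type() is expected to run before the other shared
+ * code routines, since the reset and init paths below branch on
+ * hw->mac_type.
+ */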
+/******************************************************************************
+ * Reset the transmit and receive units; mask and clear all interrupts.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+void
+e1000_reset_hw(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+ uint32_t ctrl_ext;
+ uint32_t icr;
+ uint32_t manc;
+
+ DEBUGFUNC("e1000_reset_hw");
+ /* For 82542 (rev 2.0), disable MWI before issuing a device reset */
+ if(hw->mac_type == e1000_82542_rev2_0) {
+ DEBUGOUT("Disabling MWI on 82542 rev 2.0\n");
+ e1000_pci_clear_mwi(hw);
+ }
+
+ /* Clear interrupt mask to stop board from generating interrupts */
+ DEBUGOUT("Masking off all interrupts\n");
+ E1000_WRITE_REG(hw, IMC, 0xffffffff);
+
+ /* Disable the Transmit and Receive units. Then delay to allow
+ * any pending transactions to complete before we hit the MAC with
+ * the global reset.
+ */
+ E1000_WRITE_REG(hw, RCTL, 0);
+ E1000_WRITE_REG(hw, TCTL, E1000_TCTL_PSP);
+ E1000_WRITE_FLUSH(hw);
+
+ /* The tbi_compatibility_on flag must be cleared when RCTL is cleared. */
+ hw->tbi_compatibility_on = FALSE;
+
+ /* Delay to allow any outstanding PCI transactions to complete before
+ * resetting the device
+ */
+ DEBUGOUT("Before delay\n");
+ msec_delay(10);
+
+ /* Issue a global reset to the MAC. This will reset the chip's
+ * transmit, receive, DMA, and link units. It will not affect
+ * the current PCI configuration. The global reset bit is self-
+ * clearing, and should clear within a microsecond.
+ */
+ DEBUGOUT("Issuing a global reset to MAC\n");
+ ctrl = E1000_READ_REG(hw, CTRL);
+
+ if(hw->mac_type > e1000_82543)
+ E1000_WRITE_REG_IO(hw, CTRL, (ctrl | E1000_CTRL_RST));
+ else
+ E1000_WRITE_REG(hw, CTRL, (ctrl | E1000_CTRL_RST));
+
+ /* Force a reload from the EEPROM if necessary */
+ if(hw->mac_type < e1000_82540) {
+ /* Wait for reset to complete */
+ udelay(10);
+ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_EXT_EE_RST;
+ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext);
+ E1000_WRITE_FLUSH(hw);
+ /* Wait for EEPROM reload */
+ msec_delay(2);
+ } else {
+ /* Wait for EEPROM reload (it happens automatically) */
+ msec_delay(4);
+ /* Disable HW ARPs on ASF-enabled adapters */
+ manc = E1000_READ_REG(hw, MANC);
+ manc &= ~(E1000_MANC_ARP_EN);
+ E1000_WRITE_REG(hw, MANC, manc);
+ }
+
+ /* Clear interrupt mask to stop board from generating interrupts */
+ DEBUGOUT("Masking off all interrupts\n");
+ E1000_WRITE_REG(hw, IMC, 0xffffffff);
+
+ /* Clear any pending interrupt events. */
+ icr = E1000_READ_REG(hw, ICR);
+
+ /* If MWI was previously enabled, reenable it. */
+ if(hw->mac_type == e1000_82542_rev2_0) {
+ if(hw->pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
+ e1000_pci_set_mwi(hw);
+ }
+}
+
+/******************************************************************************
+ * Performs basic configuration of the adapter.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Assumes that the controller has previously been reset and is in a
+ * post-reset uninitialized state. Initializes the receive address registers,
+ * multicast table, and VLAN filter table. Calls routines to setup link
+ * configuration and flow control settings. Clears all on-chip counters. Leaves
+ * the transmit and receive units disabled and uninitialized.
+ *****************************************************************************/
+int32_t
+e1000_init_hw(struct e1000_hw *hw)
+{
+ uint32_t ctrl, status;
+ uint32_t i;
+ int32_t ret_val;
+ uint16_t pcix_cmd_word;
+ uint16_t pcix_stat_hi_word;
+ uint16_t cmd_mmrbc;
+ uint16_t stat_mmrbc;
+
+ DEBUGFUNC("e1000_init_hw");
+
+ /* Initialize Identification LED */
+ ret_val = e1000_id_led_init(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error Initializing Identification LED\n");
+ return ret_val;
+ }
+
+ /* Set the Media Type and exit with error if it is not valid. */
+ if(hw->mac_type != e1000_82543) {
+ /* tbi_compatibility is only valid on 82543 */
+ hw->tbi_compatibility_en = FALSE;
+ }
+
+ if(hw->mac_type >= e1000_82543) {
+ status = E1000_READ_REG(hw, STATUS);
+ if(status & E1000_STATUS_TBIMODE) {
+ hw->media_type = e1000_media_type_fiber;
+ /* tbi_compatibility not valid on fiber */
+ hw->tbi_compatibility_en = FALSE;
+ } else {
+ hw->media_type = e1000_media_type_copper;
+ }
+ } else {
+ /* This is an 82542 (fiber only) */
+ hw->media_type = e1000_media_type_fiber;
+ }
+
+ /* Disabling VLAN filtering. */
+ DEBUGOUT("Initializing the IEEE VLAN\n");
+ E1000_WRITE_REG(hw, VET, 0);
+
+ e1000_clear_vfta(hw);
+
+ /* For 82542 (rev 2.0), disable MWI and put the receiver into reset */
+ if(hw->mac_type == e1000_82542_rev2_0) {
+ DEBUGOUT("Disabling MWI on 82542 rev 2.0\n");
+ e1000_pci_clear_mwi(hw);
+ E1000_WRITE_REG(hw, RCTL, E1000_RCTL_RST);
+ E1000_WRITE_FLUSH(hw);
+ msec_delay(5);
+ }
+
+ /* Setup the receive address. This involves initializing all of the Receive
+ * Address Registers (RARs 0 - 15).
+ */
+ e1000_init_rx_addrs(hw);
+
+ /* For 82542 (rev 2.0), take the receiver out of reset and enable MWI */
+ if(hw->mac_type == e1000_82542_rev2_0) {
+ E1000_WRITE_REG(hw, RCTL, 0);
+ E1000_WRITE_FLUSH(hw);
+ msec_delay(1);
+ if(hw->pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
+ e1000_pci_set_mwi(hw);
+ }
+
+ /* Zero out the Multicast HASH table */
+ DEBUGOUT("Zeroing the MTA\n");
+ for(i = 0; i < E1000_MC_TBL_SIZE; i++)
+ E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
+
+ /* Set the PCI priority bit correctly in the CTRL register. This
+ * determines if the adapter gives priority to receives, or if it
+ * gives equal priority to transmits and receives.
+ */
+ if(hw->dma_fairness) {
+ ctrl = E1000_READ_REG(hw, CTRL);
+ E1000_WRITE_REG(hw, CTRL, ctrl | E1000_CTRL_PRIOR);
+ }
+
+ /* Workaround for PCI-X problem when BIOS sets MMRBC incorrectly. */
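+ /* Compare the BIOS-programmed Maximum Memory Read Byte Count (PCI-X
+ * command register) against the maximum the device reports it can
+ * handle (status register, with 4K treated as 2K per the cap below),
+ * and scale the command register back down if it is set too high.
+ */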
+ if(hw->bus_type == e1000_bus_type_pcix) {
+ e1000_read_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd_word);
+ e1000_read_pci_cfg(hw, PCIX_STATUS_REGISTER_HI, &pcix_stat_hi_word);
+ cmd_mmrbc = (pcix_cmd_word & PCIX_COMMAND_MMRBC_MASK) >>
+ PCIX_COMMAND_MMRBC_SHIFT;
+ stat_mmrbc = (pcix_stat_hi_word & PCIX_STATUS_HI_MMRBC_MASK) >>
+ PCIX_STATUS_HI_MMRBC_SHIFT;
+ if(stat_mmrbc == PCIX_STATUS_HI_MMRBC_4K)
+ stat_mmrbc = PCIX_STATUS_HI_MMRBC_2K;
+ if(cmd_mmrbc > stat_mmrbc) {
+ pcix_cmd_word &= ~PCIX_COMMAND_MMRBC_MASK;
+ pcix_cmd_word |= stat_mmrbc << PCIX_COMMAND_MMRBC_SHIFT;
+ e1000_write_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd_word);
+ }
+ }
+
+ /* Call a subroutine to configure the link and setup flow control. */
+ ret_val = e1000_setup_link(hw);
+
+ /* Set the transmit descriptor write-back policy */
+ if(hw->mac_type > e1000_82544) {
+ ctrl = E1000_READ_REG(hw, TXDCTL);
+ ctrl = (ctrl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB;
+ E1000_WRITE_REG(hw, TXDCTL, ctrl);
+ }
+
+ /* Clear all of the statistics registers (clear on read). It is
+ * important that we do this after we have tried to establish link
+ * because the symbol error count will increment wildly if there
+ * is no link.
+ */
+ e1000_clear_hw_cntrs(hw);
+
+ return ret_val;
+}
+
+/******************************************************************************
+ * Configures flow control and link settings.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Determines which flow control settings to use. Calls the appropriate media-
+ * specific link configuration function. Configures the flow control settings.
+ * Assuming the adapter has a valid link partner, a valid link should be
+ * established. Assumes the hardware has previously been reset and the
+ * transmitter and receiver are not enabled.
+ *****************************************************************************/
+int32_t
+e1000_setup_link(struct e1000_hw *hw)
+{
+ uint32_t ctrl_ext;
+ int32_t ret_val;
+ uint16_t eeprom_data;
+
+ DEBUGFUNC("e1000_setup_link");
+
+ /* Read and store word 0x0F of the EEPROM. This word contains bits
+ * that determine the hardware's default PAUSE (flow control) mode,
+ * a bit that determines whether the HW defaults to enabling or
+ * disabling auto-negotiation, and the direction of the
+ * SW defined pins. If there is no SW over-ride of the flow
+ * control setting, then the variable hw->fc will
+ * be initialized based on a value in the EEPROM.
+ */
+ if(e1000_read_eeprom(hw, EEPROM_INIT_CONTROL2_REG, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+
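+ /* The word 0x0F PAUSE bits decode as: neither bit set = no flow
+ * control, ASM_DIR alone = Tx PAUSE only, any other combination =
+ * full flow control.
+ */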
+ if(hw->fc == e1000_fc_default) {
+ if((eeprom_data & EEPROM_WORD0F_PAUSE_MASK) == 0)
+ hw->fc = e1000_fc_none;
+ else if((eeprom_data & EEPROM_WORD0F_PAUSE_MASK) ==
+ EEPROM_WORD0F_ASM_DIR)
+ hw->fc = e1000_fc_tx_pause;
+ else
+ hw->fc = e1000_fc_full;
+ }
+
+ /* We want to save off the original Flow Control configuration just
+ * in case we get disconnected and then reconnected into a different
+ * hub or switch with different Flow Control capabilities.
+ */
+ if(hw->mac_type == e1000_82542_rev2_0)
+ hw->fc &= (~e1000_fc_tx_pause);
+
+ if((hw->mac_type < e1000_82543) && (hw->report_tx_early == 1))
+ hw->fc &= (~e1000_fc_rx_pause);
+
+ hw->original_fc = hw->fc;
+
+ DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc);
+
+ /* Take the 4 bits from EEPROM word 0x0F that determine the initial
+ * polarity value for the SW controlled pins, and setup the
+ * Extended Device Control reg with that info.
+ * This is needed because one of the SW controlled pins is used for
+ * signal detection. So this should be done before e1000_setup_pcs_link()
+ * or e1000_phy_setup() is called.
+ */
+ if(hw->mac_type == e1000_82543) {
+ ctrl_ext = ((eeprom_data & EEPROM_WORD0F_SWPDIO_EXT) <<
+ SWDPIO__EXT_SHIFT);
+ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext);
+ }
+
+ /* Call the necessary subroutine to configure the link. */
+ ret_val = (hw->media_type == e1000_media_type_fiber) ?
+ e1000_setup_fiber_link(hw) :
+ e1000_setup_copper_link(hw);
+
+ /* Initialize the flow control address, type, and PAUSE timer
+ * registers to their default values. This is done even if flow
+ * control is disabled, because it does not hurt anything to
+ * initialize these registers.
+ */
+ DEBUGOUT("Initializing the Flow Control address, type and timer regs\n");
+
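+ /* These constants program the standard IEEE 802.3x PAUSE parameters:
+ * the reserved multicast destination address 01:80:C2:00:00:01 and
+ * the MAC control Ethertype 0x8808.
+ */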
+ E1000_WRITE_REG(hw, FCAL, FLOW_CONTROL_ADDRESS_LOW);
+ E1000_WRITE_REG(hw, FCAH, FLOW_CONTROL_ADDRESS_HIGH);
+ E1000_WRITE_REG(hw, FCT, FLOW_CONTROL_TYPE);
+ E1000_WRITE_REG(hw, FCTTV, hw->fc_pause_time);
+
+ /* Set the flow control receive threshold registers. Normally,
+ * these registers will be set to a default threshold that may be
+ * adjusted later by the driver's runtime code. However, if the
+ * ability to transmit pause frames is not enabled, then these
+ * registers will be set to 0.
+ */
+ if(!(hw->fc & e1000_fc_tx_pause)) {
+ E1000_WRITE_REG(hw, FCRTL, 0);
+ E1000_WRITE_REG(hw, FCRTH, 0);
+ } else {
+ /* We need to set up the Receive Threshold high and low water marks
+ * as well as (optionally) enabling the transmission of XON frames.
+ */
+ if(hw->fc_send_xon) {
+ E1000_WRITE_REG(hw, FCRTL, (hw->fc_low_water | E1000_FCRTL_XONE));
+ E1000_WRITE_REG(hw, FCRTH, hw->fc_high_water);
+ } else {
+ E1000_WRITE_REG(hw, FCRTL, hw->fc_low_water);
+ E1000_WRITE_REG(hw, FCRTH, hw->fc_high_water);
+ }
+ }
+ return ret_val;
+}
+
+/******************************************************************************
+ * Sets up link for a fiber based adapter
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Manipulates Physical Coding Sublayer functions in order to configure
+ * link. Assumes the hardware has been previously reset and the transmitter
+ * and receiver are not enabled.
+ *****************************************************************************/
+static int32_t
+e1000_setup_fiber_link(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+ uint32_t status;
+ uint32_t txcw = 0;
+ uint32_t i;
+ uint32_t signal;
+ int32_t ret_val;
+
+ DEBUGFUNC("e1000_setup_fiber_link");
+
+ /* On adapters with a MAC newer than 82544, SW Definable pin 1 will be
+ * set when the optics detect a signal. On older adapters, it will be
+ * cleared when there is a signal.
+ */
+ ctrl = E1000_READ_REG(hw, CTRL);
+ if(hw->mac_type > e1000_82544) signal = E1000_CTRL_SWDPIN1;
+ else signal = 0;
+
+ /* Take the link out of reset */
+ ctrl &= ~(E1000_CTRL_LRST);
+
+ e1000_config_collision_dist(hw);
+
+ /* Check for a software override of the flow control settings, and setup
+ * the device accordingly. If auto-negotiation is enabled, then software
+ * will have to set the "PAUSE" bits to the correct value in the Tranmsit
+ * Config Word Register (TXCW) and re-start auto-negotiation. However, if
+ * auto-negotiation is disabled, then software will have to manually
+ * configure the two flow control enable bits in the CTRL register.
+ *
+ * The possible values of the "fc" parameter are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause frames, but
+ * not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames but we do
+ * not support receiving pause frames).
+ * 3: Both Rx and TX flow control (symmetric) are enabled.
+ */
+ switch (hw->fc) {
+ case e1000_fc_none:
+ /* Flow control is completely disabled by a software over-ride. */
+ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD);
+ break;
+ case e1000_fc_rx_pause:
+ /* RX Flow control is enabled and TX Flow control is disabled by a
+ * software over-ride. Since there really isn't a way to advertise
+ * that we are capable of RX Pause ONLY, we will advertise that we
+ * support both symmetric and asymmetric RX PAUSE. Later, we will
+ * disable the adapter's ability to send PAUSE frames.
+ */
+ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK);
+ break;
+ case e1000_fc_tx_pause:
+ /* TX Flow control is enabled, and RX Flow control is disabled, by a
+ * software over-ride.
+ */
+ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR);
+ break;
+ case e1000_fc_full:
+ /* Flow control (both RX and TX) is enabled by a software over-ride. */
+ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK);
+ break;
+ default:
+ DEBUGOUT("Flow control param set incorrectly\n");
+ return -E1000_ERR_CONFIG;
+ break;
+ }
+
+ /* Since auto-negotiation is enabled, take the link out of reset (the link
+ * will be in reset, because we previously reset the chip). This will
+ * restart auto-negotiation. If auto-negotiation is successful then the
+ * link-up status bit will be set and the flow control enable bits (RFCE
+ * and TFCE) will be set according to their negotiated value.
+ */
+ DEBUGOUT("Auto-negotiation enabled\n");
+
+ E1000_WRITE_REG(hw, TXCW, txcw);
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ E1000_WRITE_FLUSH(hw);
+
+ hw->txcw = txcw;
+ msec_delay(1);
+
+ /* If we have a signal (the cable is plugged in) then poll for a "Link-Up"
+ * indication in the Device Status Register. Time-out if a link isn't
+ * seen in 500 milliseconds (Auto-negotiation should complete in
+ * less than 500 milliseconds even if the other end is doing it in SW).
+ */
+ if((E1000_READ_REG(hw, CTRL) & E1000_CTRL_SWDPIN1) == signal) {
+ DEBUGOUT("Looking for Link\n");
+ for(i = 0; i < (LINK_UP_TIMEOUT / 10); i++) {
+ msec_delay(10);
+ status = E1000_READ_REG(hw, STATUS);
+ if(status & E1000_STATUS_LU) break;
+ }
+ if(i == (LINK_UP_TIMEOUT / 10)) {
+ /* AutoNeg failed to achieve a link, so we'll call
+ * e1000_check_for_link. This routine will force the link up if we
+ * detect a signal. This will allow us to communicate with
+ * non-autonegotiating link partners.
+ */
+ DEBUGOUT("Never got a valid link from auto-neg!!!\n");
+ hw->autoneg_failed = 1;
+ ret_val = e1000_check_for_link(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error while checking for link\n");
+ return ret_val;
+ }
+ hw->autoneg_failed = 0;
+ } else {
+ hw->autoneg_failed = 0;
+ DEBUGOUT("Valid Link Found\n");
+ }
+ } else {
+ DEBUGOUT("No Signal Detected\n");
+ }
+ return 0;
+}
+
+/******************************************************************************
+* Detects which PHY is present and configures the copper link, speed, and duplex
+*
+* hw - Struct containing variables accessed by shared code
+******************************************************************************/
+static int32_t
+e1000_setup_copper_link(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+ int32_t ret_val;
+ uint16_t i;
+ uint16_t phy_data;
+
+ DEBUGFUNC("e1000_setup_copper_link");
+
+ ctrl = E1000_READ_REG(hw, CTRL);
+ /* With 82543, we need to force speed and duplex on the MAC equal to what
+ * the PHY speed and duplex configuration is. In addition, we need to
+ * perform a hardware reset on the PHY to take it out of reset.
+ */
+ if(hw->mac_type > e1000_82543) {
+ ctrl |= E1000_CTRL_SLU;
+ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ } else {
+ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX | E1000_CTRL_SLU);
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ e1000_phy_hw_reset(hw);
+ }
+
+ /* Make sure we have a valid PHY */
+ ret_val = e1000_detect_gig_phy(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error, did not detect valid phy.\n");
+ return ret_val;
+ }
+ DEBUGOUT1("Phy ID = %x \n", hw->phy_id);
+
+ /* Enable CRS on TX. This must be set for half-duplex operation. */
+ if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+
+ /* Options:
+ * MDI/MDI-X = 0 (default)
+ * 0 - Auto for all speeds
+ * 1 - MDI mode
+ * 2 - MDI-X mode
+ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
+ */
+ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+
+ switch (hw->mdix) {
+ case 1:
+ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE;
+ break;
+ case 2:
+ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE;
+ break;
+ case 3:
+ phy_data |= M88E1000_PSCR_AUTO_X_1000T;
+ break;
+ case 0:
+ default:
+ phy_data |= M88E1000_PSCR_AUTO_X_MODE;
+ break;
+ }
+
+ /* Options:
+ * disable_polarity_correction = 0 (default)
+ * Automatic Correction for Reversed Cable Polarity
+ * 0 - Disabled
+ * 1 - Enabled
+ */
+ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL;
+ if(hw->disable_polarity_correction == 1)
+ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
+ if(e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* Force TX_CLK in the Extended PHY Specific Control Register
+ * to 25MHz clock.
+ */
+ if(e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ phy_data |= M88E1000_EPSCR_TX_CLK_25;
+
+ if (hw->phy_revision < M88E1011_I_REV_4) {
+ /* Configure Master and Slave downshift values */
+ phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK |
+ M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK);
+ phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X |
+ M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X);
+ if(e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+ }
+
+ /* SW Reset the PHY so all changes take effect */
+ ret_val = e1000_phy_reset(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error Resetting the PHY\n");
+ return ret_val;
+ }
+
+ /* Options:
+ * autoneg = 1 (default)
+ * PHY will advertise value(s) parsed from
+ * autoneg_advertised and fc
+ * autoneg = 0
+ * PHY will be set to 10H, 10F, 100H, or 100F
+ * depending on value parsed from forced_speed_duplex.
+ */
+
+ /* Is autoneg enabled? This is enabled by default or by software override.
+ * If so, call e1000_phy_setup_autoneg routine to parse the
+ * autoneg_advertised and fc options. If autoneg is NOT enabled, then the
+ * user should have provided a speed/duplex override. If so, then call
+ * e1000_phy_force_speed_duplex to parse and set this up.
+ */
+ if(hw->autoneg) {
+ /* Perform some bounds checking on the hw->autoneg_advertised
+ * parameter. If this variable is zero, then set it to the default.
+ */
+ hw->autoneg_advertised &= AUTONEG_ADVERTISE_SPEED_DEFAULT;
+
+ /* If autoneg_advertised is zero, we assume it was not defaulted
+ * by the calling code so we set to advertise full capability.
+ */
+ if(hw->autoneg_advertised == 0)
+ hw->autoneg_advertised = AUTONEG_ADVERTISE_SPEED_DEFAULT;
+
+ DEBUGOUT("Reconfiguring auto-neg advertisement params\n");
+ ret_val = e1000_phy_setup_autoneg(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error Setting up Auto-Negotiation\n");
+ return ret_val;
+ }
+ DEBUGOUT("Restarting Auto-Neg\n");
+
+ /* Restart auto-negotiation by setting the Auto Neg Enable bit and
+ * the Auto Neg Restart bit in the PHY control register.
+ */
+ if(e1000_read_phy_reg(hw, PHY_CTRL, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ phy_data |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
+ if(e1000_write_phy_reg(hw, PHY_CTRL, phy_data) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* Does the user want to wait for Auto-Neg to complete here, or
+ * check at a later time (for example, callback routine).
+ */
+ if(hw->wait_autoneg_complete) {
+ ret_val = e1000_wait_autoneg(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error while waiting for autoneg to complete\n");
+ return ret_val;
+ }
+ }
+ } else {
+ DEBUGOUT("Forcing speed and duplex\n");
+ ret_val = e1000_phy_force_speed_duplex(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error Forcing Speed and Duplex\n");
+ return ret_val;
+ }
+ }
+
+ /* Check link status. Wait up to 100 microseconds for link to become
+ * valid.
+ */
+ for(i = 0; i < 10; i++) {
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(phy_data & MII_SR_LINK_STATUS) {
+ /* We have link, so we need to finish the config process:
+ * 1) Set up the MAC to the current PHY speed/duplex
+ * if we are on 82543. If we
+ * are on newer silicon, we only need to configure
+ * collision distance in the Transmit Control Register.
+ * 2) Set up flow control on the MAC to that established with
+ * the link partner.
+ */
+ if(hw->mac_type >= e1000_82544) {
+ e1000_config_collision_dist(hw);
+ } else {
+ ret_val = e1000_config_mac_to_phy(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error configuring MAC to PHY settings\n");
+ return ret_val;
+ }
+ }
+ ret_val = e1000_config_fc_after_link_up(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error Configuring Flow Control\n");
+ return ret_val;
+ }
+ DEBUGOUT("Valid link established!!!\n");
+ return 0;
+ }
+ udelay(10);
+ }
+
+ DEBUGOUT("Unable to establish link!!!\n");
+ return 0;
+}
+
+/******************************************************************************
+* Configures PHY autoneg and flow control advertisement settings
+*
+* hw - Struct containing variables accessed by shared code
+******************************************************************************/
+int32_t
+e1000_phy_setup_autoneg(struct e1000_hw *hw)
+{
+ uint16_t mii_autoneg_adv_reg;
+ uint16_t mii_1000t_ctrl_reg;
+
+ DEBUGFUNC("e1000_phy_setup_autoneg");
+
+ /* Read the MII Auto-Neg Advertisement Register (Address 4). */
+ if(e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* Read the MII 1000Base-T Control Register (Address 9). */
+ if(e1000_read_phy_reg(hw, PHY_1000T_CTRL, &mii_1000t_ctrl_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* Need to parse both autoneg_advertised and fc and set up
+ * the appropriate PHY registers. First we will parse for
+ * autoneg_advertised software override. Since we can advertise
+ * a plethora of combinations, we need to check each bit
+ * individually.
+ */
+
+ /* First we clear all the 10/100 mb speed bits in the Auto-Neg
+ * Advertisement Register (Address 4) and the 1000 mb speed bits in
+ * the 1000Base-T Control Register (Address 9).
+ */
+ mii_autoneg_adv_reg &= ~REG4_SPEED_MASK;
+ mii_1000t_ctrl_reg &= ~REG9_SPEED_MASK;
+
+ DEBUGOUT1("autoneg_advertised %x\n", hw->autoneg_advertised);
+
+ /* Do we want to advertise 10 Mb Half Duplex? */
+ if(hw->autoneg_advertised & ADVERTISE_10_HALF) {
+ DEBUGOUT("Advertise 10mb Half duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS;
+ }
+
+ /* Do we want to advertise 10 Mb Full Duplex? */
+ if(hw->autoneg_advertised & ADVERTISE_10_FULL) {
+ DEBUGOUT("Advertise 10mb Full duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS;
+ }
+
+ /* Do we want to advertise 100 Mb Half Duplex? */
+ if(hw->autoneg_advertised & ADVERTISE_100_HALF) {
+ DEBUGOUT("Advertise 100mb Half duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS;
+ }
+
+ /* Do we want to advertise 100 Mb Full Duplex? */
+ if(hw->autoneg_advertised & ADVERTISE_100_FULL) {
+ DEBUGOUT("Advertise 100mb Full duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS;
+ }
+
+ /* We do not allow the Phy to advertise 1000 Mb Half Duplex */
+ if(hw->autoneg_advertised & ADVERTISE_1000_HALF) {
+ DEBUGOUT("Advertise 1000mb Half duplex requested, request denied!\n");
+ }
+
+ /* Do we want to advertise 1000 Mb Full Duplex? */
+ if(hw->autoneg_advertised & ADVERTISE_1000_FULL) {
+ DEBUGOUT("Advertise 1000mb Full duplex\n");
+ mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS;
+ }
+
+ /* Check for a software override of the flow control settings, and
+ * setup the PHY advertisement registers accordingly. If
+ * auto-negotiation is enabled, then software will have to set the
+ * "PAUSE" bits to the correct value in the Auto-Negotiation
+ * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto-negotiation.
+ *
+ * The possible values of the "fc" parameter are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause frames
+ * but not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames
+ * but we do not support receiving pause frames).
+ * 3: Both Rx and TX flow control (symmetric) are enabled.
+ * other: No software override. The flow control configuration
+ * in the EEPROM is used.
+ */
+ switch (hw->fc) {
+ case e1000_fc_none: /* 0 */
+ /* Flow control (RX & TX) is completely disabled by a
+ * software over-ride.
+ */
+ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+ break;
+ case e1000_fc_rx_pause: /* 1 */
+ /* RX Flow control is enabled, and TX Flow control is
+ * disabled, by a software over-ride.
+ */
+ /* Since there really isn't a way to advertise that we are
+ * capable of RX Pause ONLY, we will advertise that we
+ * support both symmetric and asymmetric RX PAUSE. Later
+ * (in e1000_config_fc_after_link_up) we will disable the
+ * hw's ability to send PAUSE frames.
+ */
+ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+ break;
+ case e1000_fc_tx_pause: /* 2 */
+ /* TX Flow control is enabled, and RX Flow control is
+ * disabled, by a software over-ride.
+ */
+ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR;
+ mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE;
+ break;
+ case e1000_fc_full: /* 3 */
+ /* Flow control (both RX and TX) is enabled by a software
+ * over-ride.
+ */
+ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+ break;
+ default:
+ DEBUGOUT("Flow control param set incorrectly\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ if(e1000_write_phy_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg);
+
+ if(e1000_write_phy_reg(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+ return 0;
+}
+
+/******************************************************************************
+* Force PHY speed and duplex settings to hw->forced_speed_duplex
+*
+* hw - Struct containing variables accessed by shared code
+******************************************************************************/
+static int32_t
+e1000_phy_force_speed_duplex(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+ int32_t ret_val;
+ uint16_t mii_ctrl_reg;
+ uint16_t mii_status_reg;
+ uint16_t phy_data;
+ uint16_t i;
+
+ DEBUGFUNC("e1000_phy_force_speed_duplex");
+
+ /* Turn off Flow control if we are forcing speed and duplex. */
+ hw->fc = e1000_fc_none;
+
+ DEBUGOUT1("hw->fc = %d\n", hw->fc);
+
+ /* Read the Device Control Register. */
+ ctrl = E1000_READ_REG(hw, CTRL);
+
+ /* Set the bits to Force Speed and Duplex in the Device Ctrl Reg. */
+ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+ ctrl &= ~(DEVICE_SPEED_MASK);
+
+ /* Clear the Auto Speed Detect Enable bit. */
+ ctrl &= ~E1000_CTRL_ASDE;
+
+ /* Read the MII Control Register. */
+ if(e1000_read_phy_reg(hw, PHY_CTRL, &mii_ctrl_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* We need to disable autoneg in order to force link and duplex. */
+
+ mii_ctrl_reg &= ~MII_CR_AUTO_NEG_EN;
+
+ /* Are we forcing Full or Half Duplex? */
+ if(hw->forced_speed_duplex == e1000_100_full ||
+ hw->forced_speed_duplex == e1000_10_full) {
+ /* We want to force full duplex so we SET the full duplex bits in the
+ * Device and MII Control Registers.
+ */
+ ctrl |= E1000_CTRL_FD;
+ mii_ctrl_reg |= MII_CR_FULL_DUPLEX;
+ DEBUGOUT("Full Duplex\n");
+ } else {
+ /* We want to force half duplex so we CLEAR the full duplex bits in
+ * the Device and MII Control Registers.
+ */
+ ctrl &= ~E1000_CTRL_FD;
+ mii_ctrl_reg &= ~MII_CR_FULL_DUPLEX;
+ DEBUGOUT("Half Duplex\n");
+ }
+
+ /* Are we forcing 100Mbps??? */
+ if(hw->forced_speed_duplex == e1000_100_full ||
+ hw->forced_speed_duplex == e1000_100_half) {
+ /* Set the 100Mb bit and turn off the 1000Mb and 10Mb bits. */
+ ctrl |= E1000_CTRL_SPD_100;
+ mii_ctrl_reg |= MII_CR_SPEED_100;
+ mii_ctrl_reg &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_10);
+ DEBUGOUT("Forcing 100mb ");
+ } else {
+ /* Set the 10Mb bit and turn off the 1000Mb and 100Mb bits. */
+ ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100);
+ mii_ctrl_reg |= MII_CR_SPEED_10;
+ mii_ctrl_reg &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100);
+ DEBUGOUT("Forcing 10mb ");
+ }
+
+ e1000_config_collision_dist(hw);
+
+ /* Write the configured values back to the Device Control Reg. */
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+
+ if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI
+ * forced whenever speed and duplex are forced.
+ */
+ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+ if(e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+ DEBUGOUT1("M88E1000 PSCR: %x \n", phy_data);
+
+ /* Need to reset the PHY or these changes will be ignored */
+ mii_ctrl_reg |= MII_CR_RESET;
+
+ /* Write back the modified PHY MII control register. */
+ if(e1000_write_phy_reg(hw, PHY_CTRL, mii_ctrl_reg) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+ udelay(1);
+
+ /* The wait_autoneg_complete flag may be a little misleading here.
+ * Since we are forcing speed and duplex, Auto-Neg is not enabled.
+ * But we do want to delay for a period while forcing only so we
+ * don't generate false No Link messages. So we will wait here
+ * only if the user has set wait_autoneg_complete to 1, which is
+ * the default.
+ */
+ if(hw->wait_autoneg_complete) {
+ /* We will wait for autoneg to complete. */
+ DEBUGOUT("Waiting for forced speed/duplex link.\n");
+ mii_status_reg = 0;
+
+ /* We will wait for autoneg to complete or 4.5 seconds to expire. */
+ for(i = PHY_FORCE_TIME; i > 0; i--) {
+ /* Read the MII Status Register and wait for Auto-Neg Complete bit
+ * to be set.
+ */
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(mii_status_reg & MII_SR_LINK_STATUS) break;
+ msec_delay(100);
+ }
+ if(i == 0) { /* We didn't get link */
+ /* Reset the DSP and wait again for link. */
+
+ ret_val = e1000_phy_reset_dsp(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error Resetting PHY DSP\n");
+ return ret_val;
+ }
+ }
+ /* This loop will early-out if the link condition has been met. */
+ for(i = PHY_FORCE_TIME; i > 0; i--) {
+ if(mii_status_reg & MII_SR_LINK_STATUS) break;
+ msec_delay(100);
+ /* Read the MII Status Register and wait for Auto-Neg Complete bit
+ * to be set.
+ */
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ }
+ }
+
+ /* Because we reset the PHY above, we need to re-force TX_CLK in the
+ * Extended PHY Specific Control Register to 25MHz clock. This value
+ * defaults back to a 2.5MHz clock when the PHY is reset.
+ */
+ if(e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ phy_data |= M88E1000_EPSCR_TX_CLK_25;
+ if(e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* In addition, because of the s/w reset above, we need to enable CRS on
+ * TX. This must be set for both full and half duplex operation.
+ */
+ if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+ if(e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+ return 0;
+}
+
+/******************************************************************************
+* Sets the collision distance in the Transmit Control register
+*
+* hw - Struct containing variables accessed by shared code
+*
+* Link should have been established previously. Programs a fixed
+* collision distance into the Transmit Control register.
+******************************************************************************/
+void
+e1000_config_collision_dist(struct e1000_hw *hw)
+{
+ uint32_t tctl;
+
+ tctl = E1000_READ_REG(hw, TCTL);
+
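+ /* Clear the COLD (collision distance) field, then shift the fixed
+ * collision-distance constant into position.
+ */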
+ tctl &= ~E1000_TCTL_COLD;
+ tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT;
+
+ E1000_WRITE_REG(hw, TCTL, tctl);
+ E1000_WRITE_FLUSH(hw);
+}
+
+/******************************************************************************
+* Sets MAC speed and duplex settings to reflect those in the PHY
+*
+* hw - Struct containing variables accessed by shared code
+*
+* Reads the current speed and duplex from the M88E1000 PHY Specific
+* Status register and forces the MAC to match them.
+******************************************************************************/
+static int32_t
+e1000_config_mac_to_phy(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+ uint16_t phy_data;
+
+ DEBUGFUNC("e1000_config_mac_to_phy");
+
+ /* Read the Device Control Register and set the bits to Force Speed
+ * and Duplex.
+ */
+ ctrl = E1000_READ_REG(hw, CTRL);
+ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+ ctrl &= ~(E1000_CTRL_SPD_SEL | E1000_CTRL_ILOS);
+
+ /* Set up duplex in the Device Control and Transmit Control
+ * registers depending on negotiated values.
+ */
+ if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(phy_data & M88E1000_PSSR_DPLX) ctrl |= E1000_CTRL_FD;
+ else ctrl &= ~E1000_CTRL_FD;
+
+ e1000_config_collision_dist(hw);
+
+ /* Set up speed in the Device Control register depending on
+ * negotiated values.
+ */
+ if((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS)
+ ctrl |= E1000_CTRL_SPD_1000;
+ else if((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_100MBS)
+ ctrl |= E1000_CTRL_SPD_100;
+ /* Write the configured values back to the Device Control Reg. */
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ return 0;
+}
+
+/******************************************************************************
+ * Forces the MAC's flow control settings.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Sets the TFCE and RFCE bits in the device control register to reflect
+ * the adapter settings. TFCE and RFCE need to be explicitly set by
+ * software when a Copper PHY is used because autonegotiation is managed
+ * by the PHY rather than the MAC. Software must also configure these
+ * bits when link is forced on a fiber connection.
+ *****************************************************************************/
+static int32_t
+e1000_force_mac_fc(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+
+ DEBUGFUNC("e1000_force_mac_fc");
+
+ /* Get the current configuration of the Device Control Register */
+ ctrl = E1000_READ_REG(hw, CTRL);
+
+ /* Because we didn't get link via the internal auto-negotiation
+ * mechanism (we either forced link or we got link via PHY
+ * auto-neg), we have to manually enable/disable transmit and
+ * receive flow control.
+ *
+ * The "Case" statement below enables/disable flow control
+ * according to the "hw->fc" parameter.
+ *
+ * The possible values of the "fc" parameter are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause
+ * frames but not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames
+ * but we do not support receiving pause frames).
+ * 3: Both Rx and TX flow control (symmetric) are enabled.
+ * other: No other values should be possible at this point.
+ */
+
+ switch (hw->fc) {
+ case e1000_fc_none:
+ ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE));
+ break;
+ case e1000_fc_rx_pause:
+ ctrl &= (~E1000_CTRL_TFCE);
+ ctrl |= E1000_CTRL_RFCE;
+ break;
+ case e1000_fc_tx_pause:
+ ctrl &= (~E1000_CTRL_RFCE);
+ ctrl |= E1000_CTRL_TFCE;
+ break;
+ case e1000_fc_full:
+ ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE);
+ break;
+ default:
+ DEBUGOUT("Flow control param set incorrectly\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ /* Disable TX Flow Control for 82542 (rev 2.0) */
+ if(hw->mac_type == e1000_82542_rev2_0)
+ ctrl &= (~E1000_CTRL_TFCE);
+
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ return 0;
+}
+
+/******************************************************************************
+ * Configures flow control settings after link is established
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Should be called immediately after a valid link has been established.
+ * Forces MAC flow control settings if link was forced. When in MII/GMII mode
+ * and autonegotiation is enabled, the MAC flow control settings will be set
+ * based on the flow control negotiated by the PHY. In TBI mode, the TFCE
+ * and RFCE bits will be automatically set to the negotiated flow control mode.
+ *****************************************************************************/
+int32_t
+e1000_config_fc_after_link_up(struct e1000_hw *hw)
+{
+ int32_t ret_val;
+ uint16_t mii_status_reg;
+ uint16_t mii_nway_adv_reg;
+ uint16_t mii_nway_lp_ability_reg;
+ uint16_t speed;
+ uint16_t duplex;
+
+ DEBUGFUNC("e1000_config_fc_after_link_up");
+
+ /* Check for the case where we have fiber media and auto-neg failed
+ * so we had to force link. In this case, we need to force the
+ * configuration of the MAC to match the "fc" parameter.
+ */
+ if(((hw->media_type == e1000_media_type_fiber) && (hw->autoneg_failed)) ||
+ ((hw->media_type == e1000_media_type_copper) && (!hw->autoneg))) {
+ ret_val = e1000_force_mac_fc(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error forcing flow control settings\n");
+ return ret_val;
+ }
+ }
+
+ /* Check for the case where we have copper media and auto-neg is
+ * enabled. In this case, we need to check and see if Auto-Neg
+ * has completed, and if so, how the PHY and link partner has
+ * flow control configured.
+ */
+ if((hw->media_type == e1000_media_type_copper) && hw->autoneg) {
+ /* Read the MII Status Register and check to see if AutoNeg
+ * has completed. We read this twice because this reg has
+ * some "sticky" (latched) bits.
+ */
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) {
+ DEBUGOUT("PHY Read Error \n");
+ return -E1000_ERR_PHY;
+ }
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg) < 0) {
+ DEBUGOUT("PHY Read Error \n");
+ return -E1000_ERR_PHY;
+ }
+
+ if(mii_status_reg & MII_SR_AUTONEG_COMPLETE) {
+ /* The AutoNeg process has completed, so we now need to
+ * read both the Auto Negotiation Advertisement Register
+ * (Address 4) and the Auto_Negotiation Base Page Ability
+ * Register (Address 5) to determine how flow control was
+ * negotiated.
+ */
+ if(e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_nway_adv_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(e1000_read_phy_reg(hw, PHY_LP_ABILITY, &mii_nway_lp_ability_reg) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ /* Two bits in the Auto Negotiation Advertisement Register
+ * (Address 4) and two bits in the Auto Negotiation Base
+ * Page Ability Register (Address 5) determine flow control
+ * for both the PHY and the link partner. The following
+ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
+ * 1999, describes these PAUSE resolution bits and how flow
+ * control is determined based upon these settings.
+ * NOTE: DC = Don't Care
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
+ *-------|---------|-------|---------|--------------------
+ * 0 | 0 | DC | DC | e1000_fc_none
+ * 0 | 1 | 0 | DC | e1000_fc_none
+ * 0 | 1 | 1 | 0 | e1000_fc_none
+ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause
+ * 1 | 0 | 0 | DC | e1000_fc_none
+ * 1 | DC | 1 | DC | e1000_fc_full
+ * 1 | 1 | 0 | 0 | e1000_fc_none
+ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause
+ *
+ */
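+ /* Worked example: if we advertised PAUSE=1/ASM_DIR=1 and the link
+ * partner advertised PAUSE=0/ASM_DIR=1, the table resolves to
+ * e1000_fc_rx_pause: we honor PAUSE frames from the partner but do
+ * not send any ourselves.
+ */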
+ /* Are both PAUSE bits set to 1? If so, this implies
+ * Symmetric Flow Control is enabled at both ends. The
+ * ASM_DIR bits are irrelevant per the spec.
+ *
+ * For Symmetric Flow Control:
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 1 | DC | 1 | DC | e1000_fc_full
+ *
+ */
+ if((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) {
+ /* Now we need to check if the user selected RX ONLY
+ * of pause frames. In this case, we had to advertise
+ * FULL flow control because we could not advertise RX
+ * ONLY. Hence, we must now check to see if we need to
+ * turn OFF the TRANSMISSION of PAUSE frames.
+ */
+ if(hw->original_fc == e1000_fc_full) {
+ hw->fc = e1000_fc_full;
+ DEBUGOUT("Flow Control = FULL.\r\n");
+ } else {
+ hw->fc = e1000_fc_rx_pause;
+ DEBUGOUT("Flow Control = RX PAUSE frames only.\r\n");
+ }
+ }
+ /* For receiving PAUSE frames ONLY.
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause
+ *
+ */
+ else if(!(mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+ hw->fc = e1000_fc_tx_pause;
+ DEBUGOUT("Flow Control = TX PAUSE frames only.\r\n");
+ }
+ /* For transmitting PAUSE frames ONLY.
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause
+ *
+ */
+ else if((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+ !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+ hw->fc = e1000_fc_rx_pause;
+ DEBUGOUT("Flow Control = RX PAUSE frames only.\r\n");
+ }
+ /* Per the IEEE spec, at this point flow control should be
+ * disabled. However, we want to consider that we could
+ * be connected to a legacy switch that doesn't advertise
+ * desired flow control, but can be forced on the link
+ * partner. So if we advertised no flow control, that is
+ * what we will resolve to. If we advertised some kind of
+ * receive capability (Rx Pause Only or Full Flow Control)
+ * and the link partner advertised none, we will configure
+ * ourselves to enable Rx Flow Control only. We can do
+ * this safely for two reasons: If the link partner really
+ * didn't want flow control enabled, and we enable Rx, no
+ * harm done since we won't be receiving any PAUSE frames
+ * anyway. If the intent on the link partner was to have
+ * flow control enabled, then by us enabling RX only, we
+ * can at least receive pause frames and process them.
+ * This is a good idea because in most cases, since we are
+ * predominantly a server NIC, more times than not we will
+ * be asked to delay transmission of packets than asking
+ * our link partner to pause transmission of frames.
+ */
+ else if(hw->original_fc == e1000_fc_none ||
+ hw->original_fc == e1000_fc_tx_pause) {
+ hw->fc = e1000_fc_none;
+ DEBUGOUT("Flow Control = NONE.\r\n");
+ } else {
+ hw->fc = e1000_fc_rx_pause;
+ DEBUGOUT("Flow Control = RX PAUSE frames only.\r\n");
+ }
+
+ /* Now we need to do one last check... If we auto-
+ * negotiated to HALF DUPLEX, flow control should not be
+ * enabled per IEEE 802.3 spec.
+ */
+ e1000_get_speed_and_duplex(hw, &speed, &duplex);
+
+ if(duplex == HALF_DUPLEX)
+ hw->fc = e1000_fc_none;
+
+ /* Now we call a subroutine to actually force the MAC
+ * controller to use the correct flow control settings.
+ */
+ ret_val = e1000_force_mac_fc(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error forcing flow control settings\n");
+ return ret_val;
+ }
+ } else {
+ DEBUGOUT("Copper PHY and Auto Neg has not completed.\r\n");
+ }
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Checks to see if the link status of the hardware has changed.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Called by any function that needs to check the link status of the adapter.
+ *****************************************************************************/
+int32_t
+e1000_check_for_link(struct e1000_hw *hw)
+{
+ uint32_t rxcw;
+ uint32_t ctrl;
+ uint32_t status;
+ uint32_t rctl;
+ uint32_t signal;
+ int32_t ret_val;
+ uint16_t phy_data;
+ uint16_t lp_capability;
+
+ DEBUGFUNC("e1000_check_for_link");
+
+ /* On adapters with a MAC newer than 82544, SW Definable pin 1 will be
+ * set when the optics detect a signal. On older adapters, it will be
+ * cleared when there is a signal.
+ */
+ if(hw->mac_type > e1000_82544) signal = E1000_CTRL_SWDPIN1;
+ else signal = 0;
+
+ ctrl = E1000_READ_REG(hw, CTRL);
+ status = E1000_READ_REG(hw, STATUS);
+ rxcw = E1000_READ_REG(hw, RXCW);
+
+ /* If we have a copper PHY then we only want to go out to the PHY
+ * registers to see if Auto-Neg has completed and/or if our link
+ * status has changed. The get_link_status flag will be set if we
+ * receive a Link Status Change interrupt or we have Rx Sequence
+ * Errors.
+ */
+ if((hw->media_type == e1000_media_type_copper) && hw->get_link_status) {
+ /* First we want to see if the MII Status Register reports
+ * link. If so, then we want to get the current speed/duplex
+ * of the PHY.
+ * Read the register twice since the link bit is sticky.
+ */
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+
+ if(phy_data & MII_SR_LINK_STATUS) {
+ hw->get_link_status = FALSE;
+ } else {
+ /* No link detected */
+ return 0;
+ }
+
+ /* If we are forcing speed/duplex, then we simply return since
+ * we have already determined whether we have link or not.
+ */
+ if(!hw->autoneg) return -E1000_ERR_CONFIG;
+
+ /* We have a M88E1000 PHY and Auto-Neg is enabled. If we
+ * have Si on board that is 82544 or newer, Auto
+ * Speed Detection takes care of MAC speed/duplex
+ * configuration. So we only need to configure Collision
+ * Distance in the MAC. Otherwise, we need to force
+ * speed/duplex on the MAC to the current PHY speed/duplex
+ * settings.
+ */
+ if(hw->mac_type >= e1000_82544)
+ e1000_config_collision_dist(hw);
+ else {
+ ret_val = e1000_config_mac_to_phy(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error configuring MAC to PHY settings\n");
+ return ret_val;
+ }
+ }
+
+ /* Configure Flow Control now that Auto-Neg has completed. First, we
+ * need to restore the desired flow control settings because we may
+ * have had to re-autoneg with a different link partner.
+ */
+ ret_val = e1000_config_fc_after_link_up(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error configuring flow control\n");
+ return ret_val;
+ }
+
+ /* At this point we know that we are on copper and we have
+ * auto-negotiated link. These are conditions for checking the link
+ * partner capability register. We use the link partner capability to
+ * determine if TBI Compatibility needs to be turned on or off. If
+ * the link partner advertises any speed in addition to Gigabit, then
+ * we assume that they are GMII-based, and TBI compatibility is not
+ * needed. If no other speeds are advertised, we assume the link
+ * partner is TBI-based, and we turn on TBI Compatibility.
+ */
+ if(hw->tbi_compatibility_en) {
+ if(e1000_read_phy_reg(hw, PHY_LP_ABILITY, &lp_capability) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(lp_capability & (NWAY_LPAR_10T_HD_CAPS |
+ NWAY_LPAR_10T_FD_CAPS |
+ NWAY_LPAR_100TX_HD_CAPS |
+ NWAY_LPAR_100TX_FD_CAPS |
+ NWAY_LPAR_100T4_CAPS)) {
+ /* If our link partner advertises anything in addition to
+ * gigabit, we do not need to enable TBI compatibility.
+ */
+ if(hw->tbi_compatibility_on) {
+ /* If we previously were in the mode, turn it off. */
+ rctl = E1000_READ_REG(hw, RCTL);
+ rctl &= ~E1000_RCTL_SBP;
+ E1000_WRITE_REG(hw, RCTL, rctl);
+ hw->tbi_compatibility_on = FALSE;
+ }
+ } else {
+ /* If TBI compatibility was previously off, turn it on. For
+ * compatibility with a TBI link partner, we will store bad
+ * packets. Some frames have an additional byte on the end and
+ * will look like CRC errors to the hardware.
+ */
+ if(!hw->tbi_compatibility_on) {
+ hw->tbi_compatibility_on = TRUE;
+ rctl = E1000_READ_REG(hw, RCTL);
+ rctl |= E1000_RCTL_SBP;
+ E1000_WRITE_REG(hw, RCTL, rctl);
+ }
+ }
+ }
+ }
+ /* If we don't have link (auto-negotiation failed or link partner cannot
+ * auto-negotiate), the cable is plugged in (we have signal), and our
+ * link partner is not trying to auto-negotiate with us (we are receiving
+ * idles or data), we need to force link up. We also need to give
+ * auto-negotiation time to complete, in case the cable was just plugged
+ * in. The autoneg_failed flag does this.
+ */
+ else if((hw->media_type == e1000_media_type_fiber) &&
+ (!(status & E1000_STATUS_LU)) &&
+ ((ctrl & E1000_CTRL_SWDPIN1) == signal) &&
+ (!(rxcw & E1000_RXCW_C))) {
+ if(hw->autoneg_failed == 0) {
+ hw->autoneg_failed = 1;
+ return 0;
+ }
+ DEBUGOUT("NOT RXing /C/, disable AutoNeg and force link.\r\n");
+
+ /* Disable auto-negotiation in the TXCW register */
+ E1000_WRITE_REG(hw, TXCW, (hw->txcw & ~E1000_TXCW_ANE));
+
+ /* Force link-up and also force full-duplex. */
+ ctrl = E1000_READ_REG(hw, CTRL);
+ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD);
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+
+ /* Configure Flow Control after forcing link up. */
+ ret_val = e1000_config_fc_after_link_up(hw);
+ if(ret_val < 0) {
+ DEBUGOUT("Error configuring flow control\n");
+ return ret_val;
+ }
+ }
+ /* If we are forcing link and we are receiving /C/ ordered sets, re-enable
+ * auto-negotiation in the TXCW register and disable forced link in the
+ * Device Control register in an attempt to auto-negotiate with our link
+ * partner.
+ */
+ else if((hw->media_type == e1000_media_type_fiber) &&
+ (ctrl & E1000_CTRL_SLU) &&
+ (rxcw & E1000_RXCW_C)) {
+ DEBUGOUT("RXing /C/, enable AutoNeg and stop forcing link.\r\n");
+ E1000_WRITE_REG(hw, TXCW, hw->txcw);
+ E1000_WRITE_REG(hw, CTRL, (ctrl & ~E1000_CTRL_SLU));
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Detects the current speed and duplex settings of the hardware.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * speed - Speed of the connection
+ * duplex - Duplex setting of the connection
+ *****************************************************************************/
+void
+e1000_get_speed_and_duplex(struct e1000_hw *hw,
+ uint16_t *speed,
+ uint16_t *duplex)
+{
+ uint32_t status;
+
+ DEBUGFUNC("e1000_get_speed_and_duplex");
+
+ if(hw->mac_type >= e1000_82543) {
+ status = E1000_READ_REG(hw, STATUS);
+ if(status & E1000_STATUS_SPEED_1000) {
+ *speed = SPEED_1000;
+ DEBUGOUT("1000 Mbs, ");
+ } else if(status & E1000_STATUS_SPEED_100) {
+ *speed = SPEED_100;
+ DEBUGOUT("100 Mbs, ");
+ } else {
+ *speed = SPEED_10;
+ DEBUGOUT("10 Mbs, ");
+ }
+
+ if(status & E1000_STATUS_FD) {
+ *duplex = FULL_DUPLEX;
+ DEBUGOUT("Full Duplex\r\n");
+ } else {
+ *duplex = HALF_DUPLEX;
+ DEBUGOUT(" Half Duplex\r\n");
+ }
+ } else {
+ DEBUGOUT("1000 Mbs, Full Duplex\r\n");
+ *speed = SPEED_1000;
+ *duplex = FULL_DUPLEX;
+ }
+}
+
+/******************************************************************************
+* Blocks until autoneg completes or times out (~4.5 seconds)
+*
+* hw - Struct containing variables accessed by shared code
+******************************************************************************/
+int32_t
+e1000_wait_autoneg(struct e1000_hw *hw)
+{
+ uint16_t i;
+ uint16_t phy_data;
+
+ DEBUGFUNC("e1000_wait_autoneg");
+ DEBUGOUT("Waiting for Auto-Neg to complete.\n");
+
+ /* We will wait for autoneg to complete or 4.5 seconds to expire. */
+ for(i = PHY_AUTO_NEG_TIME; i > 0; i--) {
+ /* Read the MII Status Register and wait for Auto-Neg
+ * Complete bit to be set.
+ */
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ if(phy_data & MII_SR_AUTONEG_COMPLETE) {
+ return 0;
+ }
+ msec_delay(100);
+ }
+ return 0;
+}
+
+/******************************************************************************
+* Raises the Management Data Clock
+*
+* hw - Struct containing variables accessed by shared code
+* ctrl - Device control register's current value
+******************************************************************************/
+static void
+e1000_raise_mdi_clk(struct e1000_hw *hw,
+ uint32_t *ctrl)
+{
+ /* Raise the clock input to the Management Data Clock (by setting the MDC
+ * bit), and then delay 2 microseconds.
+ */
+ E1000_WRITE_REG(hw, CTRL, (*ctrl | E1000_CTRL_MDC));
+ E1000_WRITE_FLUSH(hw);
+ udelay(2);
+}
+
+/******************************************************************************
+* Lowers the Management Data Clock
+*
+* hw - Struct containing variables accessed by shared code
+* ctrl - Device control register's current value
+******************************************************************************/
+static void
+e1000_lower_mdi_clk(struct e1000_hw *hw,
+ uint32_t *ctrl)
+{
+ /* Lower the clock input to the Management Data Clock (by clearing the MDC
+ * bit), and then delay 2 microseconds.
+ */
+ E1000_WRITE_REG(hw, CTRL, (*ctrl & ~E1000_CTRL_MDC));
+ E1000_WRITE_FLUSH(hw);
+ udelay(2);
+}
+
+/******************************************************************************
+* Shifts data bits out to the PHY
+*
+* hw - Struct containing variables accessed by shared code
+* data - Data to send out to the PHY
+* count - Number of bits to shift out
+*
+* Bits are shifted out in MSB to LSB order.
+******************************************************************************/
+static void
+e1000_shift_out_mdi_bits(struct e1000_hw *hw,
+ uint32_t data,
+ uint16_t count)
+{
+ uint32_t ctrl;
+ uint32_t mask;
+
+ /* We need to shift "count" number of bits out to the PHY. So, the value
+ * in the "data" parameter will be shifted out to the PHY one bit at a
+ * time. In order to do this, "data" must be broken down into bits.
+ */
+ mask = 0x01;
+ mask <<= (count - 1);
+
+ ctrl = E1000_READ_REG(hw, CTRL);
+
+ /* Set MDIO_DIR and MDC_DIR direction bits to be used as output pins. */
+ ctrl |= (E1000_CTRL_MDIO_DIR | E1000_CTRL_MDC_DIR);
+
+ while(mask) {
+ /* A "1" is shifted out to the PHY by setting the MDIO bit to "1" and
+ * then raising and lowering the Management Data Clock. A "0" is
+ * shifted out to the PHY by setting the MDIO bit to "0" and then
+ * raising and lowering the clock.
+ */
+ if(data & mask) ctrl |= E1000_CTRL_MDIO;
+ else ctrl &= ~E1000_CTRL_MDIO;
+
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ E1000_WRITE_FLUSH(hw);
+
+ udelay(2);
+
+ e1000_raise_mdi_clk(hw, &ctrl);
+ e1000_lower_mdi_clk(hw, &ctrl);
+
+ mask = mask >> 1;
+ }
+}
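+
+/* Worked example (illustrative): for data = 0x6 and count = 3, mask starts
+ * at 0x4, so the bits appear on MDIO in the order 1, 1, 0 -- MSB first,
+ * with one raise/lower clock cycle per bit.
+ */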
+
+/******************************************************************************
+* Shifts data bits in from the PHY
+*
+* hw - Struct containing variables accessed by shared code
+*
+* Bits are shifted in MSB-first (MSB to LSB order).
+******************************************************************************/
+static uint16_t
+e1000_shift_in_mdi_bits(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+ uint16_t data = 0;
+ uint8_t i;
+
+ /* In order to read a register from the PHY, we need to shift in a total
+ * of 18 bits from the PHY. The first two bit (turnaround) times are used
+ * to avoid contention on the MDIO pin when a read operation is performed.
+ * These two bits are ignored by us and thrown away. Bits are "shifted in"
+ * by raising the input to the Management Data Clock (setting the MDC bit),
+ * and then reading the value of the MDIO bit.
+ */
+ ctrl = E1000_READ_REG(hw, CTRL);
+
+ /* Clear MDIO_DIR (SWDPIO1) to indicate this bit is to be used as input. */
+ ctrl &= ~E1000_CTRL_MDIO_DIR;
+ ctrl &= ~E1000_CTRL_MDIO;
+
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ E1000_WRITE_FLUSH(hw);
+
+ /* Raise and Lower the clock before reading in the data. This accounts for
+ * the turnaround bits. The first clock occurred when we clocked out the
+ * last bit of the Register Address.
+ */
+ e1000_raise_mdi_clk(hw, &ctrl);
+ e1000_lower_mdi_clk(hw, &ctrl);
+
+ for(data = 0, i = 0; i < 16; i++) {
+ data = data << 1;
+ e1000_raise_mdi_clk(hw, &ctrl);
+ ctrl = E1000_READ_REG(hw, CTRL);
+ /* Check to see if we shifted in a "1". */
+ if(ctrl & E1000_CTRL_MDIO) data |= 1;
+ e1000_lower_mdi_clk(hw, &ctrl);
+ }
+
+ e1000_raise_mdi_clk(hw, &ctrl);
+ e1000_lower_mdi_clk(hw, &ctrl);
+
+ return data;
+}
+
+/*****************************************************************************
+* Reads the value from a PHY register
+*
+* hw - Struct containing variables accessed by shared code
+* reg_addr - address of the PHY register to read
+******************************************************************************/
+int32_t
+e1000_read_phy_reg(struct e1000_hw *hw,
+ uint32_t reg_addr,
+ uint16_t *phy_data)
+{
+ uint32_t i;
+ uint32_t mdic = 0;
+ const uint32_t phy_addr = 1;
+
+ DEBUGFUNC("e1000_read_phy_reg");
+
+ if(reg_addr > MAX_PHY_REG_ADDRESS) {
+ DEBUGOUT1("PHY Address %d is out of range\n", reg_addr);
+ return -E1000_ERR_PARAM;
+ }
+
+ if(hw->mac_type > e1000_82543) {
+ /* Set up Op-code, Phy Address, and register address in the MDI
+ * Control register. The MAC will take care of interfacing with the
+ * PHY to retrieve the desired data.
+ */
+ mdic = ((reg_addr << E1000_MDIC_REG_SHIFT) |
+ (phy_addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_READ));
+
+ E1000_WRITE_REG(hw, MDIC, mdic);
+
+ /* Poll the ready bit to see if the MDI read completed */
+ for(i = 0; i < 64; i++) {
+ udelay(10);
+ mdic = E1000_READ_REG(hw, MDIC);
+ if(mdic & E1000_MDIC_READY) break;
+ }
+ if(!(mdic & E1000_MDIC_READY)) {
+ DEBUGOUT("MDI Read did not complete\n");
+ return -E1000_ERR_PHY;
+ }
+ if(mdic & E1000_MDIC_ERROR) {
+ DEBUGOUT("MDI Error\n");
+ return -E1000_ERR_PHY;
+ }
+ *phy_data = (uint16_t) mdic;
+ } else {
+ /* We must first send a preamble through the MDIO pin to signal the
+ * beginning of an MII instruction. This is done by sending 32
+ * consecutive "1" bits.
+ */
+ e1000_shift_out_mdi_bits(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE);
+
+ /* Now combine the next few fields that are required for a read
+ * operation. We use this method instead of calling the
+ * e1000_shift_out_mdi_bits routine five different times. The format of
+ * a MII read instruction consists of a shift out of 14 bits and is
+ * defined as follows:
+ * <Preamble><SOF><Op Code><Phy Addr><Reg Addr>
+ * followed by a shift in of 18 bits. The first two bits shifted in
+ * are TurnAround bits used to avoid contention on the MDIO pin when a
+ * READ operation is performed; they are thrown away. The remaining
+ * 16 bits shifted in contain the desired data.
+ */
+ mdic = ((reg_addr) | (phy_addr << 5) |
+ (PHY_OP_READ << 10) | (PHY_SOF << 12));
+
+ e1000_shift_out_mdi_bits(hw, mdic, 14);
+
+ /* Now that we've shifted out the read command to the MII, we need to
+ * "shift in" the 16-bit contents of the requested PHY register (18
+ * bits in total, counting the two turnaround bits).
+ */
+ *phy_data = e1000_shift_in_mdi_bits(hw);
+ }
+ return 0;
+}
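+
+/* Worked example of the bit-banged read frame (illustrative, assuming the
+ * standard MDIO encodings PHY_SOF = 01 and PHY_OP_READ = 10 used by this
+ * driver): for reg_addr = 1 and phy_addr = 1,
+ * mdic = 1 | (1 << 5) | (PHY_OP_READ << 10) | (PHY_SOF << 12), which shifts
+ * out MSB first over 14 bits as <01><10><00001><00001> -- start-of-frame,
+ * read opcode, PHY address, register address -- before the 18-bit shift in.
+ */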
+
+/******************************************************************************
+* Writes a value to a PHY register
+*
+* hw - Struct containing variables accessed by shared code
+* reg_addr - address of the PHY register to write
+* data - data to write to the PHY
+******************************************************************************/
+int32_t
+e1000_write_phy_reg(struct e1000_hw *hw,
+ uint32_t reg_addr,
+ uint16_t phy_data)
+{
+ uint32_t i;
+ uint32_t mdic = 0;
+ const uint32_t phy_addr = 1;
+
+ DEBUGFUNC("e1000_write_phy_reg");
+
+ if(reg_addr > MAX_PHY_REG_ADDRESS) {
+ DEBUGOUT1("PHY Address %d is out of range\n", reg_addr);
+ return -E1000_ERR_PARAM;
+ }
+
+ if(hw->mac_type > e1000_82543) {
+ /* Set up Op-code, Phy Address, register address, and data intended
+ * for the PHY register in the MDI Control register. The MAC will take
+ * care of interfacing with the PHY to send the desired data.
+ */
+ mdic = (((uint32_t) phy_data) |
+ (reg_addr << E1000_MDIC_REG_SHIFT) |
+ (phy_addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_WRITE));
+
+ E1000_WRITE_REG(hw, MDIC, mdic);
+
+ /* Poll the ready bit to see if the MDI write completed */
+ for(i = 0; i < 64; i++) {
+ udelay(10);
+ mdic = E1000_READ_REG(hw, MDIC);
+ if(mdic & E1000_MDIC_READY) break;
+ }
+ if(!(mdic & E1000_MDIC_READY)) {
+ DEBUGOUT("MDI Write did not complete\n");
+ return -E1000_ERR_PHY;
+ }
+ } else {
+ /* We'll need to use the SW defined pins to shift the write command
+ * out to the PHY. We first send a preamble to the PHY to signal the
+ * beginning of the MII instruction. This is done by sending 32
+ * consecutive "1" bits.
+ */
+ e1000_shift_out_mdi_bits(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE);
+
+ /* Now combine the remaining required fields that will indicate a
+ * write operation. We use this method instead of calling the
+ * e1000_shift_out_mdi_bits routine for each field in the command. The
+ * format of a MII write instruction is as follows:
+ * <Preamble><SOF><Op Code><Phy Addr><Reg Addr><Turnaround><Data>.
+ */
+ mdic = ((PHY_TURNAROUND) | (reg_addr << 2) | (phy_addr << 7) |
+ (PHY_OP_WRITE << 12) | (PHY_SOF << 14));
+ mdic <<= 16;
+ mdic |= (uint32_t) phy_data;
+
+ e1000_shift_out_mdi_bits(hw, mdic, 32);
+ }
+ return 0;
+}
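+
+/* Worked example (illustrative, assuming the standard MDIO encodings
+ * PHY_SOF = 01, PHY_OP_WRITE = 01, PHY_TURNAROUND = 10): a write frame is
+ * 32 bits. For reg_addr = 0 and phy_addr = 1 the upper half is
+ * (PHY_SOF << 14) | (PHY_OP_WRITE << 12) | (1 << 7) | PHY_TURNAROUND and
+ * the lower half is the 16 data bits, so the whole frame shifts out MSB
+ * first as <01><01><00001><00000><10><data>.
+ */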
+
+/******************************************************************************
+* Returns the PHY to the power-on reset state
+*
+* hw - Struct containing variables accessed by shared code
+******************************************************************************/
+void
+e1000_phy_hw_reset(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+ uint32_t ctrl_ext;
+
+ DEBUGFUNC("e1000_phy_hw_reset");
+
+ DEBUGOUT("Resetting Phy...\n");
+
+ if(hw->mac_type > e1000_82543) {
+ /* Read the device control register and assert the E1000_CTRL_PHY_RST
+ * bit. Then, take it out of reset.
+ */
+ ctrl = E1000_READ_REG(hw, CTRL);
+ E1000_WRITE_REG(hw, CTRL, ctrl | E1000_CTRL_PHY_RST);
+ E1000_WRITE_FLUSH(hw);
+ msec_delay(10);
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ E1000_WRITE_FLUSH(hw);
+ } else {
+ /* Read the Extended Device Control Register, assert the PHY_RESET_DIR
+ * bit to put the PHY into reset. Then, take it out of reset.
+ */
+ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_EXT_SDP4_DIR;
+ ctrl_ext &= ~E1000_CTRL_EXT_SDP4_DATA;
+ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext);
+ E1000_WRITE_FLUSH(hw);
+ msec_delay(10);
+ ctrl_ext |= E1000_CTRL_EXT_SDP4_DATA;
+ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext);
+ E1000_WRITE_FLUSH(hw);
+ }
+ udelay(150);
+}
+
+/******************************************************************************
+* Resets the PHY
+*
+* hw - Struct containing variables accessed by shared code
+*
+* Sets bit 15 of the MII Control register
+******************************************************************************/
+int32_t
+e1000_phy_reset(struct e1000_hw *hw)
+{
+ uint16_t phy_data;
+
+ DEBUGFUNC("e1000_phy_reset");
+
+ if(e1000_read_phy_reg(hw, PHY_CTRL, &phy_data) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ phy_data |= MII_CR_RESET;
+ if(e1000_write_phy_reg(hw, PHY_CTRL, phy_data) < 0) {
+ DEBUGOUT("PHY Write Error\n");
+ return -E1000_ERR_PHY;
+ }
+ udelay(1);
+ return 0;
+}
+
+/******************************************************************************
+* Probes the expected PHY address for known PHY IDs
+*
+* hw - Struct containing variables accessed by shared code
+******************************************************************************/
+int32_t
+e1000_detect_gig_phy(struct e1000_hw *hw)
+{
+ uint16_t phy_id_high, phy_id_low;
+ boolean_t match = FALSE;
+
+ DEBUGFUNC("e1000_detect_gig_phy");
+
+ /* Read the PHY ID Registers to identify which PHY is onboard. */
+ if(e1000_read_phy_reg(hw, PHY_ID1, &phy_id_high) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ hw->phy_id = ((uint32_t) phy_id_high) << 16;
+ udelay(2);
+ if(e1000_read_phy_reg(hw, PHY_ID2, &phy_id_low) < 0) {
+ DEBUGOUT("PHY Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ hw->phy_id |= (uint32_t) (phy_id_low & PHY_REVISION_MASK);
+ hw->phy_revision = (uint32_t) phy_id_low & ~PHY_REVISION_MASK;
+
+ switch(hw->mac_type) {
+ case e1000_82543:
+ if(hw->phy_id == M88E1000_E_PHY_ID) match = TRUE;
+ break;
+ case e1000_82544:
+ if(hw->phy_id == M88E1000_I_PHY_ID) match = TRUE;
+ break;
+ case e1000_82540:
+ case e1000_82545:
+ case e1000_82546:
+ if(hw->phy_id == M88E1011_I_PHY_ID) match = TRUE;
+ break;
+ default:
+ DEBUGOUT1("Invalid MAC type %d\n", hw->mac_type);
+ return -E1000_ERR_CONFIG;
+ }
+ if(match) {
+ DEBUGOUT1("PHY ID 0x%X detected\n", hw->phy_id);
+ return 0;
+ }
+ DEBUGOUT1("Invalid PHY ID 0x%X\n", hw->phy_id);
+ return -E1000_ERR_PHY;
+}
+
+/******************************************************************************
+* Resets the PHY's DSP
+*
+* hw - Struct containing variables accessed by shared code
+******************************************************************************/
+static int32_t
+e1000_phy_reset_dsp(struct e1000_hw *hw)
+{
+ int32_t ret_val = -E1000_ERR_PHY;
+ DEBUGFUNC("e1000_phy_reset_dsp");
+
+ do {
+ if(e1000_write_phy_reg(hw, 29, 0x001d) < 0) break;
+ if(e1000_write_phy_reg(hw, 30, 0x00c1) < 0) break;
+ if(e1000_write_phy_reg(hw, 30, 0x0000) < 0) break;
+ ret_val = 0;
+ } while(0);
+
+ if(ret_val < 0) DEBUGOUT("PHY Write Error\n");
+ return ret_val;
+}
+
+/******************************************************************************
+* Get PHY information from various PHY registers
+*
+* hw - Struct containing variables accessed by shared code
+* phy_info - PHY information structure
+******************************************************************************/
+int32_t
+e1000_phy_get_info(struct e1000_hw *hw,
+ struct e1000_phy_info *phy_info)
+{
+ int32_t ret_val = -E1000_ERR_PHY;
+ uint16_t phy_data;
+
+ DEBUGFUNC("e1000_phy_get_info");
+
+ phy_info->cable_length = e1000_cable_length_undefined;
+ phy_info->extended_10bt_distance = e1000_10bt_ext_dist_enable_undefined;
+ phy_info->cable_polarity = e1000_rev_polarity_undefined;
+ phy_info->polarity_correction = e1000_polarity_reversal_undefined;
+ phy_info->mdix_mode = e1000_auto_x_mode_undefined;
+ phy_info->local_rx = e1000_1000t_rx_status_undefined;
+ phy_info->remote_rx = e1000_1000t_rx_status_undefined;
+
+ if(hw->media_type != e1000_media_type_copper) {
+ DEBUGOUT("PHY info is only valid for copper media\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ do {
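+ /* The PHY status register latches link-state changes; it is read
+ * twice below so that phy_data reflects the current link state.
+ */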
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) break;
+ if(e1000_read_phy_reg(hw, PHY_STATUS, &phy_data) < 0) break;
+ if((phy_data & MII_SR_LINK_STATUS) != MII_SR_LINK_STATUS) {
+ DEBUGOUT("PHY info is only valid if link is up\n");
+ return -E1000_ERR_CONFIG;
+ }
+
+ if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data) < 0)
+ break;
+ phy_info->extended_10bt_distance =
+ (phy_data & M88E1000_PSCR_10BT_EXT_DIST_ENABLE) >>
+ M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT;
+ phy_info->polarity_correction =
+ (phy_data & M88E1000_PSCR_POLARITY_REVERSAL) >>
+ M88E1000_PSCR_POLARITY_REVERSAL_SHIFT;
+
+ if(e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data) < 0)
+ break;
+ phy_info->cable_polarity = (phy_data & M88E1000_PSSR_REV_POLARITY) >>
+ M88E1000_PSSR_REV_POLARITY_SHIFT;
+ phy_info->mdix_mode = (phy_data & M88E1000_PSSR_MDIX) >>
+ M88E1000_PSSR_MDIX_SHIFT;
+ if(phy_data & M88E1000_PSSR_1000MBS) {
+ /* Cable Length Estimation and Local/Remote Receiver Information
+ * are only valid at 1000 Mbps
+ */
+ phy_info->cable_length = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+ M88E1000_PSSR_CABLE_LENGTH_SHIFT);
+ if(e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data) < 0)
+ break;
+ phy_info->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) >>
+ SR_1000T_LOCAL_RX_STATUS_SHIFT;
+ phy_info->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) >>
+ SR_1000T_REMOTE_RX_STATUS_SHIFT;
+ }
+ ret_val = 0;
+ } while(0);
+
+ if(ret_val < 0) DEBUGOUT("PHY Read Error\n");
+ return ret_val;
+}
+
+int32_t
+e1000_validate_mdi_setting(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_validate_mdi_setting");
+
+ if(!hw->autoneg && (hw->mdix == 0 || hw->mdix == 3)) {
+ DEBUGOUT("Invalid MDI setting detected\n");
+ hw->mdix = 1;
+ return -E1000_ERR_CONFIG;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Raises the EEPROM's clock input.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * eecd - EECD's current value
+ *****************************************************************************/
+static void
+e1000_raise_ee_clk(struct e1000_hw *hw,
+ uint32_t *eecd)
+{
+ /* Raise the clock input to the EEPROM (by setting the SK bit), and then
+ * wait 50 microseconds.
+ */
+ *eecd = *eecd | E1000_EECD_SK;
+ E1000_WRITE_REG(hw, EECD, *eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+}
+
+/******************************************************************************
+ * Lowers the EEPROM's clock input.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * eecd - EECD's current value
+ *****************************************************************************/
+static void
+e1000_lower_ee_clk(struct e1000_hw *hw,
+ uint32_t *eecd)
+{
+ /* Lower the clock input to the EEPROM (by clearing the SK bit), and then
+ * wait 50 microseconds.
+ */
+ *eecd = *eecd & ~E1000_EECD_SK;
+ E1000_WRITE_REG(hw, EECD, *eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+}
+
+/******************************************************************************
+ * Shift data bits out to the EEPROM.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * data - data to send to the EEPROM
+ * count - number of bits to shift out
+ *****************************************************************************/
+static void
+e1000_shift_out_ee_bits(struct e1000_hw *hw,
+ uint16_t data,
+ uint16_t count)
+{
+ uint32_t eecd;
+ uint32_t mask;
+
+ /* We need to shift "count" bits out to the EEPROM. So, value in the
+ * "data" parameter will be shifted out to the EEPROM one bit at a time.
+ * In order to do this, "data" must be broken down into bits.
+ */
+ mask = 0x01 << (count - 1);
+ eecd = E1000_READ_REG(hw, EECD);
+ eecd &= ~(E1000_EECD_DO | E1000_EECD_DI);
+ do {
+ /* A "1" is shifted out to the EEPROM by setting bit "DI" to a "1",
+ * and then raising and then lowering the clock (the SK bit controls
+ * the clock input to the EEPROM). A "0" is shifted out to the EEPROM
+ * by setting "DI" to "0" and then raising and then lowering the clock.
+ */
+ eecd &= ~E1000_EECD_DI;
+
+ if(data & mask)
+ eecd |= E1000_EECD_DI;
+
+ E1000_WRITE_REG(hw, EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+
+ udelay(50);
+
+ e1000_raise_ee_clk(hw, &eecd);
+ e1000_lower_ee_clk(hw, &eecd);
+
+ mask = mask >> 1;
+
+ } while(mask);
+
+ /* We leave the "DI" bit set to "0" when we leave this routine. */
+ eecd &= ~E1000_EECD_DI;
+ E1000_WRITE_REG(hw, EECD, eecd);
+}
+
+/******************************************************************************
+ * Shift data bits in from the EEPROM
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+static uint16_t
+e1000_shift_in_ee_bits(struct e1000_hw *hw)
+{
+ uint32_t eecd;
+ uint32_t i;
+ uint16_t data;
+
+ /* In order to read a register from the EEPROM, we need to shift 16
+ * bits in from the EEPROM. Bits are "shifted in" by raising the clock
+ * input to the EEPROM (setting the SK bit), and then reading the value of
+ * the "DO" bit. During this "shifting in" process the "DI" bit should
+ * always be clear.
+ */
+
+ eecd = E1000_READ_REG(hw, EECD);
+
+ eecd &= ~(E1000_EECD_DO | E1000_EECD_DI);
+ data = 0;
+
+ for(i = 0; i < 16; i++) {
+ data = data << 1;
+ e1000_raise_ee_clk(hw, &eecd);
+
+ eecd = E1000_READ_REG(hw, EECD);
+
+ eecd &= ~(E1000_EECD_DI);
+ if(eecd & E1000_EECD_DO)
+ data |= 1;
+
+ e1000_lower_ee_clk(hw, &eecd);
+ }
+
+ return data;
+}
+
+/******************************************************************************
+ * Prepares EEPROM for access
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Lowers EEPROM clock. Clears input pin. Sets the chip select pin. This
+ * function should be called before issuing a command to the EEPROM.
+ *****************************************************************************/
+static void
+e1000_setup_eeprom(struct e1000_hw *hw)
+{
+ uint32_t eecd;
+
+ eecd = E1000_READ_REG(hw, EECD);
+
+ /* Clear SK and DI */
+ eecd &= ~(E1000_EECD_SK | E1000_EECD_DI);
+ E1000_WRITE_REG(hw, EECD, eecd);
+
+ /* Set CS */
+ eecd |= E1000_EECD_CS;
+ E1000_WRITE_REG(hw, EECD, eecd);
+}
+
+/******************************************************************************
+ * Returns EEPROM to a "standby" state
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+static void
+e1000_standby_eeprom(struct e1000_hw *hw)
+{
+ uint32_t eecd;
+
+ eecd = E1000_READ_REG(hw, EECD);
+
+ /* Deselect EEPROM */
+ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
+ E1000_WRITE_REG(hw, EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+
+ /* Clock high */
+ eecd |= E1000_EECD_SK;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+
+ /* Select EEPROM */
+ eecd |= E1000_EECD_CS;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+
+ /* Clock low */
+ eecd &= ~E1000_EECD_SK;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+}
+
+/******************************************************************************
+ * Raises then lowers the EEPROM's clock pin
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+static void
+e1000_clock_eeprom(struct e1000_hw *hw)
+{
+ uint32_t eecd;
+
+ eecd = E1000_READ_REG(hw, EECD);
+
+ /* Rising edge of clock */
+ eecd |= E1000_EECD_SK;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+
+ /* Falling edge of clock */
+ eecd &= ~E1000_EECD_SK;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ E1000_WRITE_FLUSH(hw);
+ udelay(50);
+}
+
+/******************************************************************************
+ * Terminates a command by lowering the EEPROM's chip select pin
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+static void
+e1000_cleanup_eeprom(struct e1000_hw *hw)
+{
+ uint32_t eecd;
+
+ eecd = E1000_READ_REG(hw, EECD);
+
+ eecd &= ~(E1000_EECD_CS | E1000_EECD_DI);
+
+ E1000_WRITE_REG(hw, EECD, eecd);
+
+ e1000_clock_eeprom(hw);
+}
+
+/******************************************************************************
+ * Reads a 16 bit word from the EEPROM.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * offset - offset of word in the EEPROM to read
+ * data - word read from the EEPROM
+ *****************************************************************************/
+int32_t
+e1000_read_eeprom(struct e1000_hw *hw,
+ uint16_t offset,
+ uint16_t *data)
+{
+ uint32_t eecd;
+ uint32_t i = 0;
+ boolean_t large_eeprom = FALSE;
+
+ DEBUGFUNC("e1000_read_eeprom");
+
+ /* Request EEPROM Access */
+ if(hw->mac_type > e1000_82544) {
+ eecd = E1000_READ_REG(hw, EECD);
+ if(eecd & E1000_EECD_SIZE) large_eeprom = TRUE;
+ eecd |= E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ eecd = E1000_READ_REG(hw, EECD);
+ while((!(eecd & E1000_EECD_GNT)) && (i < 100)) {
+ i++;
+ udelay(5);
+ eecd = E1000_READ_REG(hw, EECD);
+ }
+ if(!(eecd & E1000_EECD_GNT)) {
+ eecd &= ~E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ DEBUGOUT("Could not acquire EEPROM grant\n");
+ return -E1000_ERR_EEPROM;
+ }
+ }
+
+ /* Prepare the EEPROM for reading */
+ e1000_setup_eeprom(hw);
+
+ /* Send the READ command (opcode + addr) */
+ e1000_shift_out_ee_bits(hw, EEPROM_READ_OPCODE, 3);
+ if(large_eeprom) {
+ /* If we have a 256 word EEPROM, there are 8 address bits */
+ e1000_shift_out_ee_bits(hw, offset, 8);
+ } else {
+ /* If we have a 64 word EEPROM, there are 6 address bits */
+ e1000_shift_out_ee_bits(hw, offset, 6);
+ }
+
+ /* Read the data */
+ *data = e1000_shift_in_ee_bits(hw);
+
+ /* End this read operation */
+ e1000_standby_eeprom(hw);
+
+ /* Stop requesting EEPROM access */
+ if(hw->mac_type > e1000_82544) {
+ eecd = E1000_READ_REG(hw, EECD);
+ eecd &= ~E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ }
+
+ return 0;
+}
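+
+/* Illustrative usage sketch: any word can be fetched this way, e.g. the
+ * checksum word that e1000_validate_eeprom_checksum sums over:
+ *
+ *     uint16_t sum_word;
+ *     if(e1000_read_eeprom(hw, EEPROM_CHECKSUM_REG, &sum_word) < 0)
+ *         return -E1000_ERR_EEPROM;
+ */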
+
+/******************************************************************************
+ * Verifies that the EEPROM has a valid checksum
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Reads the first 64 16 bit words of the EEPROM and sums the values read.
+ * If the sum of the 64 16 bit words is 0xBABA, the EEPROM's checksum is
+ * valid.
+ *****************************************************************************/
+int32_t
+e1000_validate_eeprom_checksum(struct e1000_hw *hw)
+{
+ uint16_t checksum = 0;
+ uint16_t i, eeprom_data;
+
+ DEBUGFUNC("e1000_validate_eeprom_checksum");
+
+ for(i = 0; i < (EEPROM_CHECKSUM_REG + 1); i++) {
+ if(e1000_read_eeprom(hw, i, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+ checksum += eeprom_data;
+ }
+
+ if(checksum == (uint16_t) EEPROM_SUM) {
+ return 0;
+ } else {
+ DEBUGOUT("EEPROM Checksum Invalid\n");
+ return -E1000_ERR_EEPROM;
+ }
+}
+
+/******************************************************************************
+ * Calculates the EEPROM checksum and writes it to the EEPROM
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Sums the first 63 16 bit words of the EEPROM. Subtracts the sum from 0xBABA.
+ * Writes the difference to word offset 63 of the EEPROM.
+ *****************************************************************************/
+int32_t
+e1000_update_eeprom_checksum(struct e1000_hw *hw)
+{
+ uint16_t checksum = 0;
+ uint16_t i, eeprom_data;
+
+ DEBUGFUNC("e1000_update_eeprom_checksum");
+
+ for(i = 0; i < EEPROM_CHECKSUM_REG; i++) {
+ if(e1000_read_eeprom(hw, i, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+ checksum += eeprom_data;
+ }
+ checksum = (uint16_t) EEPROM_SUM - checksum;
+ if(e1000_write_eeprom(hw, EEPROM_CHECKSUM_REG, checksum) < 0) {
+ DEBUGOUT("EEPROM Write Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+ return 0;
+}
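+
+/* Worked example (illustrative): if words 0-62 sum to 0x1234, the value
+ * written to word 63 is 0xBABA - 0x1234 = 0xA886, so that a subsequent
+ * sum over words 0-63 yields exactly 0xBABA (EEPROM_SUM).
+ */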
+
+/******************************************************************************
+ * Writes a 16 bit word to a given offset in the EEPROM.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * offset - offset within the EEPROM to be written to
+ * data - 16 bit word to be written to the EEPROM
+ *
+ * If e1000_update_eeprom_checksum is not called after this function, the
+ * EEPROM will most likely contain an invalid checksum.
+ *****************************************************************************/
+int32_t
+e1000_write_eeprom(struct e1000_hw *hw,
+ uint16_t offset,
+ uint16_t data)
+{
+ uint32_t eecd;
+ uint32_t i = 0;
+ int32_t status = 0;
+ boolean_t large_eeprom = FALSE;
+
+ DEBUGFUNC("e1000_write_eeprom");
+
+ /* Request EEPROM Access */
+ if(hw->mac_type > e1000_82544) {
+ eecd = E1000_READ_REG(hw, EECD);
+ if(eecd & E1000_EECD_SIZE) large_eeprom = TRUE;
+ eecd |= E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ eecd = E1000_READ_REG(hw, EECD);
+ while((!(eecd & E1000_EECD_GNT)) && (i < 100)) {
+ i++;
+ udelay(5);
+ eecd = E1000_READ_REG(hw, EECD);
+ }
+ if(!(eecd & E1000_EECD_GNT)) {
+ eecd &= ~E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ DEBUGOUT("Could not acquire EEPROM grant\n");
+ return -E1000_ERR_EEPROM;
+ }
+ }
+
+ /* Prepare the EEPROM for writing */
+ e1000_setup_eeprom(hw);
+
+ /* Send the 9-bit (or 11-bit on large EEPROM) EWEN (write enable) command
+ * to the EEPROM (5-bit opcode plus 4/6-bit dummy). This puts the EEPROM
+ * into write/erase mode.
+ */
+ e1000_shift_out_ee_bits(hw, EEPROM_EWEN_OPCODE, 5);
+ if(large_eeprom)
+ e1000_shift_out_ee_bits(hw, 0, 6);
+ else
+ e1000_shift_out_ee_bits(hw, 0, 4);
+
+ /* Prepare the EEPROM */
+ e1000_standby_eeprom(hw);
+
+ /* Send the Write command (3-bit opcode + addr) */
+ e1000_shift_out_ee_bits(hw, EEPROM_WRITE_OPCODE, 3);
+ if(large_eeprom)
+ /* If we have a 256 word EEPROM, there are 8 address bits */
+ e1000_shift_out_ee_bits(hw, offset, 8);
+ else
+ /* If we have a 64 word EEPROM, there are 6 address bits */
+ e1000_shift_out_ee_bits(hw, offset, 6);
+
+ /* Send the data */
+ e1000_shift_out_ee_bits(hw, data, 16);
+
+ /* Toggle the CS line. This in effect tells the EEPROM to actually
+ * execute the command in question.
+ */
+ e1000_standby_eeprom(hw);
+
+ /* Now read DO repeatedly until it is high (equal to '1'). The EEPROM
+ * will signal that the command has been completed by raising the DO
+ * signal. If DO does not go high within 10 milliseconds, error out.
+ */
+ for(i = 0; i < 200; i++) {
+ eecd = E1000_READ_REG(hw, EECD);
+ if(eecd & E1000_EECD_DO) break;
+ udelay(50);
+ }
+ if(i == 200) {
+ DEBUGOUT("EEPROM Write did not complete\n");
+ status = -E1000_ERR_EEPROM;
+ }
+
+ /* Recover from write */
+ e1000_standby_eeprom(hw);
+
+ /* Send the 9-bit (or 11-bit on large EEPROM) EWDS (write disable) command
+ * to the EEPROM (5-bit opcode plus 4/6-bit dummy). This takes the EEPROM
+ * out of write/erase mode.
+ */
+ e1000_shift_out_ee_bits(hw, EEPROM_EWDS_OPCODE, 5);
+ if(large_eeprom)
+ e1000_shift_out_ee_bits(hw, 0, 6);
+ else
+ e1000_shift_out_ee_bits(hw, 0, 4);
+
+ /* Done with writing */
+ e1000_cleanup_eeprom(hw);
+
+ /* Stop requesting EEPROM access */
+ if(hw->mac_type > e1000_82544) {
+ eecd = E1000_READ_REG(hw, EECD);
+ eecd &= ~E1000_EECD_REQ;
+ E1000_WRITE_REG(hw, EECD, eecd);
+ }
+
+ return status;
+}
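+
+/* Usage note (illustrative): as the header comment above warns, pair every
+ * write with a checksum update, e.g.
+ *
+ *     if(e1000_write_eeprom(hw, offset, data) == 0)
+ *         e1000_update_eeprom_checksum(hw);
+ */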
+
+/******************************************************************************
+ * Reads the adapter's part number from the EEPROM
+ *
+ * hw - Struct containing variables accessed by shared code
+ * part_num - Adapter's part number
+ *****************************************************************************/
+int32_t
+e1000_read_part_num(struct e1000_hw *hw,
+ uint32_t *part_num)
+{
+ uint16_t offset = EEPROM_PBA_BYTE_1;
+ uint16_t eeprom_data;
+
+ DEBUGFUNC("e1000_read_part_num");
+
+ /* Get word 0 from EEPROM */
+ if(e1000_read_eeprom(hw, offset, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+ /* Save word 0 in upper half of part_num */
+ *part_num = ((uint32_t) eeprom_data) << 16;
+
+ /* Get word 1 from EEPROM */
+ if(e1000_read_eeprom(hw, ++offset, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+ /* Save word 1 in lower half of part_num */
+ *part_num |= eeprom_data;
+
+ return 0;
+}
+
+/******************************************************************************
+ * Reads the adapter's MAC address from the EEPROM and inverts the LSB for the
+ * second function of dual function devices
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+int32_t
+e1000_read_mac_addr(struct e1000_hw * hw)
+{
+ uint16_t offset;
+ uint16_t eeprom_data, i;
+
+ DEBUGFUNC("e1000_read_mac_addr");
+
+ for(i = 0; i < NODE_ADDRESS_SIZE; i += 2) {
+ offset = i >> 1;
+ if(e1000_read_eeprom(hw, offset, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+ hw->perm_mac_addr[i] = (uint8_t) (eeprom_data & 0x00FF);
+ hw->perm_mac_addr[i+1] = (uint8_t) (eeprom_data >> 8);
+ }
+ if((hw->mac_type == e1000_82546) &&
+ (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) {
+ if(hw->perm_mac_addr[5] & 0x01)
+ hw->perm_mac_addr[5] &= ~(0x01);
+ else
+ hw->perm_mac_addr[5] |= 0x01;
+ }
+ for(i = 0; i < NODE_ADDRESS_SIZE; i++)
+ hw->mac_addr[i] = hw->perm_mac_addr[i];
+ return 0;
+}
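+
+/* Worked example (illustrative): EEPROM words store the address
+ * least-significant byte first, so if word 0 reads back as 0x2211, then
+ * perm_mac_addr[0] = 0x11 and perm_mac_addr[1] = 0x22.
+ */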
+
+/******************************************************************************
+ * Initializes receive address filters.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Places the MAC address in receive address register 0 and clears the rest
+ * of the receive address registers. Clears the multicast table. Assumes
+ * the receiver is in reset when the routine is called.
+ *****************************************************************************/
+void
+e1000_init_rx_addrs(struct e1000_hw *hw)
+{
+ uint32_t i;
+ uint32_t addr_low;
+ uint32_t addr_high;
+
+ DEBUGFUNC("e1000_init_rx_addrs");
+
+ /* Setup the receive address. */
+ DEBUGOUT("Programming MAC Address into RAR[0]\n");
+ addr_low = (hw->mac_addr[0] |
+ (hw->mac_addr[1] << 8) |
+ (hw->mac_addr[2] << 16) | (hw->mac_addr[3] << 24));
+
+ addr_high = (hw->mac_addr[4] |
+ (hw->mac_addr[5] << 8) | E1000_RAH_AV);
+
+ E1000_WRITE_REG_ARRAY(hw, RA, 0, addr_low);
+ E1000_WRITE_REG_ARRAY(hw, RA, 1, addr_high);
+
+ /* Zero out the other 15 receive addresses. */
+ DEBUGOUT("Clearing RAR[1-15]\n");
+ for(i = 1; i < E1000_RAR_ENTRIES; i++) {
+ E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0);
+ E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0);
+ }
+}
+
+/******************************************************************************
+ * Updates the MAC's list of multicast addresses.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * mc_addr_list - the list of new multicast addresses
+ * mc_addr_count - number of addresses
+ * pad - number of bytes between addresses in the list
+ *
+ * The given list replaces any existing list. Clears the last 15 receive
+ * address registers and the multicast table. Uses receive address registers
+ * for the first 15 multicast addresses, and hashes the rest into the
+ * multicast table.
+ *****************************************************************************/
+void
+e1000_mc_addr_list_update(struct e1000_hw *hw,
+ uint8_t *mc_addr_list,
+ uint32_t mc_addr_count,
+ uint32_t pad)
+{
+ uint32_t hash_value;
+ uint32_t i;
+ uint32_t rar_used_count = 1; /* RAR[0] is used for our MAC address */
+
+ DEBUGFUNC("e1000_mc_addr_list_update");
+
+ /* Set the new number of MC addresses that we are being requested to use. */
+ hw->num_mc_addrs = mc_addr_count;
+
+ /* Clear RAR[1-15] */
+ DEBUGOUT(" Clearing RAR[1-15]\n");
+ for(i = rar_used_count; i < E1000_RAR_ENTRIES; i++) {
+ E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0);
+ E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0);
+ }
+
+ /* Clear the MTA */
+ DEBUGOUT(" Clearing MTA\n");
+ for(i = 0; i < E1000_NUM_MTA_REGISTERS; i++) {
+ E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
+ }
+
+ /* Add the new addresses */
+ for(i = 0; i < mc_addr_count; i++) {
+ DEBUGOUT(" Adding the multicast addresses:\n");
+ DEBUGOUT7(" MC Addr #%d =%.2X %.2X %.2X %.2X %.2X %.2X\n", i,
+ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad)],
+ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 1],
+ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 2],
+ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 3],
+ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 4],
+ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 5]);
+
+ hash_value = e1000_hash_mc_addr(hw,
+ mc_addr_list +
+ (i * (ETH_LENGTH_OF_ADDRESS + pad)));
+
+ DEBUGOUT1(" Hash value = 0x%03X\n", hash_value);
+
+ /* Place this multicast address in the RAR if there is room,
+ * else put it in the MTA.
+ */
+ if(rar_used_count < E1000_RAR_ENTRIES) {
+ e1000_rar_set(hw,
+ mc_addr_list + (i * (ETH_LENGTH_OF_ADDRESS + pad)),
+ rar_used_count);
+ rar_used_count++;
+ } else {
+ e1000_mta_set(hw, hash_value);
+ }
+ }
+ DEBUGOUT("MC Update Complete\n");
+}
+
+/******************************************************************************
+ * Hashes an address to determine its location in the multicast table
+ *
+ * hw - Struct containing variables accessed by shared code
+ * mc_addr - the multicast address to hash
+ *****************************************************************************/
+uint32_t
+e1000_hash_mc_addr(struct e1000_hw *hw,
+ uint8_t *mc_addr)
+{
+ uint32_t hash_value = 0;
+
+ /* The portion of the address that is used for the hash table is
+ * determined by the mc_filter_type setting.
+ */
+ switch (hw->mc_filter_type) {
+ /* [0] [1] [2] [3] [4] [5]
+ * 01 AA 00 12 34 56
+ * LSB MSB
+ */
+ case 0:
+ /* [47:36] i.e. 0x563 for above example address */
+ hash_value = ((mc_addr[4] >> 4) | (((uint16_t) mc_addr[5]) << 4));
+ break;
+ case 1:
+ /* [46:35] i.e. 0xAC6 for above example address */
+ hash_value = ((mc_addr[4] >> 3) | (((uint16_t) mc_addr[5]) << 5));
+ break;
+ case 2:
+ /* [45:34] i.e. 0x58D for above example address */
+ hash_value = ((mc_addr[4] >> 2) | (((uint16_t) mc_addr[5]) << 6));
+ break;
+ case 3:
+ /* [43:32] i.e. 0x634 for above example address */
+ hash_value = ((mc_addr[4]) | (((uint16_t) mc_addr[5]) << 8));
+ break;
+ }
+
+ hash_value &= 0xFFF;
+ return hash_value;
+}
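+
+/* Worked check of case 0 (illustrative): for the example address above,
+ * mc_addr[4] = 0x34 and mc_addr[5] = 0x56, so hash_value =
+ * (0x34 >> 4) | (0x56 << 4) = 0x003 | 0x560 = 0x563, matching [47:36].
+ */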
+
+/******************************************************************************
+ * Sets the bit in the multicast table corresponding to the hash value.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * hash_value - Multicast address hash value
+ *****************************************************************************/
+void
+e1000_mta_set(struct e1000_hw *hw,
+ uint32_t hash_value)
+{
+ uint32_t hash_bit, hash_reg;
+ uint32_t mta;
+ uint32_t temp;
+
+ /* The MTA is a register array of 128 32-bit registers.
+ * It is treated like an array of 4096 bits. We want to set
+ * bit BitArray[hash_value]. So we figure out what register
+ * the bit is in, read it, OR in the new bit, then write
+ * back the new value. The register is determined by the
+ * upper 7 bits of the hash value and the bit within that
+ * register is determined by the lower 5 bits of the value.
+ */
+ hash_reg = (hash_value >> 5) & 0x7F;
+ hash_bit = hash_value & 0x1F;
+
+ mta = E1000_READ_REG_ARRAY(hw, MTA, hash_reg);
+
+ mta |= (1 << hash_bit);
+
+ /* If we are on an 82544 and we are trying to write an odd offset
+ * in the MTA, save off the previous entry before writing and
+ * restore the old value after writing.
+ */
+ if((hw->mac_type == e1000_82544) && ((hash_reg & 0x1) == 1)) {
+ temp = E1000_READ_REG_ARRAY(hw, MTA, (hash_reg - 1));
+ E1000_WRITE_REG_ARRAY(hw, MTA, hash_reg, mta);
+ E1000_WRITE_REG_ARRAY(hw, MTA, (hash_reg - 1), temp);
+ } else {
+ E1000_WRITE_REG_ARRAY(hw, MTA, hash_reg, mta);
+ }
+}
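+
+/* Worked example (illustrative): hash_value 0x563 selects hash_reg =
+ * (0x563 >> 5) & 0x7F = 0x2B and hash_bit = 0x563 & 0x1F = 0x03, so bit 3
+ * of MTA register 43 is set.
+ */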
+
+/******************************************************************************
+ * Puts an ethernet address into a receive address register.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * addr - Address to put into receive address register
+ * index - Receive address register to write
+ *****************************************************************************/
+void
+e1000_rar_set(struct e1000_hw *hw,
+ uint8_t *addr,
+ uint32_t index)
+{
+ uint32_t rar_low, rar_high;
+
+ /* HW expects these in little endian so we reverse the byte order
+ * from network order (big endian) to little endian
+ */
+ rar_low = ((uint32_t) addr[0] |
+ ((uint32_t) addr[1] << 8) |
+ ((uint32_t) addr[2] << 16) | ((uint32_t) addr[3] << 24));
+
+ rar_high = ((uint32_t) addr[4] | ((uint32_t) addr[5] << 8) | E1000_RAH_AV);
+
+ E1000_WRITE_REG_ARRAY(hw, RA, (index << 1), rar_low);
+ E1000_WRITE_REG_ARRAY(hw, RA, ((index << 1) + 1), rar_high);
+}
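+
+/* Worked example (illustrative): for addr = 00:11:22:33:44:55 this packs
+ * rar_low = 0x33221100 and rar_high = 0x5544 | E1000_RAH_AV, i.e. the
+ * network-order bytes land little-endian in the register pair.
+ */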
+
+/******************************************************************************
+ * Writes a value to the specified offset in the VLAN filter table.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * offset - Offset in VLAN filter table to write
+ * value - Value to write into VLAN filter table
+ *****************************************************************************/
+void
+e1000_write_vfta(struct e1000_hw *hw,
+ uint32_t offset,
+ uint32_t value)
+{
+ uint32_t temp;
+
+ if((hw->mac_type == e1000_82544) && ((offset & 0x1) == 1)) {
+ temp = E1000_READ_REG_ARRAY(hw, VFTA, (offset - 1));
+ E1000_WRITE_REG_ARRAY(hw, VFTA, offset, value);
+ E1000_WRITE_REG_ARRAY(hw, VFTA, (offset - 1), temp);
+ } else {
+ E1000_WRITE_REG_ARRAY(hw, VFTA, offset, value);
+ }
+}
+
+/******************************************************************************
+ * Clears the VLAN filter table
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+void
+e1000_clear_vfta(struct e1000_hw *hw)
+{
+ uint32_t offset;
+
+ for(offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++)
+ E1000_WRITE_REG_ARRAY(hw, VFTA, offset, 0);
+}
+
+static int32_t
+e1000_id_led_init(struct e1000_hw * hw)
+{
+ uint32_t ledctl;
+ const uint32_t ledctl_mask = 0x000000FF;
+ const uint32_t ledctl_on = E1000_LEDCTL_MODE_LED_ON;
+ const uint32_t ledctl_off = E1000_LEDCTL_MODE_LED_OFF;
+ uint16_t eeprom_data, i, temp;
+ const uint16_t led_mask = 0x0F;
+
+ DEBUGFUNC("e1000_id_led_init");
+
+ if(hw->mac_type < e1000_82540) {
+ /* Nothing to do */
+ return 0;
+ }
+
+ ledctl = E1000_READ_REG(hw, LEDCTL);
+ hw->ledctl_default = ledctl;
+ hw->ledctl_mode1 = hw->ledctl_default;
+ hw->ledctl_mode2 = hw->ledctl_default;
+
+ if(e1000_read_eeprom(hw, EEPROM_ID_LED_SETTINGS, &eeprom_data) < 0) {
+ DEBUGOUT("EEPROM Read Error\n");
+ return -E1000_ERR_EEPROM;
+ }
+ if((eeprom_data == ID_LED_RESERVED_0000) ||
+ (eeprom_data == ID_LED_RESERVED_FFFF)) eeprom_data = ID_LED_DEFAULT;
+ for(i = 0; i < 4; i++) {
+ temp = (eeprom_data >> (i << 2)) & led_mask;
+ switch(temp) {
+ case ID_LED_ON1_DEF2:
+ case ID_LED_ON1_ON2:
+ case ID_LED_ON1_OFF2:
+ hw->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
+ hw->ledctl_mode1 |= ledctl_on << (i << 3);
+ break;
+ case ID_LED_OFF1_DEF2:
+ case ID_LED_OFF1_ON2:
+ case ID_LED_OFF1_OFF2:
+ hw->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
+ hw->ledctl_mode1 |= ledctl_off << (i << 3);
+ break;
+ default:
+ /* Do nothing */
+ break;
+ }
+ switch(temp) {
+ case ID_LED_DEF1_ON2:
+ case ID_LED_ON1_ON2:
+ case ID_LED_OFF1_ON2:
+ hw->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
+ hw->ledctl_mode2 |= ledctl_on << (i << 3);
+ break;
+ case ID_LED_DEF1_OFF2:
+ case ID_LED_ON1_OFF2:
+ case ID_LED_OFF1_OFF2:
+ hw->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
+ hw->ledctl_mode2 |= ledctl_off << (i << 3);
+ break;
+ default:
+ /* Do nothing */
+ break;
+ }
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Prepares SW controllable LED for use and saves the current state of the LED.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+int32_t
+e1000_setup_led(struct e1000_hw *hw)
+{
+ uint32_t ledctl;
+
+ DEBUGFUNC("e1000_setup_led");
+
+ switch(hw->device_id) {
+ case E1000_DEV_ID_82542:
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ case E1000_DEV_ID_82544EI_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
+ case E1000_DEV_ID_82544GC_COPPER:
+ case E1000_DEV_ID_82544GC_LOM:
+ /* No setup necessary */
+ break;
+ case E1000_DEV_ID_82545EM_FIBER:
+ case E1000_DEV_ID_82546EB_FIBER:
+ ledctl = E1000_READ_REG(hw, LEDCTL);
+ /* Save current LEDCTL settings */
+ hw->ledctl_default = ledctl;
+ /* Turn off LED0 */
+ ledctl &= ~(E1000_LEDCTL_LED0_IVRT |
+ E1000_LEDCTL_LED0_BLINK |
+ E1000_LEDCTL_LED0_MODE_MASK);
+ ledctl |= (E1000_LEDCTL_MODE_LED_OFF << E1000_LEDCTL_LED0_MODE_SHIFT);
+ E1000_WRITE_REG(hw, LEDCTL, ledctl);
+ break;
+ case E1000_DEV_ID_82540EP:
+ case E1000_DEV_ID_82540EP_LOM:
+ case E1000_DEV_ID_82540EP_LP:
+ case E1000_DEV_ID_82540EM:
+ case E1000_DEV_ID_82540EM_LOM:
+ case E1000_DEV_ID_82545EM_COPPER:
+ case E1000_DEV_ID_82546EB_COPPER:
+ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode1);
+ break;
+ default:
+ DEBUGOUT("Invalid device ID\n");
+ return -E1000_ERR_CONFIG;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Restores the saved state of the SW controllable LED.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+int32_t
+e1000_cleanup_led(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_cleanup_led");
+
+ switch(hw->device_id) {
+ case E1000_DEV_ID_82542:
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ case E1000_DEV_ID_82544EI_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
+ case E1000_DEV_ID_82544GC_COPPER:
+ case E1000_DEV_ID_82544GC_LOM:
+ /* No cleanup necessary */
+ break;
+ case E1000_DEV_ID_82540EP:
+ case E1000_DEV_ID_82540EP_LOM:
+ case E1000_DEV_ID_82540EP_LP:
+ case E1000_DEV_ID_82540EM:
+ case E1000_DEV_ID_82540EM_LOM:
+ case E1000_DEV_ID_82545EM_COPPER:
+ case E1000_DEV_ID_82545EM_FIBER:
+ case E1000_DEV_ID_82546EB_COPPER:
+ case E1000_DEV_ID_82546EB_FIBER:
+ /* Restore LEDCTL settings */
+ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_default);
+ break;
+ default:
+ DEBUGOUT("Invalid device ID\n");
+ return -E1000_ERR_CONFIG;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Turns on the software controllable LED
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+int32_t
+e1000_led_on(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+
+ DEBUGFUNC("e1000_led_on");
+
+ switch(hw->device_id) {
+ case E1000_DEV_ID_82542:
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
+ ctrl = E1000_READ_REG(hw, CTRL);
+ /* Set SW Definable Pin 0 to turn on the LED */
+ ctrl |= E1000_CTRL_SWDPIN0;
+ ctrl |= E1000_CTRL_SWDPIO0;
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ break;
+ case E1000_DEV_ID_82544EI_COPPER:
+ case E1000_DEV_ID_82544GC_COPPER:
+ case E1000_DEV_ID_82544GC_LOM:
+ case E1000_DEV_ID_82545EM_FIBER:
+ case E1000_DEV_ID_82546EB_FIBER:
+ ctrl = E1000_READ_REG(hw, CTRL);
+ /* Clear SW Definable Pin 0 to turn on the LED */
+ ctrl &= ~E1000_CTRL_SWDPIN0;
+ ctrl |= E1000_CTRL_SWDPIO0;
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ break;
+ case E1000_DEV_ID_82540EP:
+ case E1000_DEV_ID_82540EP_LOM:
+ case E1000_DEV_ID_82540EP_LP:
+ case E1000_DEV_ID_82540EM:
+ case E1000_DEV_ID_82540EM_LOM:
+ case E1000_DEV_ID_82545EM_COPPER:
+ case E1000_DEV_ID_82546EB_COPPER:
+ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode2);
+ break;
+ default:
+ DEBUGOUT("Invalid device ID\n");
+ return -E1000_ERR_CONFIG;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Turns off the software controllable LED
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+int32_t
+e1000_led_off(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+
+ DEBUGFUNC("e1000_led_off");
+
+ switch(hw->device_id) {
+ case E1000_DEV_ID_82542:
+ case E1000_DEV_ID_82543GC_FIBER:
+ case E1000_DEV_ID_82543GC_COPPER:
+ case E1000_DEV_ID_82544EI_FIBER:
+ ctrl = E1000_READ_REG(hw, CTRL);
+ /* Clear SW Definable Pin 0 to turn off the LED */
+ ctrl &= ~E1000_CTRL_SWDPIN0;
+ ctrl |= E1000_CTRL_SWDPIO0;
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ break;
+ case E1000_DEV_ID_82544EI_COPPER:
+ case E1000_DEV_ID_82544GC_COPPER:
+ case E1000_DEV_ID_82544GC_LOM:
+ case E1000_DEV_ID_82545EM_FIBER:
+ case E1000_DEV_ID_82546EB_FIBER:
+ ctrl = E1000_READ_REG(hw, CTRL);
+ /* Set SW Definable Pin 0 to turn off the LED */
+ ctrl |= E1000_CTRL_SWDPIN0;
+ ctrl |= E1000_CTRL_SWDPIO0;
+ E1000_WRITE_REG(hw, CTRL, ctrl);
+ break;
+ case E1000_DEV_ID_82540EP:
+ case E1000_DEV_ID_82540EP_LOM:
+ case E1000_DEV_ID_82540EP_LP:
+ case E1000_DEV_ID_82540EM:
+ case E1000_DEV_ID_82540EM_LOM:
+ case E1000_DEV_ID_82545EM_COPPER:
+ case E1000_DEV_ID_82546EB_COPPER:
+ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode1);
+ break;
+ default:
+ DEBUGOUT("Invalid device ID\n");
+ return -E1000_ERR_CONFIG;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * Clears all hardware statistics counters.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+void
+e1000_clear_hw_cntrs(struct e1000_hw *hw)
+{
+ volatile uint32_t temp;
+
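+ /* These statistics registers are clear-on-read: reading each one
+ * resets it. temp is volatile so the reads are not optimized away.
+ */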
+ temp = E1000_READ_REG(hw, CRCERRS);
+ temp = E1000_READ_REG(hw, SYMERRS);
+ temp = E1000_READ_REG(hw, MPC);
+ temp = E1000_READ_REG(hw, SCC);
+ temp = E1000_READ_REG(hw, ECOL);
+ temp = E1000_READ_REG(hw, MCC);
+ temp = E1000_READ_REG(hw, LATECOL);
+ temp = E1000_READ_REG(hw, COLC);
+ temp = E1000_READ_REG(hw, DC);
+ temp = E1000_READ_REG(hw, SEC);
+ temp = E1000_READ_REG(hw, RLEC);
+ temp = E1000_READ_REG(hw, XONRXC);
+ temp = E1000_READ_REG(hw, XONTXC);
+ temp = E1000_READ_REG(hw, XOFFRXC);
+ temp = E1000_READ_REG(hw, XOFFTXC);
+ temp = E1000_READ_REG(hw, FCRUC);
+ temp = E1000_READ_REG(hw, PRC64);
+ temp = E1000_READ_REG(hw, PRC127);
+ temp = E1000_READ_REG(hw, PRC255);
+ temp = E1000_READ_REG(hw, PRC511);
+ temp = E1000_READ_REG(hw, PRC1023);
+ temp = E1000_READ_REG(hw, PRC1522);
+ temp = E1000_READ_REG(hw, GPRC);
+ temp = E1000_READ_REG(hw, BPRC);
+ temp = E1000_READ_REG(hw, MPRC);
+ temp = E1000_READ_REG(hw, GPTC);
+ temp = E1000_READ_REG(hw, GORCL);
+ temp = E1000_READ_REG(hw, GORCH);
+ temp = E1000_READ_REG(hw, GOTCL);
+ temp = E1000_READ_REG(hw, GOTCH);
+ temp = E1000_READ_REG(hw, RNBC);
+ temp = E1000_READ_REG(hw, RUC);
+ temp = E1000_READ_REG(hw, RFC);
+ temp = E1000_READ_REG(hw, ROC);
+ temp = E1000_READ_REG(hw, RJC);
+ temp = E1000_READ_REG(hw, TORL);
+ temp = E1000_READ_REG(hw, TORH);
+ temp = E1000_READ_REG(hw, TOTL);
+ temp = E1000_READ_REG(hw, TOTH);
+ temp = E1000_READ_REG(hw, TPR);
+ temp = E1000_READ_REG(hw, TPT);
+ temp = E1000_READ_REG(hw, PTC64);
+ temp = E1000_READ_REG(hw, PTC127);
+ temp = E1000_READ_REG(hw, PTC255);
+ temp = E1000_READ_REG(hw, PTC511);
+ temp = E1000_READ_REG(hw, PTC1023);
+ temp = E1000_READ_REG(hw, PTC1522);
+ temp = E1000_READ_REG(hw, MPTC);
+ temp = E1000_READ_REG(hw, BPTC);
+
+ if(hw->mac_type < e1000_82543) return;
+
+ temp = E1000_READ_REG(hw, ALGNERRC);
+ temp = E1000_READ_REG(hw, RXERRC);
+ temp = E1000_READ_REG(hw, TNCRS);
+ temp = E1000_READ_REG(hw, CEXTERR);
+ temp = E1000_READ_REG(hw, TSCTC);
+ temp = E1000_READ_REG(hw, TSCTFC);
+
+ if(hw->mac_type <= e1000_82544) return;
+
+ temp = E1000_READ_REG(hw, MGTPRC);
+ temp = E1000_READ_REG(hw, MGTPDC);
+ temp = E1000_READ_REG(hw, MGTPTC);
+}
+
+/******************************************************************************
+ * Resets Adaptive IFS to its default state.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *
+ * Call this after e1000_init_hw. You may override the IFS defaults by setting
+ * hw->ifs_params_forced to TRUE. However, you must then initialize
+ * hw->current_ifs_val, ifs_min_val, ifs_max_val, ifs_step_size, and
+ * ifs_ratio before calling this function.
+ *****************************************************************************/
+void
+e1000_reset_adaptive(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_reset_adaptive");
+
+ if(hw->adaptive_ifs) {
+ if(!hw->ifs_params_forced) {
+ hw->current_ifs_val = 0;
+ hw->ifs_min_val = IFS_MIN;
+ hw->ifs_max_val = IFS_MAX;
+ hw->ifs_step_size = IFS_STEP;
+ hw->ifs_ratio = IFS_RATIO;
+ }
+ hw->in_ifs_mode = FALSE;
+ E1000_WRITE_REG(hw, AIT, 0);
+ } else {
+ DEBUGOUT("Not in Adaptive IFS mode!\n");
+ }
+}
+
+/******************************************************************************
+ * Called during the callback/watchdog routine to update the IFS value based
+ * on the ratio of transmits to collisions since the last callback, taken
+ * from hw->tx_packet_delta and hw->collision_delta.
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+void
+e1000_update_adaptive(struct e1000_hw *hw)
+{
+ DEBUGFUNC("e1000_update_adaptive");
+
+ if(hw->adaptive_ifs) {
+ if((hw->collision_delta * hw->ifs_ratio) >
+ hw->tx_packet_delta) {
+ if(hw->tx_packet_delta > MIN_NUM_XMITS) {
+ hw->in_ifs_mode = TRUE;
+ if(hw->current_ifs_val < hw->ifs_max_val) {
+ if(hw->current_ifs_val == 0)
+ hw->current_ifs_val = hw->ifs_min_val;
+ else
+ hw->current_ifs_val += hw->ifs_step_size;
+ E1000_WRITE_REG(hw, AIT, hw->current_ifs_val);
+ }
+ }
+ } else {
+ if((hw->in_ifs_mode == TRUE) &&
+ (hw->tx_packet_delta <= MIN_NUM_XMITS)) {
+ hw->current_ifs_val = 0;
+ hw->in_ifs_mode = FALSE;
+ E1000_WRITE_REG(hw, AIT, 0);
+ }
+ }
+ } else {
+ DEBUGOUT("Not in Adaptive IFS mode!\n");
+ }
+}
+
+/******************************************************************************
+ * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT
+ *
+ * hw - Struct containing variables accessed by shared code
+ * frame_len - The length of the frame in question
+ * mac_addr - The Ethernet destination address of the frame in question
+ *****************************************************************************/
+void
+e1000_tbi_adjust_stats(struct e1000_hw *hw,
+ struct e1000_hw_stats *stats,
+ uint32_t frame_len,
+ uint8_t *mac_addr)
+{
+ uint64_t carry_bit;
+
+ /* First adjust the frame length. */
+ frame_len--;
+ /* We need to adjust the statistics counters, since the hardware
+ * counters overcount this packet as a CRC error and undercount
+ * the packet as a good packet
+ */
+ /* This packet should not be counted as a CRC error. */
+ stats->crcerrs--;
+ /* This packet does count as a Good Packet Received. */
+ stats->gprc++;
+
+ /* Adjust the Good Octets received counters */
+ carry_bit = 0x80000000 & stats->gorcl;
+ stats->gorcl += frame_len;
+ /* If the high bit of Gorcl (the low 32 bits of the Good Octets
+ * Received Count) was one before the addition and is zero after,
+ * the addition carried out of the low word, so add one to Gorch
+ * (Good Octets Received Count High). This could be simplified if
+ * all environments supported 64-bit integers.
+ */
+ if(carry_bit && ((stats->gorcl & 0x80000000) == 0))
+ stats->gorch++;
+ /* Is this a broadcast or multicast? Check broadcast first,
+ * since the test for a multicast frame will test positive on
+ * a broadcast frame.
+ */
+ if((mac_addr[0] == (uint8_t) 0xff) && (mac_addr[1] == (uint8_t) 0xff))
+ /* Broadcast packet */
+ stats->bprc++;
+ else if(*mac_addr & 0x01)
+ /* Multicast packet */
+ stats->mprc++;
+
+ if(frame_len == hw->max_frame_size) {
+ /* In this case, the hardware has overcounted the number of
+ * oversize frames.
+ */
+ if(stats->roc > 0)
+ stats->roc--;
+ }
+
+ /* Adjust the bin counters when the extra byte put the frame in the
+ * wrong bin. Remember that the frame_len was adjusted above.
+ */
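+ /* Example: a frame whose true length is 64 bytes was seen by the
+ * hardware as 65 bytes and counted in prc127, so it is moved here
+ * from prc127 to prc64.
+ */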
+ if(frame_len == 64) {
+ stats->prc64++;
+ stats->prc127--;
+ } else if(frame_len == 127) {
+ stats->prc127++;
+ stats->prc255--;
+ } else if(frame_len == 255) {
+ stats->prc255++;
+ stats->prc511--;
+ } else if(frame_len == 511) {
+ stats->prc511++;
+ stats->prc1023--;
+ } else if(frame_len == 1023) {
+ stats->prc1023++;
+ stats->prc1522--;
+ } else if(frame_len == 1522) {
+ stats->prc1522++;
+ }
+}
+
+/******************************************************************************
+ * Gets the current PCI bus type, speed, and width of the hardware
+ *
+ * hw - Struct containing variables accessed by shared code
+ *****************************************************************************/
+void
+e1000_get_bus_info(struct e1000_hw *hw)
+{
+ uint32_t status;
+
+ if(hw->mac_type < e1000_82543) {
+ hw->bus_type = e1000_bus_type_unknown;
+ hw->bus_speed = e1000_bus_speed_unknown;
+ hw->bus_width = e1000_bus_width_unknown;
+ return;
+ }
+
+ status = E1000_READ_REG(hw, STATUS);
+ hw->bus_type = (status & E1000_STATUS_PCIX_MODE) ?
+ e1000_bus_type_pcix : e1000_bus_type_pci;
+ if(hw->bus_type == e1000_bus_type_pci) {
+ hw->bus_speed = (status & E1000_STATUS_PCI66) ?
+ e1000_bus_speed_66 : e1000_bus_speed_33;
+ } else {
+ switch (status & E1000_STATUS_PCIX_SPEED) {
+ case E1000_STATUS_PCIX_SPEED_66:
+ hw->bus_speed = e1000_bus_speed_66;
+ break;
+ case E1000_STATUS_PCIX_SPEED_100:
+ hw->bus_speed = e1000_bus_speed_100;
+ break;
+ case E1000_STATUS_PCIX_SPEED_133:
+ hw->bus_speed = e1000_bus_speed_133;
+ break;
+ default:
+ hw->bus_speed = e1000_bus_speed_reserved;
+ break;
+ }
+ }
+ hw->bus_width = (status & E1000_STATUS_BUS64) ?
+ e1000_bus_width_64 : e1000_bus_width_32;
+}
+
+/******************************************************************************
+ * Reads a value from one of the device's registers using port I/O (as
+ * opposed to memory-mapped I/O). Only 82544 and newer devices support
+ * port I/O.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * offset - offset to read from
+ *****************************************************************************/
+uint32_t
+e1000_read_reg_io(struct e1000_hw *hw,
+ uint32_t offset)
+{
+ uint32_t io_addr = hw->io_base;
+ uint32_t io_data = hw->io_base + 4;
+
+ e1000_io_write(hw, io_addr, offset);
+ return e1000_io_read(hw, io_data);
+}
+
+/******************************************************************************
+ * Writes a value to one of the device's registers using port I/O (as opposed
+ * to memory-mapped I/O). Only 82544 and newer devices support port I/O.
+ *
+ * hw - Struct containing variables accessed by shared code
+ * offset - offset to write to
+ * value - value to write
+ *****************************************************************************/
+void
+e1000_write_reg_io(struct e1000_hw *hw,
+ uint32_t offset,
+ uint32_t value)
+{
+ uint32_t io_addr = hw->io_base;
+ uint32_t io_data = hw->io_base + 4;
+
+ e1000_io_write(hw, io_addr, offset);
+ e1000_io_write(hw, io_data, value);
+}
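+
+/* Illustrative sketch: IOADDR (io_base) and IODATA (io_base + 4) form an
+ * indirect register pair -- the register offset is latched into IOADDR and
+ * the value moves through IODATA, as the two functions above show. A
+ * caller would normally go through the E1000_READ_REG_IO()/
+ * E1000_WRITE_REG_IO() macros from e1000_hw.h rather than passing raw
+ * offsets.
+ */
+#if 0
+static void e1000_io_example(struct e1000_hw *hw)
+{
+ uint32_t ctrl;
+
+ ctrl = e1000_read_reg_io(hw, E1000_CTRL); /* E1000_READ_REG_IO(hw, CTRL) */
+ e1000_write_reg_io(hw, E1000_CTRL, ctrl); /* E1000_WRITE_REG_IO(hw, CTRL, ctrl) */
+}
+#endif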
+
diff --git a/xen/drivers/net/e1000/e1000_hw.h b/xen/drivers/net/e1000/e1000_hw.h
new file mode 100644
index 0000000000..812dfd140f
--- /dev/null
+++ b/xen/drivers/net/e1000/e1000_hw.h
@@ -0,0 +1,1789 @@
+/*******************************************************************************
+
+
+ Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* e1000_hw.h
+ * Structures, enums, and macros for the MAC
+ */
+
+#ifndef _E1000_HW_H_
+#define _E1000_HW_H_
+
+#include "e1000_osdep.h"
+
+/* Forward declarations of structures used by the shared code */
+struct e1000_hw;
+struct e1000_hw_stats;
+
+/* Enumerated types specific to the e1000 hardware */
+/* Media Access Controllers */
+typedef enum {
+ e1000_undefined = 0,
+ e1000_82542_rev2_0,
+ e1000_82542_rev2_1,
+ e1000_82543,
+ e1000_82544,
+ e1000_82540,
+ e1000_82545,
+ e1000_82546,
+ e1000_num_macs
+} e1000_mac_type;
+
+/* Media Types */
+typedef enum {
+ e1000_media_type_copper = 0,
+ e1000_media_type_fiber = 1,
+ e1000_num_media_types
+} e1000_media_type;
+
+typedef enum {
+ e1000_10_half = 0,
+ e1000_10_full = 1,
+ e1000_100_half = 2,
+ e1000_100_full = 3
+} e1000_speed_duplex_type;
+
+/* Flow Control Settings */
+typedef enum {
+ e1000_fc_none = 0,
+ e1000_fc_rx_pause = 1,
+ e1000_fc_tx_pause = 2,
+ e1000_fc_full = 3,
+ e1000_fc_default = 0xFF
+} e1000_fc_type;
+
+/* PCI bus types */
+typedef enum {
+ e1000_bus_type_unknown = 0,
+ e1000_bus_type_pci,
+ e1000_bus_type_pcix
+} e1000_bus_type;
+
+/* PCI bus speeds */
+typedef enum {
+ e1000_bus_speed_unknown = 0,
+ e1000_bus_speed_33,
+ e1000_bus_speed_66,
+ e1000_bus_speed_100,
+ e1000_bus_speed_133,
+ e1000_bus_speed_reserved
+} e1000_bus_speed;
+
+/* PCI bus widths */
+typedef enum {
+ e1000_bus_width_unknown = 0,
+ e1000_bus_width_32,
+ e1000_bus_width_64
+} e1000_bus_width;
+
+/* PHY status info structure and supporting enums */
+typedef enum {
+ e1000_cable_length_50 = 0,
+ e1000_cable_length_50_80,
+ e1000_cable_length_80_110,
+ e1000_cable_length_110_140,
+ e1000_cable_length_140,
+ e1000_cable_length_undefined = 0xFF
+} e1000_cable_length;
+
+typedef enum {
+ e1000_10bt_ext_dist_enable_normal = 0,
+ e1000_10bt_ext_dist_enable_lower,
+ e1000_10bt_ext_dist_enable_undefined = 0xFF
+} e1000_10bt_ext_dist_enable;
+
+typedef enum {
+ e1000_rev_polarity_normal = 0,
+ e1000_rev_polarity_reversed,
+ e1000_rev_polarity_undefined = 0xFF
+} e1000_rev_polarity;
+
+typedef enum {
+ e1000_polarity_reversal_enabled = 0,
+ e1000_polarity_reversal_disabled,
+ e1000_polarity_reversal_undefined = 0xFF
+} e1000_polarity_reversal;
+
+typedef enum {
+ e1000_auto_x_mode_manual_mdi = 0,
+ e1000_auto_x_mode_manual_mdix,
+ e1000_auto_x_mode_auto1,
+ e1000_auto_x_mode_auto2,
+ e1000_auto_x_mode_undefined = 0xFF
+} e1000_auto_x_mode;
+
+typedef enum {
+ e1000_1000t_rx_status_not_ok = 0,
+ e1000_1000t_rx_status_ok,
+ e1000_1000t_rx_status_undefined = 0xFF
+} e1000_1000t_rx_status;
+
+struct e1000_phy_info {
+ e1000_cable_length cable_length;
+ e1000_10bt_ext_dist_enable extended_10bt_distance;
+ e1000_rev_polarity cable_polarity;
+ e1000_polarity_reversal polarity_correction;
+ e1000_auto_x_mode mdix_mode;
+ e1000_1000t_rx_status local_rx;
+ e1000_1000t_rx_status remote_rx;
+};
+
+struct e1000_phy_stats {
+ uint32_t idle_errors;
+ uint32_t receive_errors;
+};
+
+
+
+/* Error Codes */
+#define E1000_SUCCESS 0
+#define E1000_ERR_EEPROM 1
+#define E1000_ERR_PHY 2
+#define E1000_ERR_CONFIG 3
+#define E1000_ERR_PARAM 4
+#define E1000_ERR_MAC_TYPE 5
+
+/* Function prototypes */
+/* Initialization */
+void e1000_reset_hw(struct e1000_hw *hw);
+int32_t e1000_init_hw(struct e1000_hw *hw);
+int32_t e1000_set_mac_type(struct e1000_hw *hw);
+
+/* Link Configuration */
+int32_t e1000_setup_link(struct e1000_hw *hw);
+int32_t e1000_phy_setup_autoneg(struct e1000_hw *hw);
+void e1000_config_collision_dist(struct e1000_hw *hw);
+int32_t e1000_config_fc_after_link_up(struct e1000_hw *hw);
+int32_t e1000_check_for_link(struct e1000_hw *hw);
+void e1000_get_speed_and_duplex(struct e1000_hw *hw, uint16_t * speed, uint16_t * duplex);
+int32_t e1000_wait_autoneg(struct e1000_hw *hw);
+
+/* PHY */
+int32_t e1000_read_phy_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t *phy_data);
+int32_t e1000_write_phy_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t data);
+void e1000_phy_hw_reset(struct e1000_hw *hw);
+int32_t e1000_phy_reset(struct e1000_hw *hw);
+int32_t e1000_detect_gig_phy(struct e1000_hw *hw);
+int32_t e1000_phy_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info);
+int32_t e1000_validate_mdi_setting(struct e1000_hw *hw);
+
+/* EEPROM Functions */
+int32_t e1000_read_eeprom(struct e1000_hw *hw, uint16_t reg, uint16_t *data);
+int32_t e1000_validate_eeprom_checksum(struct e1000_hw *hw);
+int32_t e1000_update_eeprom_checksum(struct e1000_hw *hw);
+int32_t e1000_write_eeprom(struct e1000_hw *hw, uint16_t reg, uint16_t data);
+int32_t e1000_read_part_num(struct e1000_hw *hw, uint32_t * part_num);
+int32_t e1000_read_mac_addr(struct e1000_hw * hw);
+
+/* Filters (multicast, vlan, receive) */
+void e1000_init_rx_addrs(struct e1000_hw *hw);
+void e1000_mc_addr_list_update(struct e1000_hw *hw, uint8_t * mc_addr_list, uint32_t mc_addr_count, uint32_t pad);
+uint32_t e1000_hash_mc_addr(struct e1000_hw *hw, uint8_t * mc_addr);
+void e1000_mta_set(struct e1000_hw *hw, uint32_t hash_value);
+void e1000_rar_set(struct e1000_hw *hw, uint8_t * mc_addr, uint32_t rar_index);
+void e1000_write_vfta(struct e1000_hw *hw, uint32_t offset, uint32_t value);
+void e1000_clear_vfta(struct e1000_hw *hw);
+
+/* LED functions */
+int32_t e1000_setup_led(struct e1000_hw *hw);
+int32_t e1000_cleanup_led(struct e1000_hw *hw);
+int32_t e1000_led_on(struct e1000_hw *hw);
+int32_t e1000_led_off(struct e1000_hw *hw);
+
+/* Adaptive IFS Functions */
+
+/* Everything else */
+void e1000_clear_hw_cntrs(struct e1000_hw *hw);
+void e1000_reset_adaptive(struct e1000_hw *hw);
+void e1000_update_adaptive(struct e1000_hw *hw);
+void e1000_tbi_adjust_stats(struct e1000_hw *hw, struct e1000_hw_stats *stats, uint32_t frame_len, uint8_t * mac_addr);
+void e1000_get_bus_info(struct e1000_hw *hw);
+void e1000_pci_set_mwi(struct e1000_hw *hw);
+void e1000_pci_clear_mwi(struct e1000_hw *hw);
+void e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t * value);
+void e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t * value);
+/* Port I/O is only supported on 82544 and newer */
+uint32_t e1000_io_read(struct e1000_hw *hw, uint32_t port);
+uint32_t e1000_read_reg_io(struct e1000_hw *hw, uint32_t offset);
+void e1000_io_write(struct e1000_hw *hw, uint32_t port, uint32_t value);
+void e1000_write_reg_io(struct e1000_hw *hw, uint32_t offset, uint32_t value);
+#define E1000_READ_REG_IO(a, reg) \
+ e1000_read_reg_io((a), E1000_##reg)
+#define E1000_WRITE_REG_IO(a, reg, val) \
+ e1000_write_reg_io((a), E1000_##reg, val)
+
+/* PCI Device IDs */
+#define E1000_DEV_ID_82542 0x1000
+#define E1000_DEV_ID_82543GC_FIBER 0x1001
+#define E1000_DEV_ID_82543GC_COPPER 0x1004
+#define E1000_DEV_ID_82544EI_COPPER 0x1008
+#define E1000_DEV_ID_82544EI_FIBER 0x1009
+#define E1000_DEV_ID_82544GC_COPPER 0x100C
+#define E1000_DEV_ID_82544GC_LOM 0x100D
+#define E1000_DEV_ID_82540EM 0x100E
+#define E1000_DEV_ID_82540EM_LOM 0x1015
+#define E1000_DEV_ID_82540EP_LOM 0x1016
+#define E1000_DEV_ID_82540EP 0x1017
+#define E1000_DEV_ID_82540EP_LP 0x101E
+#define E1000_DEV_ID_82545EM_COPPER 0x100F
+#define E1000_DEV_ID_82545EM_FIBER 0x1011
+#define E1000_DEV_ID_82546EB_COPPER 0x1010
+#define E1000_DEV_ID_82546EB_FIBER 0x1012
+#define NUM_DEV_IDS 16
+
+#define NODE_ADDRESS_SIZE 6
+#define ETH_LENGTH_OF_ADDRESS 6
+
+/* MAC decode size is 128K - This is the size of BAR0 */
+#define MAC_DECODE_SIZE (128 * 1024)
+
+#define E1000_82542_2_0_REV_ID 2
+#define E1000_82542_2_1_REV_ID 3
+
+#define SPEED_10 10
+#define SPEED_100 100
+#define SPEED_1000 1000
+#define HALF_DUPLEX 1
+#define FULL_DUPLEX 2
+
+/* The sizes (in bytes) of an Ethernet packet */
+#define ENET_HEADER_SIZE 14
+#define MAXIMUM_ETHERNET_FRAME_SIZE 1518 /* With FCS */
+#define MINIMUM_ETHERNET_FRAME_SIZE 64 /* With FCS */
+#define ETHERNET_FCS_SIZE 4
+#define MAXIMUM_ETHERNET_PACKET_SIZE \
+ (MAXIMUM_ETHERNET_FRAME_SIZE - ETHERNET_FCS_SIZE)
+#define MINIMUM_ETHERNET_PACKET_SIZE \
+ (MINIMUM_ETHERNET_FRAME_SIZE - ETHERNET_FCS_SIZE)
+#define CRC_LENGTH ETHERNET_FCS_SIZE
+#define MAX_JUMBO_FRAME_SIZE 0x3F00
+
+
+/* 802.1q VLAN Packet Sizes */
+#define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMAed) */
+
+/* Ethertype field values */
+#define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */
+#define ETHERNET_IP_TYPE 0x0800 /* IP packets */
+#define ETHERNET_ARP_TYPE 0x0806 /* Address Resolution Protocol (ARP) */
+
+/* Packet Header defines */
+#define IP_PROTOCOL_TCP 6
+#define IP_PROTOCOL_UDP 0x11
+
+/* This defines the bits that are set in the Interrupt Mask
+ * Set/Read Register. Each bit is documented below:
+ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0)
+ * o RXSEQ = Receive Sequence Error
+ */
+#define POLL_IMS_ENABLE_MASK ( \
+ E1000_IMS_RXDMT0 | \
+ E1000_IMS_RXSEQ)
+
+/* This defines the bits that are set in the Interrupt Mask
+ * Set/Read Register. Each bit is documented below:
+ * o RXT0 = Receiver Timer Interrupt (ring 0)
+ * o TXDW = Transmit Descriptor Written Back
+ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0)
+ * o RXSEQ = Receive Sequence Error
+ * o LSC = Link Status Change
+ */
+#define IMS_ENABLE_MASK ( \
+ E1000_IMS_RXT0 | \
+ E1000_IMS_TXDW | \
+ E1000_IMS_RXDMT0 | \
+ E1000_IMS_RXSEQ | \
+ E1000_IMS_LSC)
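+
+/* Illustrative sketch (assumes the E1000_WRITE_REG() accessor declared in
+ * e1000_osdep.h): unmasking all of the causes above in one shot.
+ */
+#if 0
+static inline void e1000_enable_irqs_example(struct e1000_hw *hw)
+{
+ E1000_WRITE_REG(hw, IMS, IMS_ENABLE_MASK);
+}
+#endif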
+
+/* The number of high/low register pairs in the RAR. The RAR (Receive Address
+ * Registers) holds the directed and multicast addresses that we monitor. We
+ * reserve one of these spots for our directed address, allowing us room for
+ * E1000_RAR_ENTRIES - 1 multicast addresses.
+ */
+#define E1000_RAR_ENTRIES 16
+
+#define MIN_NUMBER_OF_DESCRIPTORS 8
+#define MAX_NUMBER_OF_DESCRIPTORS 0xFFF8
+
+/* Receive Descriptor */
+struct e1000_rx_desc {
+ uint64_t buffer_addr; /* Address of the descriptor's data buffer */
+ uint16_t length; /* Length of data DMAed into data buffer */
+ uint16_t csum; /* Packet checksum */
+ uint8_t status; /* Descriptor status */
+ uint8_t errors; /* Descriptor Errors */
+ uint16_t special;
+};
+
+/* Receive Descriptor bit definitions */
+#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */
+#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */
+#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */
+#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */
+#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */
+#define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */
+#define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter */
+#define E1000_RXD_ERR_CE 0x01 /* CRC Error */
+#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */
+#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */
+#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */
+#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */
+#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */
+#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */
+#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */
+#define E1000_RXD_SPC_PRI_MASK 0xE000 /* Priority is in upper 3 bits */
+#define E1000_RXD_SPC_PRI_SHIFT 0x000D /* Priority is in upper 3 of 16 */
+#define E1000_RXD_SPC_CFI_MASK 0x1000 /* CFI is bit 12 */
+#define E1000_RXD_SPC_CFI_SHIFT 0x000C /* CFI is bit 12 */
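+
+/* Illustrative sketch: pulling the 802.1q fields out of the "special"
+ * word of a receive descriptor with the masks and shifts above.
+ */
+#if 0
+static inline uint16_t e1000_rx_vlan_id_example(struct e1000_rx_desc *rx_desc)
+{
+ return rx_desc->special & E1000_RXD_SPC_VLAN_MASK;
+}
+
+static inline uint8_t e1000_rx_priority_example(struct e1000_rx_desc *rx_desc)
+{
+ return (rx_desc->special & E1000_RXD_SPC_PRI_MASK) >> E1000_RXD_SPC_PRI_SHIFT;
+}
+#endif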
+
+/* mask to determine if packets should be dropped due to frame errors */
+#define E1000_RXD_ERR_FRAME_ERR_MASK ( \
+ E1000_RXD_ERR_CE | \
+ E1000_RXD_ERR_SE | \
+ E1000_RXD_ERR_SEQ | \
+ E1000_RXD_ERR_CXE | \
+ E1000_RXD_ERR_RXE)
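+
+/* Illustrative sketch: how a receive path might apply the mask above to
+ * decide whether a completed descriptor carries a damaged frame.
+ */
+#if 0
+static inline int e1000_rx_frame_bad_example(struct e1000_rx_desc *rx_desc)
+{
+ return (rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) != 0;
+}
+#endif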
+
+/* Transmit Descriptor */
+struct e1000_tx_desc {
+ uint64_t buffer_addr; /* Address of the descriptor's data buffer */
+ union {
+ uint32_t data;
+ struct {
+ uint16_t length; /* Data buffer length */
+ uint8_t cso; /* Checksum offset */
+ uint8_t cmd; /* Descriptor control */
+ } flags;
+ } lower;
+ union {
+ uint32_t data;
+ struct {
+ uint8_t status; /* Descriptor status */
+ uint8_t css; /* Checksum start */
+ uint16_t special;
+ } fields;
+ } upper;
+};
+
+/* Transmit Descriptor bit definitions */
+#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */
+#define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */
+#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */
+#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */
+#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */
+#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
+#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */
+#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */
+#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */
+#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */
+#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */
+#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */
+#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */
+#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */
+#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */
+#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */
+#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */
+#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */
+#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */
+#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */
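+
+/* Illustrative sketch: filling a legacy transmit descriptor for a
+ * single-buffer frame. EOP marks the last buffer of the frame, IFCS asks
+ * the MAC to append the CRC, and RS requests a status writeback so the
+ * driver can reclaim the buffer once E1000_TXD_STAT_DD appears.
+ */
+#if 0
+static inline void e1000_tx_desc_example(struct e1000_tx_desc *tx_desc,
+ uint64_t dma_addr, uint16_t len)
+{
+ tx_desc->buffer_addr = dma_addr;
+ tx_desc->lower.data = (uint32_t) len | E1000_TXD_CMD_EOP |
+ E1000_TXD_CMD_IFCS | E1000_TXD_CMD_RS;
+ tx_desc->upper.data = 0;
+}
+#endif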
+
+/* Offload Context Descriptor */
+struct e1000_context_desc {
+ union {
+ uint32_t ip_config;
+ struct {
+ uint8_t ipcss; /* IP checksum start */
+ uint8_t ipcso; /* IP checksum offset */
+ uint16_t ipcse; /* IP checksum end */
+ } ip_fields;
+ } lower_setup;
+ union {
+ uint32_t tcp_config;
+ struct {
+ uint8_t tucss; /* TCP checksum start */
+ uint8_t tucso; /* TCP checksum offset */
+ uint16_t tucse; /* TCP checksum end */
+ } tcp_fields;
+ } upper_setup;
+ uint32_t cmd_and_length; /* Descriptor command and payload length */
+ union {
+ uint32_t data;
+ struct {
+ uint8_t status; /* Descriptor status */
+ uint8_t hdr_len; /* Header length */
+ uint16_t mss; /* Maximum segment size */
+ } fields;
+ } tcp_seg_setup;
+};
+
+/* Offload data descriptor */
+struct e1000_data_desc {
+ uint64_t buffer_addr; /* Address of the descriptor's data buffer */
+ union {
+ uint32_t data;
+ struct {
+ uint16_t length; /* Data buffer length */
+ uint8_t typ_len_ext; /* Descriptor type and length extension */
+ uint8_t cmd; /* Descriptor control */
+ } flags;
+ } lower;
+ union {
+ uint32_t data;
+ struct {
+ uint8_t status; /* Descriptor status */
+ uint8_t popts; /* Packet Options */
+ uint16_t special; /* Special field (e.g. VLAN tag) */
+ } fields;
+ } upper;
+};
+
+/* Filters */
+#define E1000_NUM_UNICAST 16 /* Unicast filter entries */
+#define E1000_MC_TBL_SIZE 128 /* Multicast Filter Table (4096 bits) */
+#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */
+
+
+/* Receive Address Register */
+struct e1000_rar {
+ volatile uint32_t low; /* receive address low */
+ volatile uint32_t high; /* receive address high */
+};
+
+/* The number of entries in the Multicast Table Array (MTA). */
+#define E1000_NUM_MTA_REGISTERS 128
+
+/* IPv4 Address Table Entry */
+struct e1000_ipv4_at_entry {
+ volatile uint32_t ipv4_addr; /* IP Address (RW) */
+ volatile uint32_t reserved;
+};
+
+/* Four wakeup IP addresses are supported */
+#define E1000_WAKEUP_IP_ADDRESS_COUNT_MAX 4
+#define E1000_IP4AT_SIZE E1000_WAKEUP_IP_ADDRESS_COUNT_MAX
+#define E1000_IP6AT_SIZE 1
+
+/* IPv6 Address Table Entry */
+struct e1000_ipv6_at_entry {
+ volatile uint8_t ipv6_addr[16];
+};
+
+/* Flexible Filter Length Table Entry */
+struct e1000_fflt_entry {
+ volatile uint32_t length; /* Flexible Filter Length (RW) */
+ volatile uint32_t reserved;
+};
+
+/* Flexible Filter Mask Table Entry */
+struct e1000_ffmt_entry {
+ volatile uint32_t mask; /* Flexible Filter Mask (RW) */
+ volatile uint32_t reserved;
+};
+
+/* Flexible Filter Value Table Entry */
+struct e1000_ffvt_entry {
+ volatile uint32_t value; /* Flexible Filter Value (RW) */
+ volatile uint32_t reserved;
+};
+
+/* Four Flexible Filters are supported */
+#define E1000_FLEXIBLE_FILTER_COUNT_MAX 4
+
+/* Each Flexible Filter is at most 128 (0x80) bytes in length */
+#define E1000_FLEXIBLE_FILTER_SIZE_MAX 128
+
+#define E1000_FFLT_SIZE E1000_FLEXIBLE_FILTER_COUNT_MAX
+#define E1000_FFMT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX
+#define E1000_FFVT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX
+
+/* Register Set. (82543, 82544)
+ *
+ * Registers are defined to be 32 bits and should be accessed as 32 bit values.
+ * These registers are physically located on the NIC, but are mapped into the
+ * host memory address space.
+ *
+ * RW - register is both readable and writable
+ * RO - register is read only
+ * WO - register is write only
+ * R/clr - register is read only and is cleared when read
+ * A - register array
+ */
+#define E1000_CTRL 0x00000 /* Device Control - RW */
+#define E1000_STATUS 0x00008 /* Device Status - RO */
+#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */
+#define E1000_EERD 0x00014 /* EEPROM Read - RW */
+#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */
+#define E1000_MDIC 0x00020 /* MDI Control - RW */
+#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */
+#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */
+#define E1000_FCT 0x00030 /* Flow Control Type - RW */
+#define E1000_VET 0x00038 /* VLAN Ether Type - RW */
+#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */
+#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */
+#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */
+#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */
+#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */
+#define E1000_RCTL 0x00100 /* RX Control - RW */
+#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */
+#define E1000_TXCW 0x00178 /* TX Configuration Word - RW */
+#define E1000_RXCW 0x00180 /* RX Configuration Word - RO */
+#define E1000_TCTL 0x00400 /* TX Control - RW */
+#define E1000_TIPG 0x00410 /* TX Inter-packet gap -RW */
+#define E1000_TBT 0x00448 /* TX Burst Timer - RW */
+#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */
+#define E1000_LEDCTL 0x00E00 /* LED Control - RW */
+#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */
+#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */
+#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */
+#define E1000_RDBAL 0x02800 /* RX Descriptor Base Address Low - RW */
+#define E1000_RDBAH 0x02804 /* RX Descriptor Base Address High - RW */
+#define E1000_RDLEN 0x02808 /* RX Descriptor Length - RW */
+#define E1000_RDH 0x02810 /* RX Descriptor Head - RW */
+#define E1000_RDT 0x02818 /* RX Descriptor Tail - RW */
+#define E1000_RDTR 0x02820 /* RX Delay Timer - RW */
+#define E1000_RXDCTL 0x02828 /* RX Descriptor Control - RW */
+#define E1000_RADV 0x0282C /* RX Interrupt Absolute Delay Timer - RW */
+#define E1000_RSRPD 0x02C00 /* RX Small Packet Detect - RW */
+#define E1000_TXDMAC 0x03000 /* TX DMA Control - RW */
+#define E1000_TDBAL 0x03800 /* TX Descriptor Base Address Low - RW */
+#define E1000_TDBAH 0x03804 /* TX Descriptor Base Address High - RW */
+#define E1000_TDLEN 0x03808 /* TX Descriptor Length - RW */
+#define E1000_TDH 0x03810 /* TX Descriptor Head - RW */
+#define E1000_TDT 0x03818 /* TX Descriptor Tail - RW */
+#define E1000_TIDV 0x03820 /* TX Interrupt Delay Value - RW */
+#define E1000_TXDCTL 0x03828 /* TX Descriptor Control - RW */
+#define E1000_TADV 0x0382C /* TX Interrupt Absolute Delay Val - RW */
+#define E1000_TSPMT 0x03830 /* TCP Segmentation PAD & Min Threshold - RW */
+#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */
+#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */
+#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */
+#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */
+#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */
+#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */
+#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */
+#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */
+#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */
+#define E1000_COLC 0x04028 /* Collision Count - R/clr */
+#define E1000_DC 0x04030 /* Defer Count - R/clr */
+#define E1000_TNCRS 0x04034 /* TX-No CRS - R/clr */
+#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */
+#define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */
+#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */
+#define E1000_XONRXC 0x04048 /* XON RX Count - R/clr */
+#define E1000_XONTXC 0x0404C /* XON TX Count - R/clr */
+#define E1000_XOFFRXC 0x04050 /* XOFF RX Count - R/clr */
+#define E1000_XOFFTXC 0x04054 /* XOFF TX Count - R/clr */
+#define E1000_FCRUC 0x04058 /* Flow Control RX Unsupported Count- R/clr */
+#define E1000_PRC64 0x0405C /* Packets RX (64 bytes) - R/clr */
+#define E1000_PRC127 0x04060 /* Packets RX (65-127 bytes) - R/clr */
+#define E1000_PRC255 0x04064 /* Packets RX (128-255 bytes) - R/clr */
+#define E1000_PRC511 0x04068 /* Packets RX (256-511 bytes) - R/clr */
+#define E1000_PRC1023 0x0406C /* Packets RX (512-1023 bytes) - R/clr */
+#define E1000_PRC1522 0x04070 /* Packets RX (1024-1522 bytes) - R/clr */
+#define E1000_GPRC 0x04074 /* Good Packets RX Count - R/clr */
+#define E1000_BPRC 0x04078 /* Broadcast Packets RX Count - R/clr */
+#define E1000_MPRC 0x0407C /* Multicast Packets RX Count - R/clr */
+#define E1000_GPTC 0x04080 /* Good Packets TX Count - R/clr */
+#define E1000_GORCL 0x04088 /* Good Octets RX Count Low - R/clr */
+#define E1000_GORCH 0x0408C /* Good Octets RX Count High - R/clr */
+#define E1000_GOTCL 0x04090 /* Good Octets TX Count Low - R/clr */
+#define E1000_GOTCH 0x04094 /* Good Octets TX Count High - R/clr */
+#define E1000_RNBC 0x040A0 /* RX No Buffers Count - R/clr */
+#define E1000_RUC 0x040A4 /* RX Undersize Count - R/clr */
+#define E1000_RFC 0x040A8 /* RX Fragment Count - R/clr */
+#define E1000_ROC 0x040AC /* RX Oversize Count - R/clr */
+#define E1000_RJC 0x040B0 /* RX Jabber Count - R/clr */
+#define E1000_MGTPRC 0x040B4 /* Management Packets RX Count - R/clr */
+#define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */
+#define E1000_MGTPTC 0x040BC /* Management Packets TX Count - R/clr */
+#define E1000_TORL 0x040C0 /* Total Octets RX Low - R/clr */
+#define E1000_TORH 0x040C4 /* Total Octets RX High - R/clr */
+#define E1000_TOTL 0x040C8 /* Total Octets TX Low - R/clr */
+#define E1000_TOTH 0x040CC /* Total Octets TX High - R/clr */
+#define E1000_TPR 0x040D0 /* Total Packets RX - R/clr */
+#define E1000_TPT 0x040D4 /* Total Packets TX - R/clr */
+#define E1000_PTC64 0x040D8 /* Packets TX (64 bytes) - R/clr */
+#define E1000_PTC127 0x040DC /* Packets TX (65-127 bytes) - R/clr */
+#define E1000_PTC255 0x040E0 /* Packets TX (128-255 bytes) - R/clr */
+#define E1000_PTC511 0x040E4 /* Packets TX (256-511 bytes) - R/clr */
+#define E1000_PTC1023 0x040E8 /* Packets TX (512-1023 bytes) - R/clr */
+#define E1000_PTC1522 0x040EC /* Packets TX (1024-1522 bytes) - R/clr */
+#define E1000_MPTC 0x040F0 /* Multicast Packets TX Count - R/clr */
+#define E1000_BPTC 0x040F4 /* Broadcast Packets TX Count - R/clr */
+#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context TX - R/clr */
+#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context TX Fail - R/clr */
+#define E1000_RXCSUM 0x05000 /* RX Checksum Control - RW */
+#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */
+#define E1000_RA 0x05400 /* Receive Address - RW Array */
+#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */
+#define E1000_WUC 0x05800 /* Wakeup Control - RW */
+#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */
+#define E1000_WUS 0x05810 /* Wakeup Status - RO */
+#define E1000_MANC 0x05820 /* Management Control - RW */
+#define E1000_IPAV 0x05838 /* IP Address Valid - RW */
+#define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */
+#define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */
+#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */
+#define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */
+#define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */
+#define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */
+#define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */
+
+/* Register Set (82542)
+ *
+ * Some of the 82542 registers are located at different offsets than they are
+ * in more current versions of the 8254x. Despite the difference in location,
+ * the registers function in the same manner.
+ */
+#define E1000_82542_CTRL E1000_CTRL
+#define E1000_82542_STATUS E1000_STATUS
+#define E1000_82542_EECD E1000_EECD
+#define E1000_82542_EERD E1000_EERD
+#define E1000_82542_CTRL_EXT E1000_CTRL_EXT
+#define E1000_82542_MDIC E1000_MDIC
+#define E1000_82542_FCAL E1000_FCAL
+#define E1000_82542_FCAH E1000_FCAH
+#define E1000_82542_FCT E1000_FCT
+#define E1000_82542_VET E1000_VET
+#define E1000_82542_RA 0x00040
+#define E1000_82542_ICR E1000_ICR
+#define E1000_82542_ITR E1000_ITR
+#define E1000_82542_ICS E1000_ICS
+#define E1000_82542_IMS E1000_IMS
+#define E1000_82542_IMC E1000_IMC
+#define E1000_82542_RCTL E1000_RCTL
+#define E1000_82542_RDTR 0x00108
+#define E1000_82542_RDBAL 0x00110
+#define E1000_82542_RDBAH 0x00114
+#define E1000_82542_RDLEN 0x00118
+#define E1000_82542_RDH 0x00120
+#define E1000_82542_RDT 0x00128
+#define E1000_82542_FCRTH 0x00160
+#define E1000_82542_FCRTL 0x00168
+#define E1000_82542_FCTTV E1000_FCTTV
+#define E1000_82542_TXCW E1000_TXCW
+#define E1000_82542_RXCW E1000_RXCW
+#define E1000_82542_MTA 0x00200
+#define E1000_82542_TCTL E1000_TCTL
+#define E1000_82542_TIPG E1000_TIPG
+#define E1000_82542_TDBAL 0x00420
+#define E1000_82542_TDBAH 0x00424
+#define E1000_82542_TDLEN 0x00428
+#define E1000_82542_TDH 0x00430
+#define E1000_82542_TDT 0x00438
+#define E1000_82542_TIDV 0x00440
+#define E1000_82542_TBT E1000_TBT
+#define E1000_82542_AIT E1000_AIT
+#define E1000_82542_VFTA 0x00600
+#define E1000_82542_LEDCTL E1000_LEDCTL
+#define E1000_82542_PBA E1000_PBA
+#define E1000_82542_RXDCTL E1000_RXDCTL
+#define E1000_82542_RADV E1000_RADV
+#define E1000_82542_RSRPD E1000_RSRPD
+#define E1000_82542_TXDMAC E1000_TXDMAC
+#define E1000_82542_TXDCTL E1000_TXDCTL
+#define E1000_82542_TADV E1000_TADV
+#define E1000_82542_TSPMT E1000_TSPMT
+#define E1000_82542_CRCERRS E1000_CRCERRS
+#define E1000_82542_ALGNERRC E1000_ALGNERRC
+#define E1000_82542_SYMERRS E1000_SYMERRS
+#define E1000_82542_RXERRC E1000_RXERRC
+#define E1000_82542_MPC E1000_MPC
+#define E1000_82542_SCC E1000_SCC
+#define E1000_82542_ECOL E1000_ECOL
+#define E1000_82542_MCC E1000_MCC
+#define E1000_82542_LATECOL E1000_LATECOL
+#define E1000_82542_COLC E1000_COLC
+#define E1000_82542_DC E1000_DC
+#define E1000_82542_TNCRS E1000_TNCRS
+#define E1000_82542_SEC E1000_SEC
+#define E1000_82542_CEXTERR E1000_CEXTERR
+#define E1000_82542_RLEC E1000_RLEC
+#define E1000_82542_XONRXC E1000_XONRXC
+#define E1000_82542_XONTXC E1000_XONTXC
+#define E1000_82542_XOFFRXC E1000_XOFFRXC
+#define E1000_82542_XOFFTXC E1000_XOFFTXC
+#define E1000_82542_FCRUC E1000_FCRUC
+#define E1000_82542_PRC64 E1000_PRC64
+#define E1000_82542_PRC127 E1000_PRC127
+#define E1000_82542_PRC255 E1000_PRC255
+#define E1000_82542_PRC511 E1000_PRC511
+#define E1000_82542_PRC1023 E1000_PRC1023
+#define E1000_82542_PRC1522 E1000_PRC1522
+#define E1000_82542_GPRC E1000_GPRC
+#define E1000_82542_BPRC E1000_BPRC
+#define E1000_82542_MPRC E1000_MPRC
+#define E1000_82542_GPTC E1000_GPTC
+#define E1000_82542_GORCL E1000_GORCL
+#define E1000_82542_GORCH E1000_GORCH
+#define E1000_82542_GOTCL E1000_GOTCL
+#define E1000_82542_GOTCH E1000_GOTCH
+#define E1000_82542_RNBC E1000_RNBC
+#define E1000_82542_RUC E1000_RUC
+#define E1000_82542_RFC E1000_RFC
+#define E1000_82542_ROC E1000_ROC
+#define E1000_82542_RJC E1000_RJC
+#define E1000_82542_MGTPRC E1000_MGTPRC
+#define E1000_82542_MGTPDC E1000_MGTPDC
+#define E1000_82542_MGTPTC E1000_MGTPTC
+#define E1000_82542_TORL E1000_TORL
+#define E1000_82542_TORH E1000_TORH
+#define E1000_82542_TOTL E1000_TOTL
+#define E1000_82542_TOTH E1000_TOTH
+#define E1000_82542_TPR E1000_TPR
+#define E1000_82542_TPT E1000_TPT
+#define E1000_82542_PTC64 E1000_PTC64
+#define E1000_82542_PTC127 E1000_PTC127
+#define E1000_82542_PTC255 E1000_PTC255
+#define E1000_82542_PTC511 E1000_PTC511
+#define E1000_82542_PTC1023 E1000_PTC1023
+#define E1000_82542_PTC1522 E1000_PTC1522
+#define E1000_82542_MPTC E1000_MPTC
+#define E1000_82542_BPTC E1000_BPTC
+#define E1000_82542_TSCTC E1000_TSCTC
+#define E1000_82542_TSCTFC E1000_TSCTFC
+#define E1000_82542_RXCSUM E1000_RXCSUM
+#define E1000_82542_WUC E1000_WUC
+#define E1000_82542_WUFC E1000_WUFC
+#define E1000_82542_WUS E1000_WUS
+#define E1000_82542_MANC E1000_MANC
+#define E1000_82542_IPAV E1000_IPAV
+#define E1000_82542_IP4AT E1000_IP4AT
+#define E1000_82542_IP6AT E1000_IP6AT
+#define E1000_82542_WUPL E1000_WUPL
+#define E1000_82542_WUPM E1000_WUPM
+#define E1000_82542_FFLT E1000_FFLT
+#define E1000_82542_FFMT E1000_FFMT
+#define E1000_82542_FFVT E1000_FFVT
+
+/* Statistics counters collected by the MAC */
+struct e1000_hw_stats {
+ uint64_t crcerrs;
+ uint64_t algnerrc;
+ uint64_t symerrs;
+ uint64_t rxerrc;
+ uint64_t mpc;
+ uint64_t scc;
+ uint64_t ecol;
+ uint64_t mcc;
+ uint64_t latecol;
+ uint64_t colc;
+ uint64_t dc;
+ uint64_t tncrs;
+ uint64_t sec;
+ uint64_t cexterr;
+ uint64_t rlec;
+ uint64_t xonrxc;
+ uint64_t xontxc;
+ uint64_t xoffrxc;
+ uint64_t xofftxc;
+ uint64_t fcruc;
+ uint64_t prc64;
+ uint64_t prc127;
+ uint64_t prc255;
+ uint64_t prc511;
+ uint64_t prc1023;
+ uint64_t prc1522;
+ uint64_t gprc;
+ uint64_t bprc;
+ uint64_t mprc;
+ uint64_t gptc;
+ uint64_t gorcl;
+ uint64_t gorch;
+ uint64_t gotcl;
+ uint64_t gotch;
+ uint64_t rnbc;
+ uint64_t ruc;
+ uint64_t rfc;
+ uint64_t roc;
+ uint64_t rjc;
+ uint64_t mgprc;
+ uint64_t mgpdc;
+ uint64_t mgptc;
+ uint64_t torl;
+ uint64_t torh;
+ uint64_t totl;
+ uint64_t toth;
+ uint64_t tpr;
+ uint64_t tpt;
+ uint64_t ptc64;
+ uint64_t ptc127;
+ uint64_t ptc255;
+ uint64_t ptc511;
+ uint64_t ptc1023;
+ uint64_t ptc1522;
+ uint64_t mptc;
+ uint64_t bptc;
+ uint64_t tsctc;
+ uint64_t tsctfc;
+};
+
+/* Structure containing variables used by the shared code (e1000_hw.c) */
+struct e1000_hw {
+ uint8_t *hw_addr;
+ e1000_mac_type mac_type;
+ e1000_media_type media_type;
+ void *back;
+ e1000_fc_type fc;
+ e1000_bus_speed bus_speed;
+ e1000_bus_width bus_width;
+ e1000_bus_type bus_type;
+ uint32_t io_base;
+ uint32_t phy_id;
+ uint32_t phy_revision;
+ uint32_t phy_addr;
+ uint32_t original_fc;
+ uint32_t txcw;
+ uint32_t autoneg_failed;
+ uint32_t max_frame_size;
+ uint32_t min_frame_size;
+ uint32_t mc_filter_type;
+ uint32_t num_mc_addrs;
+ uint32_t collision_delta;
+ uint32_t tx_packet_delta;
+ uint32_t ledctl_default;
+ uint32_t ledctl_mode1;
+ uint32_t ledctl_mode2;
+ uint16_t autoneg_advertised;
+ uint16_t pci_cmd_word;
+ uint16_t fc_high_water;
+ uint16_t fc_low_water;
+ uint16_t fc_pause_time;
+ uint16_t current_ifs_val;
+ uint16_t ifs_min_val;
+ uint16_t ifs_max_val;
+ uint16_t ifs_step_size;
+ uint16_t ifs_ratio;
+ uint16_t device_id;
+ uint16_t vendor_id;
+ uint16_t subsystem_id;
+ uint16_t subsystem_vendor_id;
+ uint8_t revision_id;
+ uint8_t autoneg;
+ uint8_t mdix;
+ uint8_t forced_speed_duplex;
+ uint8_t wait_autoneg_complete;
+ uint8_t dma_fairness;
+ uint8_t mac_addr[NODE_ADDRESS_SIZE];
+ uint8_t perm_mac_addr[NODE_ADDRESS_SIZE];
+ boolean_t disable_polarity_correction;
+ boolean_t get_link_status;
+ boolean_t tbi_compatibility_en;
+ boolean_t tbi_compatibility_on;
+ boolean_t fc_send_xon;
+ boolean_t report_tx_early;
+ boolean_t adaptive_ifs;
+ boolean_t ifs_params_forced;
+ boolean_t in_ifs_mode;
+};
+
+
+#define E1000_EEPROM_SWDPIN0 0x0001 /* SWDPIN 0 EEPROM Value */
+#define E1000_EEPROM_LED_LOGIC 0x0020 /* Led Logic Word */
+
+/* Register Bit Masks */
+/* Device Control */
+#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */
+#define E1000_CTRL_BEM 0x00000002 /* Endian Mode.0=little,1=big */
+#define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */
+#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */
+#define E1000_CTRL_TME 0x00000010 /* Test mode. 0=normal,1=test */
+#define E1000_CTRL_SLE 0x00000020 /* Serial Link on 0=dis,1=en */
+#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */
+#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */
+#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */
+#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */
+#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */
+#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */
+#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */
+#define E1000_CTRL_BEM32 0x00000400 /* Big Endian 32 mode */
+#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */
+#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */
+#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */
+#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */
+#define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */
+#define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */
+#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */
+#define E1000_CTRL_SWDPIO1 0x00800000 /* SWDPIN 1 input or output */
+#define E1000_CTRL_SWDPIO2 0x01000000 /* SWDPIN 2 input or output */
+#define E1000_CTRL_SWDPIO3 0x02000000 /* SWDPIN 3 input or output */
+#define E1000_CTRL_RST 0x04000000 /* Global reset */
+#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */
+#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */
+#define E1000_CTRL_RTE 0x20000000 /* Routing tag enable */
+#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */
+#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */
+
+/* Device Status */
+#define E1000_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */
+#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */
+#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */
+#define E1000_STATUS_FUNC_0 0x00000000 /* Function 0 */
+#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */
+#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */
+#define E1000_STATUS_TBIMODE 0x00000020 /* TBI mode */
+#define E1000_STATUS_SPEED_MASK 0x000000C0
+#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */
+#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */
+#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */
+#define E1000_STATUS_ASDV 0x00000300 /* Auto speed detect value */
+#define E1000_STATUS_MTXCKOK 0x00000400 /* MTX clock running OK */
+#define E1000_STATUS_PCI66 0x00000800 /* In 66Mhz slot */
+#define E1000_STATUS_BUS64 0x00001000 /* In 64 bit slot */
+#define E1000_STATUS_PCIX_MODE 0x00002000 /* PCI-X mode */
+#define E1000_STATUS_PCIX_SPEED 0x0000C000 /* PCI-X bus speed */
+
+/* Constants used to interpret the masked PCI-X bus speed. */
+#define E1000_STATUS_PCIX_SPEED_66 0x00000000 /* PCI-X bus speed 50-66 MHz */
+#define E1000_STATUS_PCIX_SPEED_100 0x00004000 /* PCI-X bus speed 66-100 MHz */
+#define E1000_STATUS_PCIX_SPEED_133 0x00008000 /* PCI-X bus speed 100-133 MHz */
+
+/* EEPROM/Flash Control */
+#define E1000_EECD_SK 0x00000001 /* EEPROM Clock */
+#define E1000_EECD_CS 0x00000002 /* EEPROM Chip Select */
+#define E1000_EECD_DI 0x00000004 /* EEPROM Data In */
+#define E1000_EECD_DO 0x00000008 /* EEPROM Data Out */
+#define E1000_EECD_FWE_MASK 0x00000030
+#define E1000_EECD_FWE_DIS 0x00000010 /* Disable FLASH writes */
+#define E1000_EECD_FWE_EN 0x00000020 /* Enable FLASH writes */
+#define E1000_EECD_FWE_SHIFT 4
+#define E1000_EECD_SIZE 0x00000200 /* EEPROM Size (0=64 word 1=256 word) */
+#define E1000_EECD_REQ 0x00000040 /* EEPROM Access Request */
+#define E1000_EECD_GNT 0x00000080 /* EEPROM Access Grant */
+#define E1000_EECD_PRES 0x00000100 /* EEPROM Present */
+
+/* EEPROM Read */
+#define E1000_EERD_START 0x00000001 /* Start Read */
+#define E1000_EERD_DONE 0x00000010 /* Read Done */
+#define E1000_EERD_ADDR_SHIFT 8
+#define E1000_EERD_ADDR_MASK 0x0000FF00 /* Read Address */
+#define E1000_EERD_DATA_SHIFT 16
+#define E1000_EERD_DATA_MASK 0xFFFF0000 /* Read Data */
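+
+/* Illustrative sketch of the EERD handshake implied by the fields above:
+ * latch the word address with START set, poll for DONE, then extract the
+ * data field. (The shared code bit-bangs the EEPROM through EECD on the
+ * older MACs; this register-based path is shown only to document the
+ * field layout. E1000_READ_REG/E1000_WRITE_REG come from e1000_osdep.h.)
+ */
+#if 0
+static inline uint16_t e1000_eerd_read_example(struct e1000_hw *hw, uint8_t addr)
+{
+ uint32_t eerd;
+
+ E1000_WRITE_REG(hw, EERD,
+ ((uint32_t) addr << E1000_EERD_ADDR_SHIFT) | E1000_EERD_START);
+ do {
+ eerd = E1000_READ_REG(hw, EERD);
+ } while (!(eerd & E1000_EERD_DONE));
+ return (eerd & E1000_EERD_DATA_MASK) >> E1000_EERD_DATA_SHIFT;
+}
+#endif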
+
+/* Extended Device Control */
+#define E1000_CTRL_EXT_GPI0_EN 0x00000001 /* Maps SDP4 to GPI0 */
+#define E1000_CTRL_EXT_GPI1_EN 0x00000002 /* Maps SDP5 to GPI1 */
+#define E1000_CTRL_EXT_PHYINT_EN E1000_CTRL_EXT_GPI1_EN
+#define E1000_CTRL_EXT_GPI2_EN 0x00000004 /* Maps SDP6 to GPI2 */
+#define E1000_CTRL_EXT_GPI3_EN 0x00000008 /* Maps SDP7 to GPI3 */
+#define E1000_CTRL_EXT_SDP4_DATA 0x00000010 /* Value of SW Defineable Pin 4 */
+#define E1000_CTRL_EXT_SDP5_DATA 0x00000020 /* Value of SW Defineable Pin 5 */
+#define E1000_CTRL_EXT_PHY_INT E1000_CTRL_EXT_SDP5_DATA
+#define E1000_CTRL_EXT_SDP6_DATA 0x00000040 /* Value of SW Defineable Pin 6 */
+#define E1000_CTRL_EXT_SDP7_DATA 0x00000080 /* Value of SW Defineable Pin 7 */
+#define E1000_CTRL_EXT_SDP4_DIR 0x00000100 /* Direction of SDP4 0=in 1=out */
+#define E1000_CTRL_EXT_SDP5_DIR 0x00000200 /* Direction of SDP5 0=in 1=out */
+#define E1000_CTRL_EXT_SDP6_DIR 0x00000400 /* Direction of SDP6 0=in 1=out */
+#define E1000_CTRL_EXT_SDP7_DIR 0x00000800 /* Direction of SDP7 0=in 1=out */
+#define E1000_CTRL_EXT_ASDCHK 0x00001000 /* Initiate an ASD sequence */
+#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */
+#define E1000_CTRL_EXT_IPS 0x00004000 /* Invert Power State */
+#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */
+#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000
+#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000
+#define E1000_CTRL_EXT_LINK_MODE_TBI 0x00C00000
+#define E1000_CTRL_EXT_WR_WMARK_MASK 0x03000000
+#define E1000_CTRL_EXT_WR_WMARK_256 0x00000000
+#define E1000_CTRL_EXT_WR_WMARK_320 0x01000000
+#define E1000_CTRL_EXT_WR_WMARK_384 0x02000000
+#define E1000_CTRL_EXT_WR_WMARK_448 0x03000000
+
+/* MDI Control */
+#define E1000_MDIC_DATA_MASK 0x0000FFFF
+#define E1000_MDIC_REG_MASK 0x001F0000
+#define E1000_MDIC_REG_SHIFT 16
+#define E1000_MDIC_PHY_MASK 0x03E00000
+#define E1000_MDIC_PHY_SHIFT 21
+#define E1000_MDIC_OP_WRITE 0x04000000
+#define E1000_MDIC_OP_READ 0x08000000
+#define E1000_MDIC_READY 0x10000000
+#define E1000_MDIC_INT_EN 0x20000000
+#define E1000_MDIC_ERROR 0x40000000
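+
+/* Illustrative sketch of an MDI read through the MDIC register: compose
+ * the PHY address, register number, and READ opcode, then poll READY and
+ * check ERROR before using the data field. This is roughly what
+ * e1000_read_phy_reg() does on MACs with MDIC support; E1000_READ_REG/
+ * E1000_WRITE_REG are assumed from e1000_osdep.h.
+ */
+#if 0
+static inline int32_t e1000_mdic_read_example(struct e1000_hw *hw,
+ uint32_t reg, uint16_t *data)
+{
+ uint32_t mdic = (reg << E1000_MDIC_REG_SHIFT) |
+ (hw->phy_addr << E1000_MDIC_PHY_SHIFT) |
+ E1000_MDIC_OP_READ;
+
+ E1000_WRITE_REG(hw, MDIC, mdic);
+ do {
+ mdic = E1000_READ_REG(hw, MDIC);
+ } while (!(mdic & E1000_MDIC_READY));
+ if (mdic & E1000_MDIC_ERROR)
+ return -E1000_ERR_PHY;
+ *data = (uint16_t) (mdic & E1000_MDIC_DATA_MASK);
+ return E1000_SUCCESS;
+}
+#endif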
+
+/* LED Control */
+#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F
+#define E1000_LEDCTL_LED0_MODE_SHIFT 0
+#define E1000_LEDCTL_LED0_IVRT 0x00000040
+#define E1000_LEDCTL_LED0_BLINK 0x00000080
+#define E1000_LEDCTL_LED1_MODE_MASK 0x00000F00
+#define E1000_LEDCTL_LED1_MODE_SHIFT 8
+#define E1000_LEDCTL_LED1_IVRT 0x00004000
+#define E1000_LEDCTL_LED1_BLINK 0x00008000
+#define E1000_LEDCTL_LED2_MODE_MASK 0x000F0000
+#define E1000_LEDCTL_LED2_MODE_SHIFT 16
+#define E1000_LEDCTL_LED2_IVRT 0x00400000
+#define E1000_LEDCTL_LED2_BLINK 0x00800000
+#define E1000_LEDCTL_LED3_MODE_MASK 0x0F000000
+#define E1000_LEDCTL_LED3_MODE_SHIFT 24
+#define E1000_LEDCTL_LED3_IVRT 0x40000000
+#define E1000_LEDCTL_LED3_BLINK 0x80000000
+
+#define E1000_LEDCTL_MODE_LINK_10_1000 0x0
+#define E1000_LEDCTL_MODE_LINK_100_1000 0x1
+#define E1000_LEDCTL_MODE_LINK_UP 0x2
+#define E1000_LEDCTL_MODE_ACTIVITY 0x3
+#define E1000_LEDCTL_MODE_LINK_ACTIVITY 0x4
+#define E1000_LEDCTL_MODE_LINK_10 0x5
+#define E1000_LEDCTL_MODE_LINK_100 0x6
+#define E1000_LEDCTL_MODE_LINK_1000 0x7
+#define E1000_LEDCTL_MODE_PCIX_MODE 0x8
+#define E1000_LEDCTL_MODE_FULL_DUPLEX 0x9
+#define E1000_LEDCTL_MODE_COLLISION 0xA
+#define E1000_LEDCTL_MODE_BUS_SPEED 0xB
+#define E1000_LEDCTL_MODE_BUS_SIZE 0xC
+#define E1000_LEDCTL_MODE_PAUSED 0xD
+#define E1000_LEDCTL_MODE_LED_ON 0xE
+#define E1000_LEDCTL_MODE_LED_OFF 0xF
+
+/* Receive Address */
+#define E1000_RAH_AV 0x80000000 /* Receive address valid */
+
+/* Interrupt Cause Read */
+#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */
+#define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */
+#define E1000_ICR_LSC 0x00000004 /* Link Status Change */
+#define E1000_ICR_RXSEQ 0x00000008 /* rx sequence error */
+#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */
+#define E1000_ICR_RXO 0x00000040 /* rx overrun */
+#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */
+#define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */
+#define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */
+#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */
+#define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */
+#define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */
+#define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */
+#define E1000_ICR_TXD_LOW 0x00008000
+#define E1000_ICR_SRPD 0x00010000
+
+/* Interrupt Cause Set */
+#define E1000_ICS_TXDW E1000_ICR_TXDW /* Transmit desc written back */
+#define E1000_ICS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */
+#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */
+#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */
+#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
+#define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */
+#define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
+#define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */
+#define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
+#define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
+#define E1000_ICS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */
+#define E1000_ICS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */
+#define E1000_ICS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */
+#define E1000_ICS_TXD_LOW E1000_ICR_TXD_LOW
+#define E1000_ICS_SRPD E1000_ICR_SRPD
+
+/* Interrupt Mask Set */
+#define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */
+#define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */
+#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */
+#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */
+#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
+#define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */
+#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
+#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */
+#define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
+#define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
+#define E1000_IMS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */
+#define E1000_IMS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */
+#define E1000_IMS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */
+#define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW
+#define E1000_IMS_SRPD E1000_ICR_SRPD
+
+/* Interrupt Mask Clear */
+#define E1000_IMC_TXDW E1000_ICR_TXDW /* Transmit desc written back */
+#define E1000_IMC_TXQE E1000_ICR_TXQE /* Transmit Queue empty */
+#define E1000_IMC_LSC E1000_ICR_LSC /* Link Status Change */
+#define E1000_IMC_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */
+#define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
+#define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */
+#define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */
+#define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */
+#define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
+#define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
+#define E1000_IMC_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */
+#define E1000_IMC_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */
+#define E1000_IMC_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */
+#define E1000_IMC_TXD_LOW E1000_ICR_TXD_LOW
+#define E1000_IMC_SRPD E1000_ICR_SRPD
+
+/* Receive Control */
+#define E1000_RCTL_RST 0x00000001 /* Software reset */
+#define E1000_RCTL_EN 0x00000002 /* enable */
+#define E1000_RCTL_SBP 0x00000004 /* store bad packet */
+#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */
+#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */
+#define E1000_RCTL_LPE 0x00000020 /* long packet enable */
+#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */
+#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */
+#define E1000_RCTL_LBM_SLP 0x00000080 /* serial link loopback mode */
+#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */
+#define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */
+#define E1000_RCTL_RDMTS_QUAT 0x00000100 /* rx desc min threshold size */
+#define E1000_RCTL_RDMTS_EIGTH 0x00000200 /* rx desc min threshold size */
+#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */
+#define E1000_RCTL_MO_0 0x00000000 /* multicast offset 11:0 */
+#define E1000_RCTL_MO_1 0x00001000 /* multicast offset 12:1 */
+#define E1000_RCTL_MO_2 0x00002000 /* multicast offset 13:2 */
+#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */
+#define E1000_RCTL_MDR 0x00004000 /* multicast desc ring 0 */
+#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */
+/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */
+#define E1000_RCTL_SZ_2048 0x00000000 /* rx buffer size 2048 */
+#define E1000_RCTL_SZ_1024 0x00010000 /* rx buffer size 1024 */
+#define E1000_RCTL_SZ_512 0x00020000 /* rx buffer size 512 */
+#define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */
+/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */
+#define E1000_RCTL_SZ_16384 0x00010000 /* rx buffer size 16384 */
+#define E1000_RCTL_SZ_8192 0x00020000 /* rx buffer size 8192 */
+#define E1000_RCTL_SZ_4096 0x00030000 /* rx buffer size 4096 */
+#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */
+#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */
+#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */
+#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */
+#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */
+#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */
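+
+/* Illustrative sketch: a minimal receiver configuration composed from the
+ * bits above -- receiver enabled, broadcasts accepted, 2048-byte buffers
+ * (BSEX left clear, so the first size table applies).
+ */
+#if 0
+static inline uint32_t e1000_rctl_example(void)
+{
+ return E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_SZ_2048;
+}
+#endif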
+
+/* Receive Descriptor */
+#define E1000_RDT_DELAY 0x0000ffff /* Delay timer (1=1024us) */
+#define E1000_RDT_FPDB 0x80000000 /* Flush descriptor block */
+#define E1000_RDLEN_LEN 0x0007ff80 /* descriptor length */
+#define E1000_RDH_RDH 0x0000ffff /* receive descriptor head */
+#define E1000_RDT_RDT 0x0000ffff /* receive descriptor tail */
+
+/* Flow Control */
+#define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */
+#define E1000_FCRTH_XFCE 0x80000000 /* External Flow Control Enable */
+#define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */
+#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */
+
+/* Receive Descriptor Control */
+#define E1000_RXDCTL_PTHRESH 0x0000003F /* RXDCTL Prefetch Threshold */
+#define E1000_RXDCTL_HTHRESH 0x00003F00 /* RXDCTL Host Threshold */
+#define E1000_RXDCTL_WTHRESH 0x003F0000 /* RXDCTL Writeback Threshold */
+#define E1000_RXDCTL_GRAN 0x01000000 /* RXDCTL Granularity */
+
+/* Transmit Descriptor Control */
+#define E1000_TXDCTL_PTHRESH 0x000000FF /* TXDCTL Prefetch Threshold */
+#define E1000_TXDCTL_HTHRESH 0x0000FF00 /* TXDCTL Host Threshold */
+#define E1000_TXDCTL_WTHRESH 0x00FF0000 /* TXDCTL Writeback Threshold */
+#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */
+#define E1000_TXDCTL_LWTHRESH 0xFE000000 /* TXDCTL Low Threshold */
+#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */
+
+/* Transmit Configuration Word */
+#define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */
+#define E1000_TXCW_HD 0x00000040 /* TXCW half duplex */
+#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */
+#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */
+#define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */
+#define E1000_TXCW_RF 0x00003000 /* TXCW remote fault */
+#define E1000_TXCW_NP 0x00008000 /* TXCW next page */
+#define E1000_TXCW_CW 0x0000ffff /* TxConfigWord mask */
+#define E1000_TXCW_TXC 0x40000000 /* Transmit Config control */
+#define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */
+
+/* Receive Configuration Word */
+#define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */
+#define E1000_RXCW_NC 0x04000000 /* Receive config no carrier */
+#define E1000_RXCW_IV 0x08000000 /* Receive config invalid */
+#define E1000_RXCW_CC 0x10000000 /* Receive config change */
+#define E1000_RXCW_C 0x20000000 /* Receive config */
+#define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */
+#define E1000_RXCW_ANC 0x80000000 /* Auto-neg complete */
+
+/* Transmit Control */
+#define E1000_TCTL_RST 0x00000001 /* software reset */
+#define E1000_TCTL_EN 0x00000002 /* enable tx */
+#define E1000_TCTL_BCE 0x00000004 /* busy check enable */
+#define E1000_TCTL_PSP 0x00000008 /* pad short packets */
+#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */
+#define E1000_TCTL_COLD 0x003ff000 /* collision distance */
+#define E1000_TCTL_SWXOFF 0x00400000 /* SW Xoff transmission */
+#define E1000_TCTL_PBE 0x00800000 /* Packet Burst Enable */
+#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */
+#define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */
+
+/* Receive Checksum Control */
+#define E1000_RXCSUM_PCSS_MASK 0x000000FF /* Packet Checksum Start */
+#define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */
+#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */
+#define E1000_RXCSUM_IPV6OFL 0x00000400 /* IPv6 checksum offload */
+
+/* Definitions for power management and wakeup registers */
+/* Wake Up Control */
+#define E1000_WUC_APME 0x00000001 /* APM Enable */
+#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */
+#define E1000_WUC_PME_STATUS 0x00000004 /* PME Status */
+#define E1000_WUC_APMPME 0x00000008 /* Assert PME on APM Wakeup */
+
+/* Wake Up Filter Control */
+#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */
+#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */
+#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */
+#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */
+#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */
+#define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */
+#define E1000_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */
+#define E1000_WUFC_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Enable */
+#define E1000_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */
+#define E1000_WUFC_FLX1 0x00020000 /* Flexible Filter 1 Enable */
+#define E1000_WUFC_FLX2 0x00040000 /* Flexible Filter 2 Enable */
+#define E1000_WUFC_FLX3 0x00080000 /* Flexible Filter 3 Enable */
+#define E1000_WUFC_ALL_FILTERS 0x000F00FF /* Mask for all wakeup filters */
+#define E1000_WUFC_FLX_OFFSET 16 /* Offset to the Flexible Filters bits */
+#define E1000_WUFC_FLX_FILTERS 0x000F0000 /* Mask for the 4 flexible filters */
+
+/* Wake Up Status */
+#define E1000_WUS_LNKC 0x00000001 /* Link Status Changed */
+#define E1000_WUS_MAG 0x00000002 /* Magic Packet Received */
+#define E1000_WUS_EX 0x00000004 /* Directed Exact Received */
+#define E1000_WUS_MC 0x00000008 /* Directed Multicast Received */
+#define E1000_WUS_BC 0x00000010 /* Broadcast Received */
+#define E1000_WUS_ARP 0x00000020 /* ARP Request Packet Received */
+#define E1000_WUS_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Received */
+#define E1000_WUS_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Received */
+#define E1000_WUS_FLX0 0x00010000 /* Flexible Filter 0 Match */
+#define E1000_WUS_FLX1 0x00020000 /* Flexible Filter 1 Match */
+#define E1000_WUS_FLX2 0x00040000 /* Flexible Filter 2 Match */
+#define E1000_WUS_FLX3 0x00080000 /* Flexible Filter 3 Match */
+#define E1000_WUS_FLX_FILTERS 0x000F0000 /* Mask for the 4 flexible filters */
+
+/* Management Control */
+#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */
+#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */
+#define E1000_MANC_R_ON_FORCE 0x00000004 /* Reset on Force TCO - RO */
+#define E1000_MANC_RMCP_EN 0x00000100 /* Enable RMCP 026Fh Filtering */
+#define E1000_MANC_0298_EN 0x00000200 /* Enable RMCP 0298h Filtering */
+#define E1000_MANC_IPV4_EN 0x00000400 /* Enable IPv4 */
+#define E1000_MANC_IPV6_EN 0x00000800 /* Enable IPv6 */
+#define E1000_MANC_SNAP_EN 0x00001000 /* Accept LLC/SNAP */
+#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */
+#define E1000_MANC_NEIGHBOR_EN 0x00004000 /* Enable Neighbor Discovery
+ * Filtering */
+#define E1000_MANC_TCO_RESET 0x00010000 /* TCO Reset Occurred */
+#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */
+#define E1000_MANC_REPORT_STATUS 0x00040000 /* Status Reporting Enabled */
+#define E1000_MANC_SMB_REQ 0x01000000 /* SMBus Request */
+#define E1000_MANC_SMB_GNT 0x02000000 /* SMBus Grant */
+#define E1000_MANC_SMB_CLK_IN 0x04000000 /* SMBus Clock In */
+#define E1000_MANC_SMB_DATA_IN 0x08000000 /* SMBus Data In */
+#define E1000_MANC_SMB_DATA_OUT 0x10000000 /* SMBus Data Out */
+#define E1000_MANC_SMB_CLK_OUT 0x20000000 /* SMBus Clock Out */
+
+#define E1000_MANC_SMB_DATA_OUT_SHIFT 28 /* SMBus Data Out Shift */
+#define E1000_MANC_SMB_CLK_OUT_SHIFT 29 /* SMBus Clock Out Shift */
+
+/* Wake Up Packet Length */
+#define E1000_WUPL_LENGTH_MASK 0x0FFF /* Only the lower 12 bits are valid */
+
+#define E1000_MDALIGN 4096
+
+/* EEPROM Commands */
+#define EEPROM_READ_OPCODE 0x6 /* EEPROM read opcode */
+#define EEPROM_WRITE_OPCODE 0x5 /* EEPROM write opcode */
+#define EEPROM_ERASE_OPCODE 0x7 /* EEPROM erase opcode */
+#define EEPROM_EWEN_OPCODE 0x13 /* EEPROM erase/write enable */
+#define EEPROM_EWDS_OPCODE 0x10 /* EEPROM erase/write disable */
+
+/* EEPROM Word Offsets */
+#define EEPROM_COMPAT 0x0003
+#define EEPROM_ID_LED_SETTINGS 0x0004
+#define EEPROM_INIT_CONTROL1_REG 0x000A
+#define EEPROM_INIT_CONTROL2_REG 0x000F
+#define EEPROM_FLASH_VERSION 0x0032
+#define EEPROM_CHECKSUM_REG 0x003F
+
+/* Word definitions for ID LED Settings */
+#define ID_LED_RESERVED_0000 0x0000
+#define ID_LED_RESERVED_FFFF 0xFFFF
+#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \
+ (ID_LED_OFF1_OFF2 << 8) | \
+ (ID_LED_DEF1_DEF2 << 4) | \
+ (ID_LED_DEF1_DEF2))
+#define ID_LED_DEF1_DEF2 0x1
+#define ID_LED_DEF1_ON2 0x2
+#define ID_LED_DEF1_OFF2 0x3
+#define ID_LED_ON1_DEF2 0x4
+#define ID_LED_ON1_ON2 0x5
+#define ID_LED_ON1_OFF2 0x6
+#define ID_LED_OFF1_DEF2 0x7
+#define ID_LED_OFF1_ON2 0x8
+#define ID_LED_OFF1_OFF2 0x9
+
+/* Mask bits for fields in Word 0x03 of the EEPROM */
+#define EEPROM_COMPAT_SERVER 0x0400
+#define EEPROM_COMPAT_CLIENT 0x0200
+
+/* Mask bits for fields in Word 0x0a of the EEPROM */
+#define EEPROM_WORD0A_ILOS 0x0010
+#define EEPROM_WORD0A_SWDPIO 0x01E0
+#define EEPROM_WORD0A_LRST 0x0200
+#define EEPROM_WORD0A_FD 0x0400
+#define EEPROM_WORD0A_66MHZ 0x0800
+
+/* Mask bits for fields in Word 0x0f of the EEPROM */
+#define EEPROM_WORD0F_PAUSE_MASK 0x3000
+#define EEPROM_WORD0F_PAUSE 0x1000
+#define EEPROM_WORD0F_ASM_DIR 0x2000
+#define EEPROM_WORD0F_ANE 0x0800
+#define EEPROM_WORD0F_SWPDIO_EXT 0x00F0
+
+/* For checksumming, the sum of all words in the EEPROM should equal 0xBABA. */
+#define EEPROM_SUM 0xBABA
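+
+/* Illustrative sketch of the rule above, along the lines of what
+ * e1000_validate_eeprom_checksum() must do: sum words 0x00 through
+ * EEPROM_CHECKSUM_REG inclusive (16-bit wraparound) and compare the
+ * result with EEPROM_SUM.
+ */
+#if 0
+static inline int32_t e1000_checksum_example(struct e1000_hw *hw)
+{
+ uint16_t checksum = 0, word;
+ uint16_t i;
+
+ for (i = 0; i <= EEPROM_CHECKSUM_REG; i++) {
+ if (e1000_read_eeprom(hw, i, &word) != E1000_SUCCESS)
+ return -E1000_ERR_EEPROM;
+ checksum += word;
+ }
+ return (checksum == EEPROM_SUM) ? E1000_SUCCESS : -E1000_ERR_EEPROM;
+}
+#endif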
+
+/* EEPROM Map defines (WORD OFFSETS)*/
+#define EEPROM_NODE_ADDRESS_BYTE_0 0
+#define EEPROM_PBA_BYTE_1 8
+
+/* EEPROM Map Sizes (Byte Counts) */
+#define PBA_SIZE 4
+
+/* Collision related configuration parameters */
+#define E1000_COLLISION_THRESHOLD 16
+#define E1000_CT_SHIFT 4
+#define E1000_COLLISION_DISTANCE 64
+#define E1000_FDX_COLLISION_DISTANCE E1000_COLLISION_DISTANCE
+#define E1000_HDX_COLLISION_DISTANCE E1000_COLLISION_DISTANCE
+#define E1000_GB_HDX_COLLISION_DISTANCE 512
+#define E1000_COLD_SHIFT 12
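+/* Illustrative sketch (editor's addition): these values are merged into the
+ * Transmit Control Register. The threshold half appears verbatim in
+ * e1000_configure_tx() in e1000_main.c; the distance half is presumably
+ * placed the same way by e1000_config_collision_dist():
+ *
+ *	tctl |= (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) |
+ *	        (E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT);
+ */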
+
+/* The number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define REQ_TX_DESCRIPTOR_MULTIPLE 8
+#define REQ_RX_DESCRIPTOR_MULTIPLE 8
+
+/* Default values for the transmit IPG register */
+#define DEFAULT_82542_TIPG_IPGT 10
+#define DEFAULT_82543_TIPG_IPGT_FIBER 9
+#define DEFAULT_82543_TIPG_IPGT_COPPER 8
+
+#define E1000_TIPG_IPGT_MASK 0x000003FF
+#define E1000_TIPG_IPGR1_MASK 0x000FFC00
+#define E1000_TIPG_IPGR2_MASK 0x3FF00000
+
+#define DEFAULT_82542_TIPG_IPGR1 2
+#define DEFAULT_82543_TIPG_IPGR1 8
+#define E1000_TIPG_IPGR1_SHIFT 10
+
+#define DEFAULT_82542_TIPG_IPGR2 10
+#define DEFAULT_82543_TIPG_IPGR2 6
+#define E1000_TIPG_IPGR2_SHIFT 20
+
+#define E1000_TXDMAC_DPP 0x00000001
+
+/* Adaptive IFS defines */
+#define TX_THRESHOLD_START 8
+#define TX_THRESHOLD_INCREMENT 10
+#define TX_THRESHOLD_DECREMENT 1
+#define TX_THRESHOLD_STOP 190
+#define TX_THRESHOLD_DISABLE 0
+#define TX_THRESHOLD_TIMER_MS 10000
+#define MIN_NUM_XMITS 1000
+#define IFS_MAX 80
+#define IFS_STEP 10
+#define IFS_MIN 40
+#define IFS_RATIO 4
+
+/* PBA constants */
+#define E1000_PBA_16K 0x0010 /* 16KB, default TX allocation */
+#define E1000_PBA_24K 0x0018
+#define E1000_PBA_40K 0x0028
+#define E1000_PBA_48K 0x0030 /* 48KB, default RX allocation */
+
+/* Flow Control Constants */
+#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001
+#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100
+#define FLOW_CONTROL_TYPE 0x8808
+
+/* The historical defaults for the flow control values are given below. */
+#define FC_DEFAULT_HI_THRESH (0x8000) /* 32KB */
+#define FC_DEFAULT_LO_THRESH (0x4000) /* 16KB */
+#define FC_DEFAULT_TX_TIMER (0x100) /* ~130 us */
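+/* Editor's note: an 802.3x pause timer counts in quanta of 512 bit times,
+ * so at 1 Gb/s one quantum is 512 ns and the default above works out to
+ * 0x100 * 512 ns = ~131 us, hence the "~130 us" annotation.
+ */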
+
+/* PCIX Config space */
+#define PCIX_COMMAND_REGISTER 0xE6
+#define PCIX_STATUS_REGISTER_LO 0xE8
+#define PCIX_STATUS_REGISTER_HI 0xEA
+
+#define PCIX_COMMAND_MMRBC_MASK 0x000C
+#define PCIX_COMMAND_MMRBC_SHIFT 0x2
+#define PCIX_STATUS_HI_MMRBC_MASK 0x0060
+#define PCIX_STATUS_HI_MMRBC_SHIFT 0x5
+#define PCIX_STATUS_HI_MMRBC_4K 0x3
+#define PCIX_STATUS_HI_MMRBC_2K 0x2
+
+
+/* The number of bits that we need to shift right to move the "pause"
+ * bits from the EEPROM (bits 13:12) to the "pause" (bits 8:7) field
+ * in the TXCW register
+ */
+#define PAUSE_SHIFT 5
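+/* Illustrative sketch (editor's addition), e.g.:
+ *	txcw |= (eeprom_word0f & EEPROM_WORD0F_PAUSE_MASK) >> PAUSE_SHIFT;
+ * moves bits 13:12 down to bits 8:7 (13 - 5 = 8, 12 - 5 = 7).
+ */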
+
+/* The number of bits that we need to shift left to move the "SWDPIO"
+ * bits from the EEPROM (bits 8:5) to the "SWDPIO" (bits 25:22) field
+ * in the CTRL register
+ */
+#define SWDPIO_SHIFT 17
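+/* Illustrative sketch (editor's addition), e.g.:
+ *	ctrl |= (eeprom_word0a & EEPROM_WORD0A_SWDPIO) << SWDPIO_SHIFT;
+ * moves bits 8:5 up to bits 25:22 (5 + 17 = 22, 8 + 17 = 25).
+ */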
+
+/* The number of bits that we need to shift left to move the "SWDPIO_EXT"
+ * bits from the EEPROM word F (bits 7:4) to bits 11:8 of the Extended
+ * CTRL register
+ */
+#define SWDPIO__EXT_SHIFT 4
+
+/* The number of bits that we need to shift left to move the "ILOS"
+ * bit from the EEPROM (bit 4) to the "ILOS" (bit 7) field
+ * in the CTRL register
+ */
+#define ILOS_SHIFT 3
+
+
+#define RECEIVE_BUFFER_ALIGN_SIZE (256)
+
+/* The number of milliseconds we wait for auto-negotiation to complete */
+#define LINK_UP_TIMEOUT 500
+
+#define E1000_TX_BUFFER_SIZE ((uint32_t)1514)
+
+/* The carrier extension symbol, as received by the NIC. */
+#define CARRIER_EXTENSION 0x0F
+
+/* TBI_ACCEPT macro definition:
+ *
+ * This macro requires:
+ * adapter = a pointer to struct e1000_hw
+ * status = the 8 bit status field of the RX descriptor with EOP set
+ * errors = the 8 bit error field of the RX descriptor with EOP set
+ * length = the sum of all the length fields of the RX descriptors that
+ * make up the current frame
+ * last_byte = the last byte of the frame DMAed by the hardware
+ * The minimum and maximum acceptable frame lengths are taken from the
+ * adapter's min_frame_size and max_frame_size fields.
+ *
+ * This macro is a conditional that should be used in the interrupt
+ * handler's Rx processing routine when RxErrors have been detected.
+ *
+ * Typical use:
+ * ...
+ * if(TBI_ACCEPT(adapter, status, errors, length, last_byte)) {
+ * accept_frame = TRUE;
+ * e1000_tbi_adjust_stats(adapter, MacAddress);
+ * frame_length--;
+ * } else {
+ * accept_frame = FALSE;
+ * }
+ * ...
+ */
+
+#define TBI_ACCEPT(adapter, status, errors, length, last_byte) \
+ ((adapter)->tbi_compatibility_on && \
+ (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \
+ ((last_byte) == CARRIER_EXTENSION) && \
+ (((status) & E1000_RXD_STAT_VP) ? \
+ (((length) > ((adapter)->min_frame_size - VLAN_TAG_SIZE)) && \
+ ((length) <= ((adapter)->max_frame_size + 1))) : \
+ (((length) > (adapter)->min_frame_size) && \
+ ((length) <= ((adapter)->max_frame_size + VLAN_TAG_SIZE + 1)))))
+
+
+/* Structures, enums, and macros for the PHY */
+
+/* Bit definitions for the Management Data IO (MDIO) and Management Data
+ * Clock (MDC) pins in the Device Control Register.
+ */
+#define E1000_CTRL_PHY_RESET_DIR E1000_CTRL_SWDPIO0
+#define E1000_CTRL_PHY_RESET E1000_CTRL_SWDPIN0
+#define E1000_CTRL_MDIO_DIR E1000_CTRL_SWDPIO2
+#define E1000_CTRL_MDIO E1000_CTRL_SWDPIN2
+#define E1000_CTRL_MDC_DIR E1000_CTRL_SWDPIO3
+#define E1000_CTRL_MDC E1000_CTRL_SWDPIN3
+#define E1000_CTRL_PHY_RESET_DIR4 E1000_CTRL_EXT_SDP4_DIR
+#define E1000_CTRL_PHY_RESET4 E1000_CTRL_EXT_SDP4_DATA
+
+/* PHY 1000 MII Register/Bit Definitions */
+/* PHY Registers defined by IEEE */
+#define PHY_CTRL 0x00 /* Control Register */
+#define PHY_STATUS 0x01 /* Status Register */
+#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */
+#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */
+#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */
+#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */
+#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */
+#define PHY_NEXT_PAGE_TX 0x07 /* Next Page TX */
+#define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */
+#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */
+#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */
+#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */
+
+/* M88E1000 Specific Registers */
+#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */
+#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Register */
+#define M88E1000_INT_ENABLE 0x12 /* Interrupt Enable Register */
+#define M88E1000_INT_STATUS 0x13 /* Interrupt Status Register */
+#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Control */
+#define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */
+
+#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */
+
+/* PHY Control Register */
+#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */
+#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */
+#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */
+#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */
+#define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */
+#define MII_CR_POWER_DOWN 0x0800 /* Power down */
+#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */
+#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */
+#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */
+#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */
+
+/* PHY Status Register */
+#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */
+#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */
+#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */
+#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */
+#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */
+#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */
+#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */
+#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */
+#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */
+#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */
+#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */
+#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */
+#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */
+#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */
+#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */
+
+/* Autoneg Advertisement Register */
+#define NWAY_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */
+#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */
+#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */
+#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */
+#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */
+#define NWAY_AR_100T4_CAPS 0x0200 /* 100T4 Capable */
+#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */
+#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */
+#define NWAY_AR_REMOTE_FAULT 0x2000 /* Remote Fault detected */
+#define NWAY_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */
+
+/* Link Partner Ability Register (Base Page) */
+#define NWAY_LPAR_SELECTOR_FIELD 0x0000 /* LP protocol selector field */
+#define NWAY_LPAR_10T_HD_CAPS 0x0020 /* LP is 10T Half Duplex Capable */
+#define NWAY_LPAR_10T_FD_CAPS 0x0040 /* LP is 10T Full Duplex Capable */
+#define NWAY_LPAR_100TX_HD_CAPS 0x0080 /* LP is 100TX Half Duplex Capable */
+#define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP is 100TX Full Duplex Capable */
+#define NWAY_LPAR_100T4_CAPS 0x0200 /* LP is 100T4 Capable */
+#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */
+#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */
+#define NWAY_LPAR_REMOTE_FAULT 0x2000 /* LP has detected Remote Fault */
+#define NWAY_LPAR_ACKNOWLEDGE 0x4000 /* LP has rx'd link code word */
+#define NWAY_LPAR_NEXT_PAGE 0x8000 /* Next Page ability supported */
+
+/* Autoneg Expansion Register */
+#define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */
+#define NWAY_ER_PAGE_RXD 0x0002 /* LP Link Code Word Page Received */
+#define NWAY_ER_NEXT_PAGE_CAPS 0x0004 /* Local device is Next Page able */
+#define NWAY_ER_LP_NEXT_PAGE_CAPS 0x0008 /* LP is Next Page able */
+#define NWAY_ER_PAR_DETECT_FAULT 0x0100 /* Parallel Detection Fault */
+
+/* Next Page TX Register */
+#define NPTX_MSG_CODE_FIELD 0x0001 /* NP msg code or unformatted data */
+#define NPTX_TOGGLE 0x0800 /* Toggles between exchanges
+ * of different NP
+ */
+#define NPTX_ACKNOWLDGE2 0x1000 /* 1 = will comply with msg
+ * 0 = cannot comply with msg
+ */
+#define NPTX_MSG_PAGE 0x2000 /* formatted(1)/unformatted(0) pg */
+#define NPTX_NEXT_PAGE 0x8000 /* 1 = additional NP will follow
+ * 0 = sending last NP
+ */
+
+/* Link Partner Next Page Register */
+#define LP_RNPR_MSG_CODE_FIELD 0x0001 /* NP msg code or unformatted data */
+#define LP_RNPR_TOGGLE 0x0800 /* Toggles between exchanges
+ * of different NP
+ */
+#define LP_RNPR_ACKNOWLDGE2 0x1000 /* 1 = will comply with msg
+ * 0 = cannot comply with msg
+ */
+#define LP_RNPR_MSG_PAGE 0x2000 /* formatted(1)/unformatted(0) pg */
+#define LP_RNPR_ACKNOWLDGE 0x4000 /* 1 = ACK / 0 = NO ACK */
+#define LP_RNPR_NEXT_PAGE 0x8000 /* 1 = additional NP will follow
+ * 0 = sending last NP
+ */
+
+/* 1000BASE-T Control Register */
+#define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */
+#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */
+#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */
+#define CR_1000T_REPEATER_DTE 0x0400 /* 1=Repeater/switch device port */
+ /* 0=DTE device */
+#define CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master */
+ /* 0=Configure PHY as Slave */
+#define CR_1000T_MS_ENABLE 0x1000 /* 1=Master/Slave manual config value */
+ /* 0=Automatic Master/Slave config */
+#define CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */
+#define CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */
+#define CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */
+#define CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */
+#define CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */
+
+/* 1000BASE-T Status Register */
+#define SR_1000T_IDLE_ERROR_CNT 0x00FF /* Num idle errors since last read */
+#define SR_1000T_ASYM_PAUSE_DIR 0x0100 /* LP asymmetric pause direction bit */
+#define SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */
+#define SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */
+#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */
+#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */
+#define SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local TX is Master, 0=Slave */
+#define SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */
+#define SR_1000T_REMOTE_RX_STATUS_SHIFT 12
+#define SR_1000T_LOCAL_RX_STATUS_SHIFT 13
+
+/* Extended Status Register */
+#define IEEE_ESR_1000T_HD_CAPS 0x1000 /* 1000T HD capable */
+#define IEEE_ESR_1000T_FD_CAPS 0x2000 /* 1000T FD capable */
+#define IEEE_ESR_1000X_HD_CAPS 0x4000 /* 1000X HD capable */
+#define IEEE_ESR_1000X_FD_CAPS 0x8000 /* 1000X FD capable */
+
+#define PHY_TX_POLARITY_MASK 0x0100 /* register 10h bit 8 (polarity bit) */
+#define PHY_TX_NORMAL_POLARITY 0 /* register 10h bit 8 (normal polarity) */
+
+#define AUTO_POLARITY_DISABLE 0x0010 /* register 11h bit 4 */
+ /* (0=enable, 1=disable) */
+
+/* M88E1000 PHY Specific Control Register */
+#define M88E1000_PSCR_JABBER_DISABLE 0x0001 /* 1=Jabber Function disabled */
+#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reversal enabled */
+#define M88E1000_PSCR_SQE_TEST 0x0004 /* 1=SQE Test enabled */
+#define M88E1000_PSCR_CLK125_DISABLE 0x0010 /* 1=CLK125 low,
+ * 0=CLK125 toggling
+ */
+#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 /* MDI Crossover Mode bits 6:5 */
+ /* Manual MDI configuration */
+#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */
+#define M88E1000_PSCR_AUTO_X_1000T 0x0040 /* 1000BASE-T: Auto crossover,
+ * 100BASE-TX/10BASE-T:
+ * MDI Mode
+ */
+#define M88E1000_PSCR_AUTO_X_MODE 0x0060 /* Auto crossover enabled
+ * all speeds.
+ */
+#define M88E1000_PSCR_10BT_EXT_DIST_ENABLE 0x0080
+ /* 1=Enable Extended 10BASE-T distance
+ * (Lower 10BASE-T RX Threshold)
+ * 0=Normal 10BASE-T RX Threshold */
+#define M88E1000_PSCR_MII_5BIT_ENABLE 0x0100
+ /* 1=5-Bit interface in 100BASE-TX
+ * 0=MII interface in 100BASE-TX */
+#define M88E1000_PSCR_SCRAMBLER_DISABLE 0x0200 /* 1=Scrambler disable */
+#define M88E1000_PSCR_FORCE_LINK_GOOD 0x0400 /* 1=Force link good */
+#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Transmit */
+
+#define M88E1000_PSCR_POLARITY_REVERSAL_SHIFT 1
+#define M88E1000_PSCR_AUTO_X_MODE_SHIFT 5
+#define M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT 7
+
+/* M88E1000 PHY Specific Status Register */
+#define M88E1000_PSSR_JABBER 0x0001 /* 1=Jabber */
+#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */
+#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */
+#define M88E1000_PSSR_CABLE_LENGTH 0x0380 /* 0=<50M;1=50-80M;2=80-110M;
+ * 3=110-140M;4=>140M */
+#define M88E1000_PSSR_LINK 0x0400 /* 1=Link up, 0=Link down */
+#define M88E1000_PSSR_SPD_DPLX_RESOLVED 0x0800 /* 1=Speed & Duplex resolved */
+#define M88E1000_PSSR_PAGE_RCVD 0x1000 /* 1=Page received */
+#define M88E1000_PSSR_DPLX 0x2000 /* 1=Duplex 0=Half Duplex */
+#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 15:14 */
+#define M88E1000_PSSR_10MBS 0x0000 /* 00=10Mbs */
+#define M88E1000_PSSR_100MBS 0x4000 /* 01=100Mbs */
+#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */
+
+#define M88E1000_PSSR_REV_POLARITY_SHIFT 1
+#define M88E1000_PSSR_MDIX_SHIFT 6
+#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7
+
+/* M88E1000 Extended PHY Specific Control Register */
+#define M88E1000_EPSCR_FIBER_LOOPBACK 0x4000 /* 1=Fiber loopback */
+#define M88E1000_EPSCR_DOWN_NO_IDLE 0x8000 /* 1=Lost lock detect enabled.
+ * Will assert lost lock and bring
+ * link down if idle not seen
+ * within 1ms in 1000BASE-T
+ */
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the master */
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_2X 0x0400
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_3X 0x0800
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_4X 0x0C00
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the slave */
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_DIS 0x0000
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_2X 0x0200
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_3X 0x0300
+#define M88E1000_EPSCR_TX_CLK_2_5 0x0060 /* 2.5 MHz TX_CLK */
+#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */
+#define M88E1000_EPSCR_TX_CLK_0 0x0000 /* NO TX_CLK */
+
+/* Bit definitions for valid PHY IDs. */
+#define M88E1000_E_PHY_ID 0x01410C50
+#define M88E1000_I_PHY_ID 0x01410C30
+#define M88E1011_I_PHY_ID 0x01410C20
+#define M88E1000_12_PHY_ID M88E1000_E_PHY_ID
+#define M88E1000_14_PHY_ID M88E1000_E_PHY_ID
+#define M88E1011_I_REV_4 0x04
+
+/* Miscellaneous PHY bit definitions. */
+#define PHY_PREAMBLE 0xFFFFFFFF
+#define PHY_SOF 0x01
+#define PHY_OP_READ 0x02
+#define PHY_OP_WRITE 0x01
+#define PHY_TURNAROUND 0x02
+#define PHY_PREAMBLE_SIZE 32
+#define MII_CR_SPEED_1000 0x0040
+#define MII_CR_SPEED_100 0x2000
+#define MII_CR_SPEED_10 0x0000
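+/* Editor's note: per the two-bit encoding described for PHY_CTRL above,
+ * 1000 Mb/s is MSB=1/LSB=0 (bit 6 only, hence 0x0040), 100 Mb/s is
+ * MSB=0/LSB=1 (bit 13 only, hence 0x2000), and 10 Mb/s clears both bits.
+ */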
+#define E1000_PHY_ADDRESS 0x01
+#define PHY_AUTO_NEG_TIME 45 /* 4.5 Seconds */
+#define PHY_FORCE_TIME 20 /* 2.0 Seconds */
+#define PHY_REVISION_MASK 0xFFFFFFF0
+#define DEVICE_SPEED_MASK 0x00000300 /* Device Ctrl Reg Speed Mask */
+#define REG4_SPEED_MASK 0x01E0
+#define REG9_SPEED_MASK 0x0300
+#define ADVERTISE_10_HALF 0x0001
+#define ADVERTISE_10_FULL 0x0002
+#define ADVERTISE_100_HALF 0x0004
+#define ADVERTISE_100_FULL 0x0008
+#define ADVERTISE_1000_HALF 0x0010
+#define ADVERTISE_1000_FULL 0x0020
+#define AUTONEG_ADVERTISE_SPEED_DEFAULT 0x002F /* Everything but 1000-Half */
+
+#endif /* _E1000_HW_H_ */
diff --git a/xen/drivers/net/e1000/e1000_main.c b/xen/drivers/net/e1000/e1000_main.c
new file mode 100644
index 0000000000..8afbe394c2
--- /dev/null
+++ b/xen/drivers/net/e1000/e1000_main.c
@@ -0,0 +1,2279 @@
+/*******************************************************************************
+
+
+ Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000.h"
+
+/* Change Log
+ *
+ * 4.4.19 11/27/02
+ * o Feature: Added user-settable knob for interrupt throttle rate (ITR).
+ * o Cleanup: removed large static array allocations.
+ * o Cleanup: C99 struct initializer format.
+ * o Bug fix: restore VLAN settings when interface is brought up.
+ * o Bug fix: return cleanly in probe if error in detecting MAC type.
+ * o Bug fix: Wake up on magic packet by default only if enabled in eeprom.
+ * o Bug fix: Validate MAC address in set_mac.
+ * o Bug fix: Throw away zero-length Tx skbs.
+ * o Bug fix: Make ethtool EEPROM accesses work on older versions of ethtool.
+ *
+ * 4.4.12 10/15/02
+ * o Clean up: use members of pci_device rather than direct calls to
+ * pci_read_config_word.
+ * o Bug fix: changed default flow control settings.
+ * o Clean up: ethtool file now has an inclusive list for adapters in the
+ * Wake-On-LAN capabilities instead of an exclusive list.
+ * o Bug fix: miscellaneous WoL bug fixes.
+ * o Added software interrupt for clearing rx ring
+ * o Bug fix: easier to undo "forcing" of 1000/fd using ethtool.
+ * o Now setting netdev->mem_end in e1000_probe.
+ * o Clean up: Moved tx_timeout from interrupt context to process context
+ * using schedule_task.
+ *
+ * 4.3.15 8/9/02
+ */
+
+char e1000_driver_name[] = "e1000";
+char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver";
+char e1000_driver_version[] = "4.4.19-k2";
+char e1000_copyright[] = "Copyright (c) 1999-2002 Intel Corporation.";
+
+/* e1000_pci_tbl - PCI Device ID Table
+ *
+ * Private driver_data field (last one) stores an index into e1000_strings
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ * Class, Class Mask, String Index }
+ */
+static struct pci_device_id e1000_pci_tbl[] __devinitdata = {
+ /* Intel(R) PRO/1000 Network Connection */
+ {0x8086, 0x1000, 0x8086, 0x1000, 0, 0, 0},
+ {0x8086, 0x1001, 0x8086, 0x1003, 0, 0, 0},
+ {0x8086, 0x1004, 0x8086, 0x1004, 0, 0, 0},
+ {0x8086, 0x1008, 0x8086, 0x1107, 0, 0, 0},
+ {0x8086, 0x1009, 0x8086, 0x1109, 0, 0, 0},
+ {0x8086, 0x100C, 0x8086, 0x1112, 0, 0, 0},
+ {0x8086, 0x100E, 0x8086, 0x001E, 0, 0, 0},
+ /* Compaq Gigabit Ethernet Server Adapter */
+ {0x8086, 0x1000, 0x0E11, PCI_ANY_ID, 0, 0, 1},
+ {0x8086, 0x1001, 0x0E11, PCI_ANY_ID, 0, 0, 1},
+ {0x8086, 0x1004, 0x0E11, PCI_ANY_ID, 0, 0, 1},
+ /* IBM Mobile, Desktop & Server Adapters */
+ {0x8086, 0x1000, 0x1014, PCI_ANY_ID, 0, 0, 2},
+ {0x8086, 0x1001, 0x1014, PCI_ANY_ID, 0, 0, 2},
+ {0x8086, 0x1004, 0x1014, PCI_ANY_ID, 0, 0, 2},
+ /* Generic */
+ {0x8086, 0x1000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1004, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1008, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1009, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x100C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x100D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x100E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x100F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1011, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1010, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1012, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1016, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x1017, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0x8086, 0x101E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ /* required last entry */
+ {0,}
+};
+
+MODULE_DEVICE_TABLE(pci, e1000_pci_tbl);
+
+static char *e1000_strings[] = {
+ "Intel(R) PRO/1000 Network Connection",
+ "Compaq Gigabit Ethernet Server Adapter",
+ "IBM Mobile, Desktop & Server Adapters"
+};
+
+/* Local Function Prototypes */
+
+int e1000_up(struct e1000_adapter *adapter);
+void e1000_down(struct e1000_adapter *adapter);
+void e1000_reset(struct e1000_adapter *adapter);
+
+static int e1000_init_module(void);
+static void e1000_exit_module(void);
+static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
+static void e1000_remove(struct pci_dev *pdev);
+static int e1000_sw_init(struct e1000_adapter *adapter);
+static int e1000_open(struct net_device *netdev);
+static int e1000_close(struct net_device *netdev);
+static int e1000_setup_tx_resources(struct e1000_adapter *adapter);
+static int e1000_setup_rx_resources(struct e1000_adapter *adapter);
+static void e1000_configure_tx(struct e1000_adapter *adapter);
+static void e1000_configure_rx(struct e1000_adapter *adapter);
+static void e1000_setup_rctl(struct e1000_adapter *adapter);
+static void e1000_clean_tx_ring(struct e1000_adapter *adapter);
+static void e1000_clean_rx_ring(struct e1000_adapter *adapter);
+static void e1000_free_tx_resources(struct e1000_adapter *adapter);
+static void e1000_free_rx_resources(struct e1000_adapter *adapter);
+static void e1000_set_multi(struct net_device *netdev);
+static void e1000_update_phy_info(unsigned long data);
+static void e1000_watchdog(unsigned long data);
+static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+static struct net_device_stats * e1000_get_stats(struct net_device *netdev);
+static int e1000_change_mtu(struct net_device *netdev, int new_mtu);
+static int e1000_set_mac(struct net_device *netdev, void *p);
+static void e1000_update_stats(struct e1000_adapter *adapter);
+static inline void e1000_irq_disable(struct e1000_adapter *adapter);
+static inline void e1000_irq_enable(struct e1000_adapter *adapter);
+static void e1000_intr(int irq, void *data, struct pt_regs *regs);
+static void e1000_clean_tx_irq(struct e1000_adapter *adapter);
+static void e1000_clean_rx_irq(struct e1000_adapter *adapter);
+static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter);
+static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
+static void e1000_enter_82542_rst(struct e1000_adapter *adapter);
+static void e1000_leave_82542_rst(struct e1000_adapter *adapter);
+static inline void e1000_rx_checksum(struct e1000_adapter *adapter,
+ struct e1000_rx_desc *rx_desc,
+ struct sk_buff *skb);
+static void e1000_tx_timeout(struct net_device *dev);
+static void e1000_tx_timeout_task(struct net_device *dev);
+
+static void e1000_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp);
+static void e1000_vlan_rx_add_vid(struct net_device *netdev, uint16_t vid);
+static void e1000_vlan_rx_kill_vid(struct net_device *netdev, uint16_t vid);
+static void e1000_restore_vlan(struct e1000_adapter *adapter);
+
+static int e1000_notify_reboot(struct notifier_block *, unsigned long event, void *ptr);
+static int e1000_suspend(struct pci_dev *pdev, uint32_t state);
+#ifdef CONFIG_PM
+static int e1000_resume(struct pci_dev *pdev);
+#endif
+
+struct notifier_block e1000_notifier_reboot = {
+ .notifier_call = e1000_notify_reboot,
+ .next = NULL,
+ .priority = 0
+};
+
+/* Exported from other modules */
+
+extern void e1000_check_options(struct e1000_adapter *adapter);
+extern int e1000_ethtool_ioctl(struct net_device *netdev, struct ifreq *ifr);
+
+static struct pci_driver e1000_driver = {
+ .name = e1000_driver_name,
+ .id_table = e1000_pci_tbl,
+ .probe = e1000_probe,
+ .remove = __devexit_p(e1000_remove),
+ /* Power Management Hooks */
+#ifdef CONFIG_PM
+ .suspend = e1000_suspend,
+ .resume = e1000_resume
+#endif
+};
+
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver");
+MODULE_LICENSE("GPL");
+
+/**
+ * e1000_init_module - Driver Registration Routine
+ *
+ * e1000_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ **/
+
+static int __init
+e1000_init_module(void)
+{
+ int ret;
+
+#if 0 /* Avoid disconcerting noise. */
+ printk(KERN_INFO "%s - version %s\n",
+ e1000_driver_string, e1000_driver_version);
+
+ printk(KERN_INFO "%s\n", e1000_copyright);
+#endif
+
+ ret = pci_module_init(&e1000_driver);
+// if(ret >= 0)
+// register_reboot_notifier(&e1000_notifier_reboot);
+ return ret;
+}
+
+module_init(e1000_init_module);
+
+/**
+ * e1000_exit_module - Driver Exit Cleanup Routine
+ *
+ * e1000_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+
+static void __exit
+e1000_exit_module(void)
+{
+// unregister_reboot_notifier(&e1000_notifier_reboot);
+ pci_unregister_driver(&e1000_driver);
+}
+
+module_exit(e1000_exit_module);
+
+
+int
+e1000_up(struct e1000_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ if(request_irq(netdev->irq, &e1000_intr, SA_SHIRQ | SA_SAMPLE_RANDOM,
+ netdev->name, netdev))
+ return -1;
+
+ /* hardware has been reset, we need to reload some things */
+
+ e1000_set_multi(netdev);
+ e1000_restore_vlan(adapter);
+
+ e1000_configure_tx(adapter);
+ e1000_setup_rctl(adapter);
+ e1000_configure_rx(adapter);
+ e1000_alloc_rx_buffers(adapter);
+
+ mod_timer(&adapter->watchdog_timer, jiffies);
+ e1000_irq_enable(adapter);
+
+ return 0;
+}
+
+void
+e1000_down(struct e1000_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ e1000_irq_disable(adapter);
+ free_irq(netdev->irq, netdev);
+ del_timer_sync(&adapter->watchdog_timer);
+ del_timer_sync(&adapter->phy_info_timer);
+ adapter->link_speed = 0;
+ adapter->link_duplex = 0;
+ netif_carrier_off(netdev);
+ netif_stop_queue(netdev);
+
+ e1000_reset(adapter);
+ e1000_clean_tx_ring(adapter);
+ e1000_clean_rx_ring(adapter);
+}
+
+void
+e1000_reset(struct e1000_adapter *adapter)
+{
+ /* Repartition the PBA for MTUs greater than 9k; a CTRL.RST is
+ * required for the change to take effect.
+ */
+
+ if(adapter->rx_buffer_len > E1000_RXBUFFER_8192)
+ E1000_WRITE_REG(&adapter->hw, PBA, E1000_JUMBO_PBA);
+ else
+ E1000_WRITE_REG(&adapter->hw, PBA, E1000_DEFAULT_PBA);
+
+ adapter->hw.fc = adapter->hw.original_fc;
+ e1000_reset_hw(&adapter->hw);
+ if(adapter->hw.mac_type >= e1000_82544)
+ E1000_WRITE_REG(&adapter->hw, WUC, 0);
+ e1000_init_hw(&adapter->hw);
+ e1000_reset_adaptive(&adapter->hw);
+ e1000_phy_get_info(&adapter->hw, &adapter->phy_info);
+}
+
+/**
+ * e1000_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in e1000_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * e1000_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+
+static int __devinit
+e1000_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct net_device *netdev;
+ struct e1000_adapter *adapter;
+ static int cards_found = 0;
+ unsigned long mmio_start;
+ int mmio_len;
+ int pci_using_dac;
+ int i;
+ uint16_t eeprom_data;
+
+ if((i = pci_enable_device(pdev)))
+ return i;
+
+ if(!(i = pci_set_dma_mask(pdev, PCI_DMA_64BIT))) {
+ pci_using_dac = 1;
+ } else {
+ if((i = pci_set_dma_mask(pdev, PCI_DMA_32BIT))) {
+ E1000_ERR("No usable DMA configuration, aborting\n");
+ return i;
+ }
+ pci_using_dac = 0;
+ }
+
+ if((i = pci_request_regions(pdev, e1000_driver_name)))
+ return i;
+
+ pci_set_master(pdev);
+
+ netdev = alloc_etherdev(sizeof(struct e1000_adapter));
+ if(!netdev)
+ goto err_alloc_etherdev;
+
+ SET_MODULE_OWNER(netdev);
+
+ pci_set_drvdata(pdev, netdev);
+ adapter = netdev->priv;
+ adapter->netdev = netdev;
+ adapter->pdev = pdev;
+ adapter->hw.back = adapter;
+
+ mmio_start = pci_resource_start(pdev, BAR_0);
+ mmio_len = pci_resource_len(pdev, BAR_0);
+
+ adapter->hw.hw_addr = ioremap(mmio_start, mmio_len);
+ if(!adapter->hw.hw_addr)
+ goto err_ioremap;
+
+ for(i = BAR_1; i <= BAR_5; i++) {
+ if(pci_resource_len(pdev, i) == 0)
+ continue;
+ if(pci_resource_flags(pdev, i) & IORESOURCE_IO) {
+ adapter->hw.io_base = pci_resource_start(pdev, i);
+ break;
+ }
+ }
+
+ netdev->open = &e1000_open;
+ netdev->stop = &e1000_close;
+ netdev->hard_start_xmit = &e1000_xmit_frame;
+ netdev->get_stats = &e1000_get_stats;
+ netdev->set_multicast_list = &e1000_set_multi;
+ netdev->set_mac_address = &e1000_set_mac;
+ netdev->change_mtu = &e1000_change_mtu;
+ netdev->do_ioctl = &e1000_ioctl;
+ netdev->tx_timeout = &e1000_tx_timeout;
+ netdev->watchdog_timeo = HZ;
+ netdev->vlan_rx_register = e1000_vlan_rx_register;
+ netdev->vlan_rx_add_vid = e1000_vlan_rx_add_vid;
+ netdev->vlan_rx_kill_vid = e1000_vlan_rx_kill_vid;
+
+ netdev->irq = pdev->irq;
+ netdev->mem_start = mmio_start;
+ netdev->mem_end = mmio_start + mmio_len;
+ netdev->base_addr = adapter->hw.io_base;
+
+ adapter->bd_number = cards_found;
+ adapter->id_string = e1000_strings[ent->driver_data];
+
+ /* setup the private structure */
+
+ if(e1000_sw_init(adapter))
+ goto err_sw_init;
+
+ if(adapter->hw.mac_type >= e1000_82543) {
+ netdev->features = NETIF_F_SG |
+ NETIF_F_HW_CSUM |
+ NETIF_F_HW_VLAN_TX |
+ NETIF_F_HW_VLAN_RX |
+ NETIF_F_HW_VLAN_FILTER;
+ } else {
+ netdev->features = NETIF_F_SG;
+ }
+
+ if(pci_using_dac)
+ netdev->features |= NETIF_F_HIGHDMA;
+
+ /* make sure the EEPROM is good */
+
+ if(e1000_validate_eeprom_checksum(&adapter->hw) < 0) {
+ printk(KERN_ERR "The EEPROM Checksum Is Not Valid\n");
+ goto err_eeprom;
+ }
+
+ /* copy the MAC address out of the EEPROM */
+
+ e1000_read_mac_addr(&adapter->hw);
+ memcpy(netdev->dev_addr, adapter->hw.mac_addr, netdev->addr_len);
+
+ if(!is_valid_ether_addr(netdev->dev_addr))
+ goto err_eeprom;
+
+ e1000_read_part_num(&adapter->hw, &(adapter->part_num));
+
+ e1000_get_bus_info(&adapter->hw);
+
+ if((adapter->hw.mac_type == e1000_82544) &&
+    (adapter->hw.bus_type == e1000_bus_type_pcix))
+ 	adapter->max_data_per_txd = 4096;
+ else
+ 	adapter->max_data_per_txd = MAX_JUMBO_FRAME_SIZE;
+
+ init_timer(&adapter->watchdog_timer);
+ adapter->watchdog_timer.function = &e1000_watchdog;
+ adapter->watchdog_timer.data = (unsigned long) adapter;
+
+ init_timer(&adapter->phy_info_timer);
+ adapter->phy_info_timer.function = &e1000_update_phy_info;
+ adapter->phy_info_timer.data = (unsigned long) adapter;
+
+ INIT_TQUEUE(&adapter->tx_timeout_task,
+ (void (*)(void *))e1000_tx_timeout_task, netdev);
+
+ register_netdev(netdev);
+ memcpy(adapter->ifname, netdev->name, IFNAMSIZ);
+ adapter->ifname[IFNAMSIZ-1] = 0;
+
+ /* we're going to reset, so assume we have no link for now */
+
+ netif_carrier_off(netdev);
+ netif_stop_queue(netdev);
+
+ printk(KERN_INFO "%s: %s\n", netdev->name, adapter->id_string);
+ e1000_check_options(adapter);
+ /* Initial Wake on LAN setting
+ * If APM wake is enabled in the EEPROM,
+ * enable the ACPI Magic Packet filter
+ */
+
+ e1000_read_eeprom(&adapter->hw, EEPROM_INIT_CONTROL2_REG, &eeprom_data);
+ if((adapter->hw.mac_type >= e1000_82544) &&
+ (eeprom_data & E1000_EEPROM_APME))
+ adapter->wol |= E1000_WUFC_MAG;
+
+ /* reset the hardware with the new settings */
+
+ e1000_reset(adapter);
+ cards_found++;
+ return 0;
+
+err_sw_init:
+err_eeprom:
+ iounmap(adapter->hw.hw_addr);
+err_ioremap:
+ pci_release_regions(pdev);
+ kfree(netdev);
+err_alloc_etherdev:
+ return -ENOMEM;
+}
+
+/**
+ * e1000_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * e1000_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device. This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+
+static void __devexit
+e1000_remove(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t manc;
+
+ if(adapter->hw.mac_type >= e1000_82540) {
+ manc = E1000_READ_REG(&adapter->hw, MANC);
+ if(manc & E1000_MANC_SMBUS_EN) {
+ manc |= E1000_MANC_ARP_EN;
+ E1000_WRITE_REG(&adapter->hw, MANC, manc);
+ }
+ }
+
+ unregister_netdev(netdev);
+
+ e1000_phy_hw_reset(&adapter->hw);
+
+ iounmap(adapter->hw.hw_addr);
+ pci_release_regions(pdev);
+
+ kfree(netdev);
+}
+
+/**
+ * e1000_sw_init - Initialize general software structures (struct e1000_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * e1000_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ **/
+
+static int __devinit
+e1000_sw_init(struct e1000_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+
+ /* PCI config space info */
+
+ hw->vendor_id = pdev->vendor;
+ hw->device_id = pdev->device;
+ hw->subsystem_vendor_id = pdev->subsystem_vendor;
+ hw->subsystem_id = pdev->subsystem_device;
+
+ pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+
+ pci_read_config_word(pdev, PCI_COMMAND, &hw->pci_cmd_word);
+
+ adapter->rx_buffer_len = E1000_RXBUFFER_2048;
+ hw->max_frame_size = netdev->mtu +
+ ENET_HEADER_SIZE + ETHERNET_FCS_SIZE;
+ hw->min_frame_size = MINIMUM_ETHERNET_FRAME_SIZE;
+
+ /* identify the MAC */
+
+ if (e1000_set_mac_type(hw)) {
+ E1000_ERR("Unknown MAC Type\n");
+ return -1;
+ }
+
+ /* flow control settings */
+
+ hw->fc_high_water = E1000_FC_HIGH_THRESH;
+ hw->fc_low_water = E1000_FC_LOW_THRESH;
+ hw->fc_pause_time = E1000_FC_PAUSE_TIME;
+ hw->fc_send_xon = 1;
+
+ /* Media type - copper or fiber */
+
+ if(hw->mac_type >= e1000_82543) {
+ uint32_t status = E1000_READ_REG(hw, STATUS);
+
+ if(status & E1000_STATUS_TBIMODE)
+ hw->media_type = e1000_media_type_fiber;
+ else
+ hw->media_type = e1000_media_type_copper;
+ } else {
+ hw->media_type = e1000_media_type_fiber;
+ }
+
+ if(hw->mac_type < e1000_82543)
+ hw->report_tx_early = 0;
+ else
+ hw->report_tx_early = 1;
+
+ hw->wait_autoneg_complete = FALSE;
+ hw->tbi_compatibility_en = TRUE;
+ hw->adaptive_ifs = TRUE;
+
+ /* Copper options */
+
+ if(hw->media_type == e1000_media_type_copper) {
+ hw->mdix = AUTO_ALL_MODES;
+ hw->disable_polarity_correction = FALSE;
+ }
+
+ atomic_set(&adapter->irq_sem, 1);
+ spin_lock_init(&adapter->stats_lock);
+
+ return 0;
+}
+
+/**
+ * e1000_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP). At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ **/
+
+static int
+e1000_open(struct net_device *netdev)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+
+ /* allocate transmit descriptors */
+
+ if(e1000_setup_tx_resources(adapter))
+ goto err_setup_tx;
+
+ /* allocate receive descriptors */
+
+ if(e1000_setup_rx_resources(adapter))
+ goto err_setup_rx;
+
+ if(e1000_up(adapter))
+ goto err_up;
+
+ return 0;
+
+err_up:
+ e1000_free_rx_resources(adapter);
+err_setup_rx:
+ e1000_free_tx_resources(adapter);
+err_setup_tx:
+ e1000_reset(adapter);
+
+ return -EBUSY;
+}
+
+/**
+ * e1000_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS. The hardware is still under the driver's control, but
+ * needs to be disabled. A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ **/
+
+static int
+e1000_close(struct net_device *netdev)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+
+ e1000_down(adapter);
+
+ e1000_free_tx_resources(adapter);
+ e1000_free_rx_resources(adapter);
+
+ return 0;
+}
+
+/**
+ * e1000_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+
+static int
+e1000_setup_tx_resources(struct e1000_adapter *adapter)
+{
+ struct e1000_desc_ring *txdr = &adapter->tx_ring;
+ struct pci_dev *pdev = adapter->pdev;
+ int size;
+
+ size = sizeof(struct e1000_buffer) * txdr->count;
+ txdr->buffer_info = kmalloc(size, GFP_KERNEL);
+ if(!txdr->buffer_info) {
+ return -ENOMEM;
+ }
+ memset(txdr->buffer_info, 0, size);
+
+ /* round up to nearest 4K */
+
+ txdr->size = txdr->count * sizeof(struct e1000_tx_desc);
+ E1000_ROUNDUP(txdr->size, 4096);
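+ /* Editor's note: E1000_ROUNDUP (defined elsewhere in the driver)
+ * rounds its first argument up to a multiple of the second, e.g.
+ * a 4112 byte ring would become 8192 bytes here. */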
+
+ txdr->desc = pci_alloc_consistent(pdev, txdr->size, &txdr->dma);
+ if(!txdr->desc) {
+ kfree(txdr->buffer_info);
+ return -ENOMEM;
+ }
+ memset(txdr->desc, 0, txdr->size);
+
+ txdr->next_to_use = 0;
+ txdr->next_to_clean = 0;
+
+ return 0;
+}
+
+/**
+ * e1000_configure_tx - Configure 8254x Transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+
+static void
+e1000_configure_tx(struct e1000_adapter *adapter)
+{
+ uint64_t tdba = adapter->tx_ring.dma;
+ uint32_t tdlen = adapter->tx_ring.count * sizeof(struct e1000_tx_desc);
+ uint32_t tctl, tipg;
+
+ E1000_WRITE_REG(&adapter->hw, TDBAL, (tdba & 0x00000000ffffffffULL));
+ E1000_WRITE_REG(&adapter->hw, TDBAH, (tdba >> 32));
+
+ E1000_WRITE_REG(&adapter->hw, TDLEN, tdlen);
+
+ /* Setup the HW Tx Head and Tail descriptor pointers */
+
+ E1000_WRITE_REG(&adapter->hw, TDH, 0);
+ E1000_WRITE_REG(&adapter->hw, TDT, 0);
+
+ /* Set the default values for the Tx Inter Packet Gap timer */
+
+ switch (adapter->hw.mac_type) {
+ case e1000_82542_rev2_0:
+ case e1000_82542_rev2_1:
+ tipg = DEFAULT_82542_TIPG_IPGT;
+ tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
+ tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
+ break;
+ default:
+ if(adapter->hw.media_type == e1000_media_type_fiber)
+ tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
+ else
+ tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
+ tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
+ tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
+ }
+ E1000_WRITE_REG(&adapter->hw, TIPG, tipg);
+
+ /* Set the Tx Interrupt Delay register */
+
+ E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay);
+ if(adapter->hw.mac_type >= e1000_82540)
+ E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay);
+
+ /* Program the Transmit Control Register */
+
+ tctl = E1000_READ_REG(&adapter->hw, TCTL);
+
+ tctl &= ~E1000_TCTL_CT;
+ tctl |= E1000_TCTL_EN | E1000_TCTL_PSP |
+ (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
+
+ E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
+
+ e1000_config_collision_dist(&adapter->hw);
+
+ /* Setup Transmit Descriptor Settings for this adapter */
+ adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_IDE;
+
+ if(adapter->hw.report_tx_early == 1)
+ adapter->txd_cmd |= E1000_TXD_CMD_RS;
+ else
+ adapter->txd_cmd |= E1000_TXD_CMD_RPS;
+}
+
+/**
+ * e1000_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+
+static int
+e1000_setup_rx_resources(struct e1000_adapter *adapter)
+{
+ struct e1000_desc_ring *rxdr = &adapter->rx_ring;
+ struct pci_dev *pdev = adapter->pdev;
+ int size;
+
+ size = sizeof(struct e1000_buffer) * rxdr->count;
+ rxdr->buffer_info = kmalloc(size, GFP_KERNEL);
+ if(!rxdr->buffer_info) {
+ return -ENOMEM;
+ }
+ memset(rxdr->buffer_info, 0, size);
+
+ /* Round up to nearest 4K */
+
+ rxdr->size = rxdr->count * sizeof(struct e1000_rx_desc);
+ E1000_ROUNDUP(rxdr->size, 4096);
+
+ rxdr->desc = pci_alloc_consistent(pdev, rxdr->size, &rxdr->dma);
+
+ if(!rxdr->desc) {
+ kfree(rxdr->buffer_info);
+ return -ENOMEM;
+ }
+ memset(rxdr->desc, 0, rxdr->size);
+
+ rxdr->next_to_clean = 0;
+ rxdr->next_to_use = 0;
+
+ return 0;
+}
+
+/**
+ * e1000_setup_rctl - configure the receive control register
+ * @adapter: Board private structure
+ **/
+
+static void
+e1000_setup_rctl(struct e1000_adapter *adapter)
+{
+ uint32_t rctl;
+
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+
+ rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
+
+ rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
+ E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
+ (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
+
+ if(adapter->hw.tbi_compatibility_on == 1)
+ rctl |= E1000_RCTL_SBP;
+ else
+ rctl &= ~E1000_RCTL_SBP;
+
+ rctl &= ~(E1000_RCTL_SZ_4096);
+ switch (adapter->rx_buffer_len) {
+ case E1000_RXBUFFER_2048:
+ default:
+ rctl |= E1000_RCTL_SZ_2048;
+ rctl &= ~(E1000_RCTL_BSEX | E1000_RCTL_LPE);
+ break;
+ case E1000_RXBUFFER_4096:
+ rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
+ break;
+ case E1000_RXBUFFER_8192:
+ rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
+ break;
+ case E1000_RXBUFFER_16384:
+ rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
+ break;
+ }
+
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+}
+
+/**
+ * e1000_configure_rx - Configure 8254x Receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+
+static void
+e1000_configure_rx(struct e1000_adapter *adapter)
+{
+ uint64_t rdba = adapter->rx_ring.dma;
+ uint32_t rdlen = adapter->rx_ring.count * sizeof(struct e1000_rx_desc);
+ uint32_t rctl;
+ uint32_t rxcsum;
+
+ /* make sure receives are disabled while setting up the descriptors */
+
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl & ~E1000_RCTL_EN);
+
+ /* set the Receive Delay Timer Register */
+
+ E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay);
+
+ if(adapter->hw.mac_type >= e1000_82540) {
+ E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay);
+
+ /* Set the interrupt throttling rate. Value is calculated
+ * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */
+#define MAX_INTS_PER_SEC 8000
+#define DEFAULT_ITR (1000000000 / (MAX_INTS_PER_SEC * 256))
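+ /* Editor's note: the ITR register counts in units of 256 ns, so
+ * DEFAULT_ITR = 10^9 / (8000 * 256) = 488 units, i.e. one interrupt
+ * at most every ~125 us, capping the rate near 8000 ints/s. */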
+ E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
+ }
+
+ /* Setup the Base and Length of the Rx Descriptor Ring */
+
+ E1000_WRITE_REG(&adapter->hw, RDBAL, (rdba & 0x00000000ffffffffULL));
+ E1000_WRITE_REG(&adapter->hw, RDBAH, (rdba >> 32));
+
+ E1000_WRITE_REG(&adapter->hw, RDLEN, rdlen);
+
+ /* Setup the HW Rx Head and Tail Descriptor Pointers */
+ E1000_WRITE_REG(&adapter->hw, RDH, 0);
+ E1000_WRITE_REG(&adapter->hw, RDT, 0);
+
+ /* Enable 82543 Receive Checksum Offload for TCP and UDP */
+ if((adapter->hw.mac_type >= e1000_82543) &&
+ (adapter->rx_csum == TRUE)) {
+ rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
+ rxcsum |= E1000_RXCSUM_TUOFL;
+ E1000_WRITE_REG(&adapter->hw, RXCSUM, rxcsum);
+ }
+
+ /* Enable Receives */
+
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+}
+
+/**
+ * e1000_free_tx_resources - Free Tx Resources
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ **/
+
+static void
+e1000_free_tx_resources(struct e1000_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+
+ e1000_clean_tx_ring(adapter);
+
+ kfree(adapter->tx_ring.buffer_info);
+ adapter->tx_ring.buffer_info = NULL;
+
+ pci_free_consistent(pdev, adapter->tx_ring.size,
+ adapter->tx_ring.desc, adapter->tx_ring.dma);
+
+ adapter->tx_ring.desc = NULL;
+}
+
+/**
+ * e1000_clean_tx_ring - Free Tx Buffers
+ * @adapter: board private structure
+ **/
+
+static void
+e1000_clean_tx_ring(struct e1000_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ unsigned long size;
+ int i;
+
+ /* Free all the Tx ring sk_buffs */
+
+ for(i = 0; i < adapter->tx_ring.count; i++) {
+ if(adapter->tx_ring.buffer_info[i].skb) {
+
+ pci_unmap_page(pdev,
+ adapter->tx_ring.buffer_info[i].dma,
+ adapter->tx_ring.buffer_info[i].length,
+ PCI_DMA_TODEVICE);
+
+ dev_kfree_skb(adapter->tx_ring.buffer_info[i].skb);
+
+ adapter->tx_ring.buffer_info[i].skb = NULL;
+ }
+ }
+
+ size = sizeof(struct e1000_buffer) * adapter->tx_ring.count;
+ memset(adapter->tx_ring.buffer_info, 0, size);
+
+ /* Zero out the descriptor ring */
+
+ memset(adapter->tx_ring.desc, 0, adapter->tx_ring.size);
+
+ adapter->tx_ring.next_to_use = 0;
+ adapter->tx_ring.next_to_clean = 0;
+
+ E1000_WRITE_REG(&adapter->hw, TDH, 0);
+ E1000_WRITE_REG(&adapter->hw, TDT, 0);
+}
+
+/**
+ * e1000_free_rx_resources - Free Rx Resources
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ **/
+
+static void
+e1000_free_rx_resources(struct e1000_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+
+ e1000_clean_rx_ring(adapter);
+
+ kfree(adapter->rx_ring.buffer_info);
+ adapter->rx_ring.buffer_info = NULL;
+
+ pci_free_consistent(pdev, adapter->rx_ring.size,
+ adapter->rx_ring.desc, adapter->rx_ring.dma);
+
+ adapter->rx_ring.desc = NULL;
+}
+
+/**
+ * e1000_clean_rx_ring - Free Rx Buffers
+ * @adapter: board private structure
+ **/
+
+static void
+e1000_clean_rx_ring(struct e1000_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ unsigned long size;
+ int i;
+
+ /* Free all the Rx ring sk_buffs */
+
+ for(i = 0; i < adapter->rx_ring.count; i++) {
+ if(adapter->rx_ring.buffer_info[i].skb) {
+
+ pci_unmap_single(pdev,
+ adapter->rx_ring.buffer_info[i].dma,
+ adapter->rx_ring.buffer_info[i].length,
+ PCI_DMA_FROMDEVICE);
+
+ dev_kfree_skb(adapter->rx_ring.buffer_info[i].skb);
+
+ adapter->rx_ring.buffer_info[i].skb = NULL;
+ }
+ }
+
+ size = sizeof(struct e1000_buffer) * adapter->rx_ring.count;
+ memset(adapter->rx_ring.buffer_info, 0, size);
+
+ /* Zero out the descriptor ring */
+
+ memset(adapter->rx_ring.desc, 0, adapter->rx_ring.size);
+
+ adapter->rx_ring.next_to_clean = 0;
+ adapter->rx_ring.next_to_use = 0;
+
+ E1000_WRITE_REG(&adapter->hw, RDH, 0);
+ E1000_WRITE_REG(&adapter->hw, RDT, 0);
+}
+
+/* The 82542 2.0 (revision 2) needs to have the receive unit in reset
+ * and memory write and invalidate disabled for certain operations
+ */
+static void
+e1000_enter_82542_rst(struct e1000_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ uint32_t rctl;
+
+ e1000_pci_clear_mwi(&adapter->hw);
+
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ rctl |= E1000_RCTL_RST;
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+ E1000_WRITE_FLUSH(&adapter->hw);
+ mdelay(5);
+
+ if(netif_running(netdev))
+ e1000_clean_rx_ring(adapter);
+}
+
+static void
+e1000_leave_82542_rst(struct e1000_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ uint32_t rctl;
+
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ rctl &= ~E1000_RCTL_RST;
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+ E1000_WRITE_FLUSH(&adapter->hw);
+ mdelay(5);
+
+ if(adapter->hw.pci_cmd_word & PCI_COMMAND_INVALIDATE)
+ e1000_pci_set_mwi(&adapter->hw);
+
+ if(netif_running(netdev)) {
+ e1000_configure_rx(adapter);
+ e1000_alloc_rx_buffers(adapter);
+ }
+}
+
+/**
+ * e1000_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+
+static int
+e1000_set_mac(struct net_device *netdev, void *p)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ struct sockaddr *addr = p;
+
+ if(!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ /* 82542 2.0 needs to be in reset to write receive address registers */
+
+ if(adapter->hw.mac_type == e1000_82542_rev2_0)
+ e1000_enter_82542_rst(adapter);
+
+ memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+ memcpy(adapter->hw.mac_addr, addr->sa_data, netdev->addr_len);
+
+ e1000_rar_set(&adapter->hw, adapter->hw.mac_addr, 0);
+
+ if(adapter->hw.mac_type == e1000_82542_rev2_0)
+ e1000_leave_82542_rst(adapter);
+
+ return 0;
+}
+
+/**
+ * e1000_set_multi - Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_multi entry point is called whenever the multicast address
+ * list or the network interface flags are updated. This routine is
+ * responsible for configuring the hardware for proper multicast,
+ * promiscuous mode, and all-multi behavior.
+ **/
+
+static void
+e1000_set_multi(struct net_device *netdev)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ struct e1000_hw *hw = &adapter->hw;
+ struct dev_mc_list *mc_ptr;
+ uint32_t rctl;
+ uint32_t hash_value;
+ int i;
+
+ /* Check for Promiscuous and All Multicast modes */
+
+ rctl = E1000_READ_REG(hw, RCTL);
+
+ if(netdev->flags & IFF_PROMISC) {
+ rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
+ } else if(netdev->flags & IFF_ALLMULTI) {
+ rctl |= E1000_RCTL_MPE;
+ rctl &= ~E1000_RCTL_UPE;
+ } else {
+ rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE);
+ }
+
+ E1000_WRITE_REG(hw, RCTL, rctl);
+
+ /* 82542 2.0 needs to be in reset to write receive address registers */
+
+ if(hw->mac_type == e1000_82542_rev2_0)
+ e1000_enter_82542_rst(adapter);
+
+ /* load the first 15 multicast addresses into the exact filters 1-15;
+ * RAR 0 is used for the station MAC address. If there are fewer than
+ * 15 addresses, clear the remaining filters.
+ */
+ mc_ptr = netdev->mc_list;
+
+ for(i = 1; i < E1000_RAR_ENTRIES; i++) {
+ if(mc_ptr) {
+ e1000_rar_set(hw, mc_ptr->dmi_addr, i);
+ mc_ptr = mc_ptr->next;
+ } else {
+ E1000_WRITE_REG_ARRAY(hw, RA, i << 1, 0);
+ E1000_WRITE_REG_ARRAY(hw, RA, (i << 1) + 1, 0);
+ }
+ }
+
+ /* clear the old settings from the multicast hash table */
+
+ for(i = 0; i < E1000_NUM_MTA_REGISTERS; i++)
+ E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
+
+ /* load any remaining addresses into the hash table */
+
+ for(; mc_ptr; mc_ptr = mc_ptr->next) {
+ hash_value = e1000_hash_mc_addr(hw, mc_ptr->dmi_addr);
+ e1000_mta_set(hw, hash_value);
+ }
+
+ if(hw->mac_type == e1000_82542_rev2_0)
+ e1000_leave_82542_rst(adapter);
+}
+
+
+/* We need to wait a few seconds after link up before we can get diagnostic information from the PHY */
+
+static void
+e1000_update_phy_info(unsigned long data)
+{
+ struct e1000_adapter *adapter = (struct e1000_adapter *) data;
+ e1000_phy_get_info(&adapter->hw, &adapter->phy_info);
+}
+
+/**
+ * e1000_watchdog - Timer Call-back
+ * @data: pointer to our adapter struct cast into an unsigned long
+ **/
+
+static void
+e1000_watchdog(unsigned long data)
+{
+ struct e1000_adapter *adapter = (struct e1000_adapter *) data;
+ struct net_device *netdev = adapter->netdev;
+ struct e1000_desc_ring *txdr = &adapter->tx_ring;
+ int i;
+
+ e1000_check_for_link(&adapter->hw);
+
+ if(E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
+ if(!netif_carrier_ok(netdev)) {
+ e1000_get_speed_and_duplex(&adapter->hw,
+ &adapter->link_speed,
+ &adapter->link_duplex);
+
+ printk(KERN_INFO
+ "e1000: %s NIC Link is Up %d Mbps %s\n",
+ netdev->name, adapter->link_speed,
+ adapter->link_duplex == FULL_DUPLEX ?
+ "Full Duplex" : "Half Duplex");
+
+ netif_carrier_on(netdev);
+ netif_wake_queue(netdev);
+ mod_timer(&adapter->phy_info_timer, jiffies + 2 * HZ);
+ }
+ } else {
+ if(netif_carrier_ok(netdev)) {
+ adapter->link_speed = 0;
+ adapter->link_duplex = 0;
+ printk(KERN_INFO
+ "e1000: %s NIC Link is Down\n",
+ netdev->name);
+ netif_carrier_off(netdev);
+ netif_stop_queue(netdev);
+ mod_timer(&adapter->phy_info_timer, jiffies + 2 * HZ);
+ }
+ }
+
+ e1000_update_stats(adapter);
+ e1000_update_adaptive(&adapter->hw);
+
+
+ /* Cause software interrupt to ensure rx ring is cleaned */
+ E1000_WRITE_REG(&adapter->hw, ICS, E1000_ICS_RXDMT0);
+
+ /* Early detection of hung controller */
+ i = txdr->next_to_clean;
+ if(txdr->buffer_info[i].dma &&
+ time_after(jiffies, txdr->buffer_info[i].time_stamp + HZ) &&
+ !(E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF))
+ netif_stop_queue(netdev);
+
+ /* Reset the timer */
+ mod_timer(&adapter->watchdog_timer, jiffies + 2 * HZ);
+}
+
+#define E1000_TX_FLAGS_CSUM 0x00000001
+#define E1000_TX_FLAGS_VLAN 0x00000002
+#define E1000_TX_FLAGS_VLAN_MASK 0xffff0000
+#define E1000_TX_FLAGS_VLAN_SHIFT 16
+
+static inline boolean_t
+e1000_tx_csum(struct e1000_adapter *adapter, struct sk_buff *skb)
+{
+ struct e1000_context_desc *context_desc;
+ int i;
+ uint8_t css, cso;
+
+ if(skb->ip_summed == CHECKSUM_HW) {
+ css = skb->h.raw - skb->data;
+ cso = (skb->h.raw + skb->csum) - skb->data;
+
+ i = adapter->tx_ring.next_to_use;
+ context_desc = E1000_CONTEXT_DESC(adapter->tx_ring, i);
+
+ context_desc->upper_setup.tcp_fields.tucss = css;
+ context_desc->upper_setup.tcp_fields.tucso = cso;
+ context_desc->upper_setup.tcp_fields.tucse = 0;
+ context_desc->tcp_seg_setup.data = 0;
+ context_desc->cmd_and_length =
+ cpu_to_le32(adapter->txd_cmd | E1000_TXD_CMD_DEXT);
+
+ i = (i + 1) % adapter->tx_ring.count;
+ adapter->tx_ring.next_to_use = i;
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static inline int
+e1000_tx_map(struct e1000_adapter *adapter, struct sk_buff *skb)
+{
+ struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
+ int len, offset, size, count, i;
+
+ int f;
+ len = skb->len - skb->data_len;
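+ /* Editor's note: start one slot behind next_to_use so that the
+ * (i + 1) % count at the top of each loop iteration lands on
+ * next_to_use first. */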
+ i = (tx_ring->next_to_use + tx_ring->count - 1) % tx_ring->count;
+ count = 0;
+
+ offset = 0;
+
+ while(len) {
+ i = (i + 1) % tx_ring->count;
+ size = min(len, adapter->max_data_per_txd);
+ tx_ring->buffer_info[i].length = size;
+ tx_ring->buffer_info[i].dma =
+ pci_map_single(adapter->pdev,
+ skb->data + offset,
+ size,
+ PCI_DMA_TODEVICE);
+ tx_ring->buffer_info[i].time_stamp = jiffies;
+
+ len -= size;
+ offset += size;
+ count++;
+ }
+
+ for(f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
+ struct skb_frag_struct *frag;
+
+ frag = &skb_shinfo(skb)->frags[f];
+ len = frag->size;
+ offset = 0;
+
+ while(len) {
+ i = (i + 1) % tx_ring->count;
+ size = min(len, adapter->max_data_per_txd);
+ tx_ring->buffer_info[i].length = size;
+ tx_ring->buffer_info[i].dma =
+ pci_map_page(adapter->pdev,
+ frag->page,
+ frag->page_offset + offset,
+ size,
+ PCI_DMA_TODEVICE);
+
+ len -= size;
+ offset += size;
+ count++;
+ }
+ }
+ tx_ring->buffer_info[i].skb = skb;
+
+ return count;
+}
+
+static inline void
+e1000_tx_queue(struct e1000_adapter *adapter, int count, int tx_flags)
+{
+ struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
+ struct e1000_tx_desc *tx_desc = NULL;
+ uint32_t txd_upper, txd_lower;
+ int i;
+
+ txd_upper = 0;
+ txd_lower = adapter->txd_cmd;
+
+ if(tx_flags & E1000_TX_FLAGS_CSUM) {
+ txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+ txd_upper |= E1000_TXD_POPTS_TXSM << 8;
+ }
+
+ if(tx_flags & E1000_TX_FLAGS_VLAN) {
+ txd_lower |= E1000_TXD_CMD_VLE;
+ txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK);
+ }
+
+ i = tx_ring->next_to_use;
+
+ while(count--) {
+ tx_desc = E1000_TX_DESC(*tx_ring, i);
+ tx_desc->buffer_addr = cpu_to_le64(tx_ring->buffer_info[i].dma);
+ tx_desc->lower.data =
+ cpu_to_le32(txd_lower | tx_ring->buffer_info[i].length);
+ tx_desc->upper.data = cpu_to_le32(txd_upper);
+ i = (i + 1) % tx_ring->count;
+ }
+
+ tx_desc->lower.data |= cpu_to_le32(E1000_TXD_CMD_EOP);
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64). */
+ wmb();
+
+ tx_ring->next_to_use = i;
+ E1000_WRITE_REG(&adapter->hw, TDT, i);
+}
+
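+/* Ceiling division: how many descriptors are needed to carry S bytes
+ * at X bytes per descriptor, e.g. S = 3000, X = 2048 -> 2. */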
+#define TXD_USE_COUNT(S, X) (((S) / (X)) + (((S) % (X)) ? 1 : 0))
+
+static int
+e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ int tx_flags = 0, count;
+ int f;
+
+ count = TXD_USE_COUNT(skb->len - skb->data_len,
+ adapter->max_data_per_txd);
+
+ if(count == 0) {
+ dev_kfree_skb_any(skb);
+ return 0;
+ }
+
+ for(f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+ count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size,
+ adapter->max_data_per_txd);
+
+ if(skb->ip_summed == CHECKSUM_HW)
+ count++;
+
+ if(E1000_DESC_UNUSED(&adapter->tx_ring) < count) {
+ netif_stop_queue(netdev);
+ return 1;
+ }
+
+ if(e1000_tx_csum(adapter, skb))
+ tx_flags |= E1000_TX_FLAGS_CSUM;
+
+ if(adapter->vlgrp && vlan_tx_tag_present(skb)) {
+ tx_flags |= E1000_TX_FLAGS_VLAN;
+ tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT);
+ }
+
+ count = e1000_tx_map(adapter, skb);
+
+ e1000_tx_queue(adapter, count, tx_flags);
+
+ netdev->trans_start = jiffies;
+
+ return 0;
+}
+
+/**
+ * e1000_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ **/
+
+static void
+e1000_tx_timeout(struct net_device *netdev)
+{
+ //struct e1000_adapter *adapter = netdev->priv;
+
+ /* Do the reset outside of interrupt context */
+ //schedule_task(&adapter->tx_timeout_task); XXXX Not in Xen!!!
+ e1000_tx_timeout_task(netdev); // XXX HACK
+}
+
+static void
+e1000_tx_timeout_task(struct net_device *netdev)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+
+ netif_device_detach(netdev);
+ e1000_down(adapter);
+ e1000_up(adapter);
+ netif_device_attach(netdev);
+}
+
+/**
+ * e1000_get_stats - Get System Network Statistics
+ * @netdev: network interface device structure
+ *
+ * Returns the address of the device statistics structure.
+ * The statistics are actually updated from the timer callback.
+ **/
+
+static struct net_device_stats *
+e1000_get_stats(struct net_device *netdev)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+
+ return &adapter->net_stats;
+}
+
+/**
+ * e1000_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+
+static int
+e1000_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ int old_mtu = adapter->rx_buffer_len;
+ int max_frame = new_mtu + ENET_HEADER_SIZE + ETHERNET_FCS_SIZE;
+
+ if((max_frame < MINIMUM_ETHERNET_FRAME_SIZE) ||
+ (max_frame > MAX_JUMBO_FRAME_SIZE)) {
+ E1000_ERR("Invalid MTU setting\n");
+ return -EINVAL;
+ }
+
+ if(max_frame <= MAXIMUM_ETHERNET_FRAME_SIZE) {
+ adapter->rx_buffer_len = E1000_RXBUFFER_2048;
+
+ } else if(adapter->hw.mac_type < e1000_82543) {
+ E1000_ERR("Jumbo Frames not supported on 82542\n");
+ return -EINVAL;
+
+ } else if(max_frame <= E1000_RXBUFFER_4096) {
+ adapter->rx_buffer_len = E1000_RXBUFFER_4096;
+
+ } else if(max_frame <= E1000_RXBUFFER_8192) {
+ adapter->rx_buffer_len = E1000_RXBUFFER_8192;
+
+ } else {
+ adapter->rx_buffer_len = E1000_RXBUFFER_16384;
+ }
+
+ if(old_mtu != adapter->rx_buffer_len && netif_running(netdev)) {
+
+ e1000_down(adapter);
+ e1000_up(adapter);
+ }
+
+ netdev->mtu = new_mtu;
+ adapter->hw.max_frame_size = max_frame;
+
+ return 0;
+}
+
+/**
+ * e1000_update_stats - Update the board statistics counters
+ * @adapter: board private structure
+ **/
+
+static void
+e1000_update_stats(struct e1000_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ unsigned long flags;
+ uint16_t phy_tmp;
+
+#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
+
+ spin_lock_irqsave(&adapter->stats_lock, flags);
+
+ /* these counters are modified from e1000_tbi_adjust_stats(),
+ * called from interrupt context, so they must only
+ * be written while holding adapter->stats_lock
+ */
+
+ adapter->stats.crcerrs += E1000_READ_REG(hw, CRCERRS);
+ adapter->stats.gprc += E1000_READ_REG(hw, GPRC);
+ adapter->stats.gorcl += E1000_READ_REG(hw, GORCL);
+ adapter->stats.gorch += E1000_READ_REG(hw, GORCH);
+ adapter->stats.bprc += E1000_READ_REG(hw, BPRC);
+ adapter->stats.mprc += E1000_READ_REG(hw, MPRC);
+ adapter->stats.roc += E1000_READ_REG(hw, ROC);
+ adapter->stats.prc64 += E1000_READ_REG(hw, PRC64);
+ adapter->stats.prc127 += E1000_READ_REG(hw, PRC127);
+ adapter->stats.prc255 += E1000_READ_REG(hw, PRC255);
+ adapter->stats.prc511 += E1000_READ_REG(hw, PRC511);
+ adapter->stats.prc1023 += E1000_READ_REG(hw, PRC1023);
+ adapter->stats.prc1522 += E1000_READ_REG(hw, PRC1522);
+
+ spin_unlock_irqrestore(&adapter->stats_lock, flags);
+
+ /* the rest of the counters are only modified here */
+
+ adapter->stats.symerrs += E1000_READ_REG(hw, SYMERRS);
+ adapter->stats.mpc += E1000_READ_REG(hw, MPC);
+ adapter->stats.scc += E1000_READ_REG(hw, SCC);
+ adapter->stats.ecol += E1000_READ_REG(hw, ECOL);
+ adapter->stats.mcc += E1000_READ_REG(hw, MCC);
+ adapter->stats.latecol += E1000_READ_REG(hw, LATECOL);
+ adapter->stats.dc += E1000_READ_REG(hw, DC);
+ adapter->stats.sec += E1000_READ_REG(hw, SEC);
+ adapter->stats.rlec += E1000_READ_REG(hw, RLEC);
+ adapter->stats.xonrxc += E1000_READ_REG(hw, XONRXC);
+ adapter->stats.xontxc += E1000_READ_REG(hw, XONTXC);
+ adapter->stats.xoffrxc += E1000_READ_REG(hw, XOFFRXC);
+ adapter->stats.xofftxc += E1000_READ_REG(hw, XOFFTXC);
+ adapter->stats.fcruc += E1000_READ_REG(hw, FCRUC);
+ adapter->stats.gptc += E1000_READ_REG(hw, GPTC);
+ adapter->stats.gotcl += E1000_READ_REG(hw, GOTCL);
+ adapter->stats.gotch += E1000_READ_REG(hw, GOTCH);
+ adapter->stats.rnbc += E1000_READ_REG(hw, RNBC);
+ adapter->stats.ruc += E1000_READ_REG(hw, RUC);
+ adapter->stats.rfc += E1000_READ_REG(hw, RFC);
+ adapter->stats.rjc += E1000_READ_REG(hw, RJC);
+ adapter->stats.torl += E1000_READ_REG(hw, TORL);
+ adapter->stats.torh += E1000_READ_REG(hw, TORH);
+ adapter->stats.totl += E1000_READ_REG(hw, TOTL);
+ adapter->stats.toth += E1000_READ_REG(hw, TOTH);
+ adapter->stats.tpr += E1000_READ_REG(hw, TPR);
+ adapter->stats.ptc64 += E1000_READ_REG(hw, PTC64);
+ adapter->stats.ptc127 += E1000_READ_REG(hw, PTC127);
+ adapter->stats.ptc255 += E1000_READ_REG(hw, PTC255);
+ adapter->stats.ptc511 += E1000_READ_REG(hw, PTC511);
+ adapter->stats.ptc1023 += E1000_READ_REG(hw, PTC1023);
+ adapter->stats.ptc1522 += E1000_READ_REG(hw, PTC1522);
+ adapter->stats.mptc += E1000_READ_REG(hw, MPTC);
+ adapter->stats.bptc += E1000_READ_REG(hw, BPTC);
+
+ /* used for adaptive IFS */
+
+ hw->tx_packet_delta = E1000_READ_REG(hw, TPT);
+ adapter->stats.tpt += hw->tx_packet_delta;
+ hw->collision_delta = E1000_READ_REG(hw, COLC);
+ adapter->stats.colc += hw->collision_delta;
+
+ if(hw->mac_type >= e1000_82543) {
+ adapter->stats.algnerrc += E1000_READ_REG(hw, ALGNERRC);
+ adapter->stats.rxerrc += E1000_READ_REG(hw, RXERRC);
+ adapter->stats.tncrs += E1000_READ_REG(hw, TNCRS);
+ adapter->stats.cexterr += E1000_READ_REG(hw, CEXTERR);
+ adapter->stats.tsctc += E1000_READ_REG(hw, TSCTC);
+ adapter->stats.tsctfc += E1000_READ_REG(hw, TSCTFC);
+ }
+
+ /* Fill out the OS statistics structure */
+
+ adapter->net_stats.rx_packets = adapter->stats.gprc;
+ adapter->net_stats.tx_packets = adapter->stats.gptc;
+ adapter->net_stats.rx_bytes = adapter->stats.gorcl;
+ adapter->net_stats.tx_bytes = adapter->stats.gotcl;
+ adapter->net_stats.multicast = adapter->stats.mprc;
+ adapter->net_stats.collisions = adapter->stats.colc;
+
+ /* Rx Errors */
+
+ adapter->net_stats.rx_errors = adapter->stats.rxerrc +
+ adapter->stats.crcerrs + adapter->stats.algnerrc +
+ adapter->stats.rlec + adapter->stats.rnbc +
+ adapter->stats.mpc + adapter->stats.cexterr;
+ adapter->net_stats.rx_dropped = adapter->stats.rnbc;
+ adapter->net_stats.rx_length_errors = adapter->stats.rlec;
+ adapter->net_stats.rx_crc_errors = adapter->stats.crcerrs;
+ adapter->net_stats.rx_frame_errors = adapter->stats.algnerrc;
+ adapter->net_stats.rx_fifo_errors = adapter->stats.mpc;
+ adapter->net_stats.rx_missed_errors = adapter->stats.mpc;
+
+ /* Tx Errors */
+
+ adapter->net_stats.tx_errors = adapter->stats.ecol +
+ adapter->stats.latecol;
+ adapter->net_stats.tx_aborted_errors = adapter->stats.ecol;
+ adapter->net_stats.tx_window_errors = adapter->stats.latecol;
+ adapter->net_stats.tx_carrier_errors = adapter->stats.tncrs;
+
+ /* Tx Dropped needs to be maintained elsewhere */
+
+ /* Phy Stats */
+
+ if(hw->media_type == e1000_media_type_copper) {
+ if((adapter->link_speed == SPEED_1000) &&
+ (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
+ phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
+ adapter->phy_stats.idle_errors += phy_tmp;
+ }
+
+ if((hw->mac_type <= e1000_82546) &&
+ !e1000_read_phy_reg(hw, M88E1000_RX_ERR_CNTR, &phy_tmp))
+ adapter->phy_stats.receive_errors += phy_tmp;
+ }
+}
+
+/**
+ * e1000_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ **/
+
+static inline void
+e1000_irq_disable(struct e1000_adapter *adapter)
+{
+ atomic_inc(&adapter->irq_sem);
+ E1000_WRITE_REG(&adapter->hw, IMC, ~0);
+ E1000_WRITE_FLUSH(&adapter->hw);
+ synchronize_irq();
+}
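+
+/* irq_sem makes these calls nest: each e1000_irq_disable bumps the
+ * counter and masks everything; only the e1000_irq_enable that drops
+ * the counter back to zero re-arms the interrupt mask. */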
+
+/**
+ * e1000_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ **/
+
+static inline void
+e1000_irq_enable(struct e1000_adapter *adapter)
+{
+ if(atomic_dec_and_test(&adapter->irq_sem)) {
+ E1000_WRITE_REG(&adapter->hw, IMS, IMS_ENABLE_MASK);
+ E1000_WRITE_FLUSH(&adapter->hw);
+ }
+}
+
+/**
+ * e1000_intr - Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ * @pt_regs: CPU registers structure
+ **/
+
+static void
+e1000_intr(int irq, void *data, struct pt_regs *regs)
+{
+ struct net_device *netdev = data;
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t icr;
+ int i = E1000_MAX_INTR;
+
+ while(i && (icr = E1000_READ_REG(&adapter->hw, ICR))) {
+
+ if(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
+ adapter->hw.get_link_status = 1;
+ mod_timer(&adapter->watchdog_timer, jiffies);
+ }
+
+ e1000_clean_rx_irq(adapter);
+ e1000_clean_tx_irq(adapter);
+ i--;
+
+ }
+}
+
+/**
+ * e1000_clean_tx_irq - Reclaim resources after transmit completes
+ * @adapter: board private structure
+ **/
+
+static void
+e1000_clean_tx_irq(struct e1000_adapter *adapter)
+{
+ struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+ struct e1000_tx_desc *tx_desc;
+ int i;
+
+ i = tx_ring->next_to_clean;
+ tx_desc = E1000_TX_DESC(*tx_ring, i);
+
+ while(tx_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) {
+
+ if(tx_ring->buffer_info[i].dma) {
+
+ pci_unmap_page(pdev,
+ tx_ring->buffer_info[i].dma,
+ tx_ring->buffer_info[i].length,
+ PCI_DMA_TODEVICE);
+
+ tx_ring->buffer_info[i].dma = 0;
+ }
+
+ if(tx_ring->buffer_info[i].skb) {
+
+ dev_kfree_skb_any(tx_ring->buffer_info[i].skb);
+
+ tx_ring->buffer_info[i].skb = NULL;
+ }
+
+ tx_desc->upper.data = 0;
+
+ i = (i + 1) % tx_ring->count;
+ tx_desc = E1000_TX_DESC(*tx_ring, i);
+ }
+
+ tx_ring->next_to_clean = i;
+
+ if(netif_queue_stopped(netdev) && netif_carrier_ok(netdev) &&
+ (E1000_DESC_UNUSED(tx_ring) > E1000_TX_QUEUE_WAKE)) {
+
+ netif_wake_queue(netdev);
+ }
+}
+
+/**
+ * e1000_clean_rx_irq - Send received data up the network stack
+ * @adapter: board private structure
+ **/
+
+static void
+e1000_clean_rx_irq(struct e1000_adapter *adapter)
+{
+ struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+ struct e1000_rx_desc *rx_desc;
+ struct sk_buff *skb;
+ unsigned long flags;
+ uint32_t length;
+ uint8_t last_byte;
+ int i;
+
+ i = rx_ring->next_to_clean;
+ rx_desc = E1000_RX_DESC(*rx_ring, i);
+
+ while(rx_desc->status & E1000_RXD_STAT_DD) {
+
+ pci_unmap_single(pdev,
+ rx_ring->buffer_info[i].dma,
+ rx_ring->buffer_info[i].length,
+ PCI_DMA_FROMDEVICE);
+
+ skb = rx_ring->buffer_info[i].skb;
+ length = le16_to_cpu(rx_desc->length);
+
+ if(!(rx_desc->status & E1000_RXD_STAT_EOP)) {
+
+ /* All receives must fit into a single buffer */
+
+ E1000_DBG("Receive packet consumed multiple buffers\n");
+
+ dev_kfree_skb_irq(skb);
+ rx_desc->status = 0;
+ rx_ring->buffer_info[i].skb = NULL;
+
+ i = (i + 1) % rx_ring->count;
+
+ rx_desc = E1000_RX_DESC(*rx_ring, i);
+ continue;
+ }
+
+ if(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
+
+ last_byte = *(skb->data + length - 1);
+
+ if(TBI_ACCEPT(&adapter->hw, rx_desc->status,
+ rx_desc->errors, length, last_byte)) {
+
+ spin_lock_irqsave(&adapter->stats_lock, flags);
+
+ e1000_tbi_adjust_stats(&adapter->hw,
+ &adapter->stats,
+ length, skb->data);
+
+ spin_unlock_irqrestore(&adapter->stats_lock,
+ flags);
+ length--;
+ } else {
+
+ dev_kfree_skb_irq(skb);
+ rx_desc->status = 0;
+ rx_ring->buffer_info[i].skb = NULL;
+
+ i = (i + 1) % rx_ring->count;
+
+ rx_desc = E1000_RX_DESC(*rx_ring, i);
+ continue;
+ }
+ }
+
+ /* Good Receive */
+ skb_put(skb, length - ETHERNET_FCS_SIZE);
+
+ /* Receive Checksum Offload */
+ e1000_rx_checksum(adapter, rx_desc, skb);
+
+ skb->protocol = eth_type_trans(skb, netdev);
+ if(adapter->vlgrp && (rx_desc->status & E1000_RXD_STAT_VP)) {
+ vlan_hwaccel_rx(skb, adapter->vlgrp,
+ (rx_desc->special & E1000_RXD_SPC_VLAN_MASK));
+ } else {
+ netif_rx(skb);
+ }
+ netdev->last_rx = jiffies;
+
+ rx_desc->status = 0;
+ rx_ring->buffer_info[i].skb = NULL;
+
+ i = (i + 1) % rx_ring->count;
+
+ rx_desc = E1000_RX_DESC(*rx_ring, i);
+ }
+
+ rx_ring->next_to_clean = i;
+
+ e1000_alloc_rx_buffers(adapter);
+}
+
+/**
+ * e1000_alloc_rx_buffers - Replace used receive buffers
+ * @data: address of board private structure
+ **/
+
+static void
+e1000_alloc_rx_buffers(struct e1000_adapter *adapter)
+{
+ struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+ struct e1000_rx_desc *rx_desc;
+ struct sk_buff *skb;
+ int reserve_len;
+ int i;
+
+ reserve_len = 2;
+
+ i = rx_ring->next_to_use;
+
+ while(!rx_ring->buffer_info[i].skb) {
+ rx_desc = E1000_RX_DESC(*rx_ring, i);
+
+ skb = dev_alloc_skb(adapter->rx_buffer_len + reserve_len);
+
+ if(!skb) {
+ /* Better luck next round */
+ break;
+ }
+
+ /* Make buffer alignment 2 beyond a 16 byte boundary
+ * this will result in a 16 byte aligned IP header after
+ * the 14 byte MAC header is removed
+ */
+ skb_reserve(skb, reserve_len);
+
+ skb->dev = netdev;
+
+ rx_ring->buffer_info[i].skb = skb;
+ rx_ring->buffer_info[i].length = adapter->rx_buffer_len;
+ rx_ring->buffer_info[i].dma =
+ pci_map_single(pdev,
+ skb->data,
+ adapter->rx_buffer_len,
+ PCI_DMA_FROMDEVICE);
+
+ rx_desc->buffer_addr = cpu_to_le64(rx_ring->buffer_info[i].dma);
+
+ if(!(i % E1000_RX_BUFFER_WRITE)) {
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64). */
+ wmb();
+
+ E1000_WRITE_REG(&adapter->hw, RDT, i);
+ }
+
+ i = (i + 1) % rx_ring->count;
+ }
+
+ rx_ring->next_to_use = i;
+}
+
+/**
+ * e1000_ioctl - handle device-specific ioctl requests
+ * @netdev: network interface device structure
+ * @ifr: interface request structure
+ * @cmd: ioctl command
+ **/
+
+static int
+e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+ switch (cmd) {
+ case SIOCETHTOOL:
+ return e1000_ethtool_ioctl(netdev, ifr);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * e1000_rx_checksum - Receive Checksum Offload for 82543
+ * @adapter: board private structure
+ * @rx_desc: receive descriptor
+ * @sk_buff: socket buffer with received data
+ **/
+
+static inline void
+e1000_rx_checksum(struct e1000_adapter *adapter,
+ struct e1000_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ /* 82543 or newer only */
+ if((adapter->hw.mac_type < e1000_82543) ||
+ /* Ignore Checksum bit is set */
+ (rx_desc->status & E1000_RXD_STAT_IXSM) ||
+ /* TCP Checksum has not been calculated */
+ (!(rx_desc->status & E1000_RXD_STAT_TCPCS))) {
+ skb->ip_summed = CHECKSUM_NONE;
+ return;
+ }
+
+ /* At this point we know the hardware did the TCP checksum */
+ /* now look at the TCP checksum error bit */
+ if(rx_desc->errors & E1000_RXD_ERR_TCPE) {
+ /* let the stack verify checksum errors */
+ skb->ip_summed = CHECKSUM_NONE;
+ adapter->hw_csum_err++;
+ } else {
+ /* TCP checksum is good */
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ adapter->hw_csum_good++;
+ }
+}
+
+void
+e1000_pci_set_mwi(struct e1000_hw *hw)
+{
+ struct e1000_adapter *adapter = hw->back;
+
+ pci_set_mwi(adapter->pdev);
+}
+
+void
+e1000_pci_clear_mwi(struct e1000_hw *hw)
+{
+ struct e1000_adapter *adapter = hw->back;
+
+ pci_clear_mwi(adapter->pdev);
+}
+
+void
+e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
+{
+ struct e1000_adapter *adapter = hw->back;
+
+ pci_read_config_word(adapter->pdev, reg, value);
+}
+
+void
+e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
+{
+ struct e1000_adapter *adapter = hw->back;
+
+ pci_write_config_word(adapter->pdev, reg, *value);
+}
+
+uint32_t
+e1000_io_read(struct e1000_hw *hw, uint32_t port)
+{
+ return inl(port);
+}
+
+void
+e1000_io_write(struct e1000_hw *hw, uint32_t port, uint32_t value)
+{
+ outl(value, port);
+}
+
+static void
+e1000_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t ctrl, rctl;
+
+ e1000_irq_disable(adapter);
+ adapter->vlgrp = grp;
+
+ if(grp) {
+ /* enable VLAN tag insert/strip */
+
+ E1000_WRITE_REG(&adapter->hw, VET, ETHERNET_IEEE_VLAN_TYPE);
+
+ ctrl = E1000_READ_REG(&adapter->hw, CTRL);
+ ctrl |= E1000_CTRL_VME;
+ E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
+
+ /* enable VLAN receive filtering */
+
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ rctl |= E1000_RCTL_VFE;
+ rctl &= ~E1000_RCTL_CFIEN;
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+ } else {
+ /* disable VLAN tag insert/strip */
+
+ ctrl = E1000_READ_REG(&adapter->hw, CTRL);
+ ctrl &= ~E1000_CTRL_VME;
+ E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
+
+ /* disable VLAN filtering */
+
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ rctl &= ~E1000_RCTL_VFE;
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+ }
+
+ e1000_irq_enable(adapter);
+}
+
+static void
+e1000_vlan_rx_add_vid(struct net_device *netdev, uint16_t vid)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t vfta, index;
+
+ /* add VID to filter table */
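+ /* e.g. VID 100: index = 100 >> 5 = 3, bit = 100 & 0x1F = 4,
+ * so bit 4 of VFTA[3] gets set */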
+
+ index = (vid >> 5) & 0x7F;
+ vfta = E1000_READ_REG_ARRAY(&adapter->hw, VFTA, index);
+ vfta |= (1 << (vid & 0x1F));
+ e1000_write_vfta(&adapter->hw, index, vfta);
+}
+
+static void
+e1000_vlan_rx_kill_vid(struct net_device *netdev, uint16_t vid)
+{
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t vfta, index;
+
+ e1000_irq_disable(adapter);
+
+ if(adapter->vlgrp)
+ adapter->vlgrp->vlan_devices[vid] = NULL;
+
+ e1000_irq_enable(adapter);
+
+ /* remove VID from filter table */
+
+ index = (vid >> 5) & 0x7F;
+ vfta = E1000_READ_REG_ARRAY(&adapter->hw, VFTA, index);
+ vfta &= ~(1 << (vid & 0x1F));
+ e1000_write_vfta(&adapter->hw, index, vfta);
+}
+
+static void
+e1000_restore_vlan(struct e1000_adapter *adapter)
+{
+ e1000_vlan_rx_register(adapter->netdev, adapter->vlgrp);
+
+ if(adapter->vlgrp) {
+ uint16_t vid;
+ for(vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
+ if(!adapter->vlgrp->vlan_devices[vid])
+ continue;
+ e1000_vlan_rx_add_vid(adapter->netdev, vid);
+ }
+ }
+}
+
+static int
+e1000_notify_reboot(struct notifier_block *nb, unsigned long event, void *p)
+{
+ struct pci_dev *pdev = NULL;
+
+ switch(event) {
+ case SYS_DOWN:
+ case SYS_HALT:
+ case SYS_POWER_OFF:
+ pci_for_each_dev(pdev) {
+ if(pci_dev_driver(pdev) == &e1000_driver)
+ e1000_suspend(pdev, 3);
+ }
+ }
+ return NOTIFY_DONE;
+}
+
+static int
+e1000_suspend(struct pci_dev *pdev, uint32_t state)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t ctrl, ctrl_ext, rctl, manc, status;
+ uint32_t wufc = adapter->wol;
+
+ netif_device_detach(netdev);
+
+ if(netif_running(netdev))
+ e1000_down(adapter);
+
+ status = E1000_READ_REG(&adapter->hw, STATUS);
+ if(status & E1000_STATUS_LU)
+ wufc &= ~E1000_WUFC_LNKC;
+
+ if(wufc) {
+ e1000_setup_rctl(adapter);
+ e1000_set_multi(netdev);
+
+ /* turn on all-multi mode if wake on multicast is enabled */
+ if(adapter->wol & E1000_WUFC_MC) {
+ rctl = E1000_READ_REG(&adapter->hw, RCTL);
+ rctl |= E1000_RCTL_MPE;
+ E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+ }
+
+ if(adapter->hw.mac_type >= e1000_82540) {
+ ctrl = E1000_READ_REG(&adapter->hw, CTRL);
+ /* advertise wake from D3Cold */
+ #define E1000_CTRL_ADVD3WUC 0x00100000
+ /* phy power management enable */
+ #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
+ ctrl |= E1000_CTRL_ADVD3WUC |
+ E1000_CTRL_EN_PHY_PWR_MGMT;
+ E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
+ }
+
+ if(adapter->hw.media_type == e1000_media_type_fiber) {
+ /* keep the laser running in D3 */
+ ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_EXT_SDP7_DATA;
+ E1000_WRITE_REG(&adapter->hw, CTRL_EXT, ctrl_ext);
+ }
+
+ E1000_WRITE_REG(&adapter->hw, WUC, E1000_WUC_PME_EN);
+ E1000_WRITE_REG(&adapter->hw, WUFC, wufc);
+ pci_enable_wake(pdev, 3, 1);
+ pci_enable_wake(pdev, 4, 1); /* 4 == D3 cold */
+ } else {
+ E1000_WRITE_REG(&adapter->hw, WUC, 0);
+ E1000_WRITE_REG(&adapter->hw, WUFC, 0);
+ pci_enable_wake(pdev, 3, 0);
+ pci_enable_wake(pdev, 4, 0); /* 4 == D3 cold */
+ }
+
+ pci_save_state(pdev, adapter->pci_state);
+
+ if(adapter->hw.mac_type >= e1000_82540) {
+ manc = E1000_READ_REG(&adapter->hw, MANC);
+ if(manc & E1000_MANC_SMBUS_EN) {
+ manc |= E1000_MANC_ARP_EN;
+ E1000_WRITE_REG(&adapter->hw, MANC, manc);
+ state = 0;
+ }
+ }
+
+ state = (state > 0) ? 3 : 0;
+ pci_set_power_state(pdev, state);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int
+e1000_resume(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct e1000_adapter *adapter = netdev->priv;
+ uint32_t manc;
+
+ pci_set_power_state(pdev, 0);
+ pci_restore_state(pdev, adapter->pci_state);
+
+ pci_enable_wake(pdev, 3, 0);
+ pci_enable_wake(pdev, 4, 0); /* 4 == D3 cold */
+
+ e1000_reset(adapter);
+ E1000_WRITE_REG(&adapter->hw, WUS, ~0);
+
+ if(netif_running(netdev))
+ e1000_up(adapter);
+
+ netif_device_attach(netdev);
+
+ if(adapter->hw.mac_type >= e1000_82540) {
+ manc = E1000_READ_REG(&adapter->hw, MANC);
+ manc &= ~(E1000_MANC_ARP_EN);
+ E1000_WRITE_REG(&adapter->hw, MANC, manc);
+ }
+
+ return 0;
+}
+#endif
+
+/* e1000_main.c */
diff --git a/xen/drivers/net/e1000/e1000_osdep.h b/xen/drivers/net/e1000/e1000_osdep.h
new file mode 100644
index 0000000000..40b62bfecd
--- /dev/null
+++ b/xen/drivers/net/e1000/e1000_osdep.h
@@ -0,0 +1,112 @@
+/*******************************************************************************
+
+
+ Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+/* glue for the OS-independent part of e1000;
+ * includes the register access macros
+ */
+
+#ifndef _E1000_OSDEP_H_
+#define _E1000_OSDEP_H_
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <asm/io.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+
+#ifndef msec_delay
+#define msec_delay(x) { \
+ unsigned long s = jiffies + 1 + (((x) * HZ) / 1000); \
+ while (time_before(jiffies, s)); }
+
+#if 0
+/******************** NOT in XEN ! *******/
+#define XXXXmsec_delay(x) do { if(in_interrupt()) { \
+ /* Don't mdelay in interrupt context! */ \
+ BUG(); \
+ } else { \
+ set_current_state(TASK_UNINTERRUPTIBLE); \
+ schedule_timeout((x * HZ)/1000); \
+ } } while(0)
+#endif
+
+#else
+#error "msec already defined!"
+#endif
+
+#define PCI_COMMAND_REGISTER PCI_COMMAND
+#define CMD_MEM_WRT_INVALIDATE PCI_COMMAND_INVALIDATE
+
+typedef enum {
+ FALSE = 0,
+ TRUE = 1
+} boolean_t;
+
+#define MSGOUT(S, A, B) printk(KERN_DEBUG S "\n", A, B)
+
+//#define DBG 1
+
+#if DBG
+#define DEBUGOUT(S) printk(KERN_DEBUG S "\n")
+#define DEBUGOUT1(S, A...) printk(KERN_DEBUG S "\n", A)
+#else
+#define DEBUGOUT(S)
+#define DEBUGOUT1(S, A...)
+#endif
+
+#define DEBUGFUNC(F) DEBUGOUT(F)
+#define DEBUGOUT2 DEBUGOUT1
+#define DEBUGOUT3 DEBUGOUT2
+#define DEBUGOUT7 DEBUGOUT3
+
+
+#define E1000_WRITE_REG(a, reg, value) ( \
+ ((a)->mac_type >= e1000_82543) ? \
+ (writel((value), ((a)->hw_addr + E1000_##reg))) : \
+ (writel((value), ((a)->hw_addr + E1000_82542_##reg))))
+
+#define E1000_READ_REG(a, reg) ( \
+ ((a)->mac_type >= e1000_82543) ? \
+ readl((a)->hw_addr + E1000_##reg) : \
+ readl((a)->hw_addr + E1000_82542_##reg))
+
+#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) ( \
+ ((a)->mac_type >= e1000_82543) ? \
+ writel((value), ((a)->hw_addr + E1000_##reg + ((offset) << 2))) : \
+ writel((value), ((a)->hw_addr + E1000_82542_##reg + ((offset) << 2))))
+
+#define E1000_READ_REG_ARRAY(a, reg, offset) ( \
+ ((a)->mac_type >= e1000_82543) ? \
+ readl((a)->hw_addr + E1000_##reg + ((offset) << 2)) : \
+ readl((a)->hw_addr + E1000_82542_##reg + ((offset) << 2)))
+
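+/* Reading STATUS back forces any posted PCI writes out to the device
+ * before execution continues (the usual posted-write flush idiom). */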
+#define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, STATUS)
+
+#endif /* _E1000_OSDEP_H_ */
diff --git a/xen/drivers/net/e1000/e1000_param.c b/xen/drivers/net/e1000/e1000_param.c
new file mode 100644
index 0000000000..a11941f3f2
--- /dev/null
+++ b/xen/drivers/net/e1000/e1000_param.c
@@ -0,0 +1,655 @@
+/*******************************************************************************
+
+
+ Copyright(c) 1999 - 2002 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000.h"
+
+/* This is the only thing that needs to be changed to adjust the
+ * maximum number of ports that the driver can manage.
+ */
+
+#define E1000_MAX_NIC 32
+
+#define OPTION_UNSET -1
+#define OPTION_DISABLED 0
+#define OPTION_ENABLED 1
+
+/* Module Parameters are always initialized to -1, so that the driver
+ * can tell the difference between no user-specified value and the
+ * user asking for the default value.
+ * The true default values are loaded in when e1000_check_options is called.
+ *
+ * This is a GCC extension to ANSI C.
+ * See the item "Labeled Elements in Initializers" in the section
+ * "Extensions to the C Language Family" of the GCC documentation.
+ */
+
+#define E1000_PARAM_INIT { [0 ... E1000_MAX_NIC] = OPTION_UNSET }
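+/* With E1000_MAX_NIC = 32 this expands to { [0 ... 32] = OPTION_UNSET },
+ * i.e. all 33 per-board slots start out as -1. */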
+
+/* All parameters are treated the same, as an integer array of values.
+ * This macro just reduces the need to repeat the same declaration code
+ * over and over (plus this helps to avoid typo bugs).
+ */
+
+#define E1000_PARAM(X, S) \
+static const int __devinitdata X[E1000_MAX_NIC + 1] = E1000_PARAM_INIT; \
+MODULE_PARM(X, "1-" __MODULE_STRING(E1000_MAX_NIC) "i"); \
+MODULE_PARM_DESC(X, S);
+
+/* Transmit Descriptor Count
+ *
+ * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers
+ * Valid Range: 80-4096 for 82544
+ *
+ * Default Value: 256
+ */
+
+E1000_PARAM(TxDescriptors, "Number of transmit descriptors");
+
+/* Receive Descriptor Count
+ *
+ * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers
+ * Valid Range: 80-4096 for 82544
+ *
+ * Default Value: 80
+ */
+
+E1000_PARAM(RxDescriptors, "Number of receive descriptors");
+
+/* User Specified Speed Override
+ *
+ * Valid Range: 0, 10, 100, 1000
+ * - 0 - auto-negotiate at all supported speeds
+ * - 10 - only link at 10 Mbps
+ * - 100 - only link at 100 Mbps
+ * - 1000 - only link at 1000 Mbps
+ *
+ * Default Value: 0
+ */
+
+E1000_PARAM(Speed, "Speed setting");
+
+/* User Specified Duplex Override
+ *
+ * Valid Range: 0-2
+ * - 0 - auto-negotiate for duplex
+ * - 1 - only link at half duplex
+ * - 2 - only link at full duplex
+ *
+ * Default Value: 0
+ */
+
+E1000_PARAM(Duplex, "Duplex setting");
+
+/* Auto-negotiation Advertisement Override
+ *
+ * Valid Range: 0x01-0x0F, 0x20-0x2F
+ *
+ * The AutoNeg value is a bit mask describing which speed and duplex
+ * combinations should be advertised during auto-negotiation.
+ * The supported speed and duplex modes are listed below
+ *
+ * Bit            7     6     5     4     3     2     1     0
+ * Speed (Mbps)  N/A   N/A   1000  N/A   100   100   10    10
+ * Duplex                    Full        Full  Half  Full  Half
+ *
+ * Default Value: 0x2F
+ */
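+/* e.g. the default of 0x2F sets bits 0-3 and 5, advertising 10/HD,
+ * 10/FD, 100/HD, 100/FD and 1000/FD. */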
+
+E1000_PARAM(AutoNeg, "Advertised auto-negotiation setting");
+
+/* User Specified Flow Control Override
+ *
+ * Valid Range: 0-3
+ * - 0 - No Flow Control
+ * - 1 - Rx only, respond to PAUSE frames but do not generate them
+ * - 2 - Tx only, generate PAUSE frames but ignore them on receive
+ * - 3 - Full Flow Control Support
+ *
+ * Default Value: Read flow control settings from the EEPROM
+ */
+
+E1000_PARAM(FlowControl, "Flow Control setting");
+
+/* XsumRX - Receive Checksum Offload Enable/Disable
+ *
+ * Valid Range: 0, 1
+ * - 0 - disables all checksum offload
+ * - 1 - enables receive IP/TCP/UDP checksum offload
+ * on 82543 based NICs
+ *
+ * Default Value: 1
+ */
+
+E1000_PARAM(XsumRX, "Disable or enable Receive Checksum offload");
+
+/* Transmit Interrupt Delay in units of 1.024 microseconds
+ *
+ * Valid Range: 0-65535
+ *
+ * Default Value: 64
+ */
+
+E1000_PARAM(TxIntDelay, "Transmit Interrupt Delay");
+
+/* Transmit Absolute Interrupt Delay in units of 1.024 microseconds
+ *
+ * Valid Range: 0-65535
+ *
+ * Default Value: 0
+ */
+
+E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay");
+
+/* Receive Interrupt Delay in units of 1.024 microseconds
+ *
+ * Valid Range: 0-65535
+ *
+ * Default Value: 0/128
+ */
+
+E1000_PARAM(RxIntDelay, "Receive Interrupt Delay");
+
+/* Receive Absolute Interrupt Delay in units of 1.024 microseconds
+ *
+ * Valid Range: 0-65535
+ *
+ * Default Value: 128
+ */
+
+E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
+
+#define AUTONEG_ADV_DEFAULT 0x2F
+#define AUTONEG_ADV_MASK 0x2F
+#define FLOW_CONTROL_DEFAULT FLOW_CONTROL_FULL
+
+#define DEFAULT_TXD 256
+#define MAX_TXD 256
+#define MIN_TXD 80
+#define MAX_82544_TXD 4096
+
+#define DEFAULT_RXD 80
+#define MAX_RXD 256
+#define MIN_RXD 80
+#define MAX_82544_RXD 4096
+
+#define DEFAULT_RDTR 0
+#define MAX_RXDELAY 0xFFFF
+#define MIN_RXDELAY 0
+
+#define DEFAULT_RADV 128
+#define MAX_RXABSDELAY 0xFFFF
+#define MIN_RXABSDELAY 0
+
+#define DEFAULT_TIDV 64
+#define MAX_TXDELAY 0xFFFF
+#define MIN_TXDELAY 0
+
+#define DEFAULT_TADV 64
+#define MAX_TXABSDELAY 0xFFFF
+#define MIN_TXABSDELAY 0
+
+struct e1000_option {
+ enum { enable_option, range_option, list_option } type;
+ char *name;
+ char *err;
+ int def;
+ union {
+ struct { /* range_option info */
+ int min;
+ int max;
+ } r;
+ struct { /* list_option info */
+ int nr;
+ struct e1000_opt_list { int i; char *str; } *p;
+ } l;
+ } arg;
+};
+
+static int __devinit
+e1000_validate_option(int *value, struct e1000_option *opt)
+{
+ if(*value == OPTION_UNSET) {
+ *value = opt->def;
+ return 0;
+ }
+
+ switch (opt->type) {
+ case enable_option:
+ switch (*value) {
+ case OPTION_ENABLED:
+ printk(KERN_INFO "%s Enabled\n", opt->name);
+ return 0;
+ case OPTION_DISABLED:
+ printk(KERN_INFO "%s Disabled\n", opt->name);
+ return 0;
+ }
+ break;
+ case range_option:
+ if(*value >= opt->arg.r.min && *value <= opt->arg.r.max) {
+ printk(KERN_INFO "%s set to %i\n", opt->name, *value);
+ return 0;
+ }
+ break;
+ case list_option: {
+ int i;
+ struct e1000_opt_list *ent;
+
+ for(i = 0; i < opt->arg.l.nr; i++) {
+ ent = &opt->arg.l.p[i];
+ if(*value == ent->i) {
+ if(ent->str[0] != '\0')
+ printk(KERN_INFO "%s\n", ent->str);
+ return 0;
+ }
+ }
+ }
+ break;
+ default:
+ BUG();
+ }
+
+ printk(KERN_INFO "Invalid %s specified (%i) %s\n",
+ opt->name, *value, opt->err);
+ *value = opt->def;
+ return -1;
+}
+
+static void e1000_check_fiber_options(struct e1000_adapter *adapter);
+static void e1000_check_copper_options(struct e1000_adapter *adapter);
+
+/**
+ * e1000_check_options - Range Checking for Command Line Parameters
+ * @adapter: board private structure
+ *
+ * This routine checks all command-line parameters for valid user
+ * input. If an invalid value is given, or if no user-specified
+ * value exists, a default value is used. The final value is stored
+ * in a variable in the adapter structure.
+ **/
+
+void __devinit
+e1000_check_options(struct e1000_adapter *adapter)
+{
+ int bd = adapter->bd_number;
+ if(bd >= E1000_MAX_NIC) {
+ printk(KERN_NOTICE
+ "Warning: no configuration for board #%i\n", bd);
+ printk(KERN_NOTICE "Using defaults for all values\n");
+ bd = E1000_MAX_NIC;
+ }
+
+ { /* Transmit Descriptor Count */
+ struct e1000_option opt = {
+ .type = range_option,
+ .name = "Transmit Descriptors",
+ .err = "using default of " __MODULE_STRING(DEFAULT_TXD),
+ .def = DEFAULT_TXD,
+ .arg = { .r = { .min = MIN_TXD }}
+ };
+ struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
+ e1000_mac_type mac_type = adapter->hw.mac_type;
+ opt.arg.r.max = mac_type < e1000_82544 ?
+ MAX_TXD : MAX_82544_TXD;
+
+ tx_ring->count = TxDescriptors[bd];
+ e1000_validate_option(&tx_ring->count, &opt);
+ E1000_ROUNDUP(tx_ring->count, REQ_TX_DESCRIPTOR_MULTIPLE);
+ }
+ { /* Receive Descriptor Count */
+ struct e1000_option opt = {
+ .type = range_option,
+ .name = "Receive Descriptors",
+ .err = "using default of " __MODULE_STRING(DEFAULT_RXD),
+ .def = DEFAULT_RXD,
+ .arg = { .r = { .min = MIN_RXD }}
+ };
+ struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
+ e1000_mac_type mac_type = adapter->hw.mac_type;
+ opt.arg.r.max = mac_type < e1000_82544 ? MAX_RXD : MAX_82544_RXD;
+
+ rx_ring->count = RxDescriptors[bd];
+ e1000_validate_option(&rx_ring->count, &opt);
+ E1000_ROUNDUP(rx_ring->count, REQ_RX_DESCRIPTOR_MULTIPLE);
+ }
+ { /* Checksum Offload Enable/Disable */
+ struct e1000_option opt = {
+ .type = enable_option,
+ .name = "Checksum Offload",
+ .err = "defaulting to Enabled",
+ .def = OPTION_ENABLED
+ };
+
+ int rx_csum = XsumRX[bd];
+ e1000_validate_option(&rx_csum, &opt);
+ adapter->rx_csum = rx_csum;
+ }
+ { /* Flow Control */
+
+ struct e1000_opt_list fc_list[] =
+ {{ e1000_fc_none, "Flow Control Disabled" },
+ { e1000_fc_rx_pause, "Flow Control Receive Only" },
+ { e1000_fc_tx_pause, "Flow Control Transmit Only" },
+ { e1000_fc_full, "Flow Control Enabled" },
+ { e1000_fc_default, "Flow Control Hardware Default" }};
+
+ struct e1000_option opt = {
+ .type = list_option,
+ .name = "Flow Control",
+ .err = "reading default settings from EEPROM",
+ .def = e1000_fc_default,
+ .arg = { .l = { .nr = ARRAY_SIZE(fc_list), .p = fc_list }}
+ };
+
+ int fc = FlowControl[bd];
+ e1000_validate_option(&fc, &opt);
+ adapter->hw.fc = adapter->hw.original_fc = fc;
+ }
+ { /* Transmit Interrupt Delay */
+ char *tidv = "using default of " __MODULE_STRING(DEFAULT_TIDV);
+ struct e1000_option opt = {
+ .type = range_option,
+ .name = "Transmit Interrupt Delay",
+ .arg = { .r = { .min = MIN_TXDELAY, .max = MAX_TXDELAY }}
+ };
+ opt.def = DEFAULT_TIDV;
+ opt.err = tidv;
+
+ adapter->tx_int_delay = TxIntDelay[bd];
+ e1000_validate_option(&adapter->tx_int_delay, &opt);
+ }
+ { /* Transmit Absolute Interrupt Delay */
+ char *tadv = "using default of " __MODULE_STRING(DEFAULT_TADV);
+ struct e1000_option opt = {
+ .type = range_option,
+ .name = "Transmit Absolute Interrupt Delay",
+ .arg = { .r = { .min = MIN_TXABSDELAY, .max = MAX_TXABSDELAY }}
+ };
+ opt.def = DEFAULT_TADV;
+ opt.err = tadv;
+
+ adapter->tx_abs_int_delay = TxAbsIntDelay[bd];
+ e1000_validate_option(&adapter->tx_abs_int_delay, &opt);
+ }
+ { /* Receive Interrupt Delay */
+ char *rdtr = "using default of " __MODULE_STRING(DEFAULT_RDTR);
+ struct e1000_option opt = {
+ .type = range_option,
+ .name = "Receive Interrupt Delay",
+ .arg = { .r = { .min = MIN_RXDELAY, .max = MAX_RXDELAY }}
+ };
+ opt.def = DEFAULT_RDTR;
+ opt.err = rdtr;
+
+ adapter->rx_int_delay = RxIntDelay[bd];
+ e1000_validate_option(&adapter->rx_int_delay, &opt);
+ }
+ { /* Receive Absolute Interrupt Delay */
+ char *radv = "using default of " __MODULE_STRING(DEFAULT_RADV);
+ struct e1000_option opt = {
+ .type = range_option,
+ .name = "Receive Absolute Interrupt Delay",
+ .arg = { .r = { .min = MIN_RXABSDELAY, .max = MAX_RXABSDELAY }}
+ };
+ opt.def = DEFAULT_RADV;
+ opt.err = radv;
+
+ adapter->rx_abs_int_delay = RxAbsIntDelay[bd];
+ e1000_validate_option(&adapter->rx_abs_int_delay, &opt);
+ }
+
+ switch(adapter->hw.media_type) {
+ case e1000_media_type_fiber:
+ e1000_check_fiber_options(adapter);
+ break;
+ case e1000_media_type_copper:
+ e1000_check_copper_options(adapter);
+ break;
+ default:
+ BUG();
+ }
+}
+
+/**
+ * e1000_check_fiber_options - Range Checking for Link Options, Fiber Version
+ * @adapter: board private structure
+ *
+ * Handles speed and duplex options on fiber adapters
+ **/
+
+static void __devinit
+e1000_check_fiber_options(struct e1000_adapter *adapter)
+{
+ int bd = adapter->bd_number;
+ bd = bd > E1000_MAX_NIC ? E1000_MAX_NIC : bd;
+
+ if((Speed[bd] != OPTION_UNSET)) {
+ printk(KERN_INFO "Speed not valid for fiber adapters, "
+ "parameter ignored\n");
+ }
+ if((Duplex[bd] != OPTION_UNSET)) {
+ printk(KERN_INFO "Duplex not valid for fiber adapters, "
+ "parameter ignored\n");
+ }
+ if((AutoNeg[bd] != OPTION_UNSET)) {
+ printk(KERN_INFO "AutoNeg not valid for fiber adapters, "
+ "parameter ignored\n");
+ }
+}
+
+/**
+ * e1000_check_copper_options - Range Checking for Link Options, Copper Version
+ * @adapter: board private structure
+ *
+ * Handles speed and duplex options on copper adapters
+ **/
+
+static void __devinit
+e1000_check_copper_options(struct e1000_adapter *adapter)
+{
+ int speed, dplx;
+ int bd = adapter->bd_number;
+ bd = bd > E1000_MAX_NIC ? E1000_MAX_NIC : bd;
+
+ { /* Speed */
+ struct e1000_opt_list speed_list[] = {{ 0, "" },
+ { SPEED_10, "" },
+ { SPEED_100, "" },
+ { SPEED_1000, "" }};
+
+ struct e1000_option opt = {
+ .type = list_option,
+ .name = "Speed",
+ .err = "parameter ignored",
+ .def = 0,
+ .arg = { .l = { .nr = ARRAY_SIZE(speed_list), .p = speed_list }}
+ };
+
+ speed = Speed[bd];
+ e1000_validate_option(&speed, &opt);
+ }
+ { /* Duplex */
+ struct e1000_opt_list dplx_list[] = {{ 0, "" },
+ { HALF_DUPLEX, "" },
+ { FULL_DUPLEX, "" }};
+
+ struct e1000_option opt = {
+ .type = list_option,
+ .name = "Duplex",
+ .err = "parameter ignored",
+ .def = 0,
+ .arg = { .l = { .nr = ARRAY_SIZE(dplx_list), .p = dplx_list }}
+ };
+
+ dplx = Duplex[bd];
+ e1000_validate_option(&dplx, &opt);
+ }
+
+ if(AutoNeg[bd] != OPTION_UNSET && (speed != 0 || dplx != 0)) {
+ printk(KERN_INFO
+ "AutoNeg specified along with Speed or Duplex, "
+ "parameter ignored\n");
+ adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
+ } else { /* Autoneg */
+ struct e1000_opt_list an_list[] =
+ #define AA "AutoNeg advertising "
+ {{ 0x01, AA "10/HD" },
+ { 0x02, AA "10/FD" },
+ { 0x03, AA "10/FD, 10/HD" },
+ { 0x04, AA "100/HD" },
+ { 0x05, AA "100/HD, 10/HD" },
+ { 0x06, AA "100/HD, 10/FD" },
+ { 0x07, AA "100/HD, 10/FD, 10/HD" },
+ { 0x08, AA "100/FD" },
+ { 0x09, AA "100/FD, 10/HD" },
+ { 0x0a, AA "100/FD, 10/FD" },
+ { 0x0b, AA "100/FD, 10/FD, 10/HD" },
+ { 0x0c, AA "100/FD, 100/HD" },
+ { 0x0d, AA "100/FD, 100/HD, 10/HD" },
+ { 0x0e, AA "100/FD, 100/HD, 10/FD" },
+ { 0x0f, AA "100/FD, 100/HD, 10/FD, 10/HD" },
+ { 0x20, AA "1000/FD" },
+ { 0x21, AA "1000/FD, 10/HD" },
+ { 0x22, AA "1000/FD, 10/FD" },
+ { 0x23, AA "1000/FD, 10/FD, 10/HD" },
+ { 0x24, AA "1000/FD, 100/HD" },
+ { 0x25, AA "1000/FD, 100/HD, 10/HD" },
+ { 0x26, AA "1000/FD, 100/HD, 10/FD" },
+ { 0x27, AA "1000/FD, 100/HD, 10/FD, 10/HD" },
+ { 0x28, AA "1000/FD, 100/FD" },
+ { 0x29, AA "1000/FD, 100/FD, 10/HD" },
+ { 0x2a, AA "1000/FD, 100/FD, 10/FD" },
+ { 0x2b, AA "1000/FD, 100/FD, 10/FD, 10/HD" },
+ { 0x2c, AA "1000/FD, 100/FD, 100/HD" },
+ { 0x2d, AA "1000/FD, 100/FD, 100/HD, 10/HD" },
+ { 0x2e, AA "1000/FD, 100/FD, 100/HD, 10/FD" },
+ { 0x2f, AA "1000/FD, 100/FD, 100/HD, 10/FD, 10/HD" }};
+
+ struct e1000_option opt = {
+ .type = list_option,
+ .name = "AutoNeg",
+ .err = "parameter ignored",
+ .def = AUTONEG_ADV_DEFAULT,
+ .arg = { .l = { .nr = ARRAY_SIZE(an_list), .p = an_list }}
+ };
+
+ int an = AutoNeg[bd];
+ e1000_validate_option(&an, &opt);
+ adapter->hw.autoneg_advertised = an;
+ }
+
+ switch (speed + dplx) {
+ case 0:
+ adapter->hw.autoneg = 1;
+ if(Speed[bd] != OPTION_UNSET || Duplex[bd] != OPTION_UNSET)
+ printk(KERN_INFO
+ "Speed and duplex autonegotiation enabled\n");
+ break;
+ case HALF_DUPLEX:
+ printk(KERN_INFO "Half Duplex specified without Speed\n");
+ printk(KERN_INFO "Using Autonegotiation at Half Duplex only\n");
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_10_HALF |
+ ADVERTISE_100_HALF;
+ break;
+ case FULL_DUPLEX:
+ printk(KERN_INFO "Full Duplex specified without Speed\n");
+ printk(KERN_INFO "Using Autonegotiation at Full Duplex only\n");
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_10_FULL |
+ ADVERTISE_100_FULL |
+ ADVERTISE_1000_FULL;
+ break;
+ case SPEED_10:
+ printk(KERN_INFO "10 Mbps Speed specified without Duplex\n");
+ printk(KERN_INFO "Using Autonegotiation at 10 Mbps only\n");
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_10_HALF |
+ ADVERTISE_10_FULL;
+ break;
+ case SPEED_10 + HALF_DUPLEX:
+ printk(KERN_INFO "Forcing to 10 Mbps Half Duplex\n");
+ adapter->hw.autoneg = 0;
+ adapter->hw.forced_speed_duplex = e1000_10_half;
+ adapter->hw.autoneg_advertised = 0;
+ break;
+ case SPEED_10 + FULL_DUPLEX:
+ printk(KERN_INFO "Forcing to 10 Mbps Full Duplex\n");
+ adapter->hw.autoneg = 0;
+ adapter->hw.forced_speed_duplex = e1000_10_full;
+ adapter->hw.autoneg_advertised = 0;
+ break;
+ case SPEED_100:
+ printk(KERN_INFO "100 Mbps Speed specified without Duplex\n");
+ printk(KERN_INFO "Using Autonegotiation at 100 Mbps only\n");
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_100_HALF |
+ ADVERTISE_100_FULL;
+ break;
+ case SPEED_100 + HALF_DUPLEX:
+ printk(KERN_INFO "Forcing to 100 Mbps Half Duplex\n");
+ adapter->hw.autoneg = 0;
+ adapter->hw.forced_speed_duplex = e1000_100_half;
+ adapter->hw.autoneg_advertised = 0;
+ break;
+ case SPEED_100 + FULL_DUPLEX:
+ printk(KERN_INFO "Forcing to 100 Mbps Full Duplex\n");
+ adapter->hw.autoneg = 0;
+ adapter->hw.forced_speed_duplex = e1000_100_full;
+ adapter->hw.autoneg_advertised = 0;
+ break;
+ case SPEED_1000:
+ printk(KERN_INFO "1000 Mbps Speed specified without Duplex\n");
+ printk(KERN_INFO
+ "Using Autonegotiation at 1000 Mbps Full Duplex only\n");
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
+ break;
+ case SPEED_1000 + HALF_DUPLEX:
+ printk(KERN_INFO "Half Duplex is not supported at 1000 Mbps\n");
+ printk(KERN_INFO
+ "Using Autonegotiation at 1000 Mbps Full Duplex only\n");
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
+ break;
+ case SPEED_1000 + FULL_DUPLEX:
+ printk(KERN_INFO
+ "Using Autonegotiation at 1000 Mbps Full Duplex only\n");
+ adapter->hw.autoneg = 1;
+ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
+ break;
+ default:
+ BUG();
+ }
+
+ /* Speed, AutoNeg and MDI/MDI-X must all play nice */
+ if (e1000_validate_mdi_setting(&(adapter->hw)) < 0) {
+ printk(KERN_INFO "Speed, AutoNeg and MDI-X specifications are "
+ "incompatible. Setting MDI-X to a compatible value.\n");
+ }
+}
+
diff --git a/xen/drivers/net/ne/8390.c b/xen/drivers/net/ne/8390.c
new file mode 100644
index 0000000000..aa299a3470
--- /dev/null
+++ b/xen/drivers/net/ne/8390.c
@@ -0,0 +1,1158 @@
+/* 8390.c: A general NS8390 ethernet driver core for linux. */
+/*
+ Written 1992-94 by Donald Becker.
+
+ Copyright 1993 United States Government as represented by the
+ Director, National Security Agency.
+
+ This software may be used and distributed according to the terms
+ of the GNU General Public License, incorporated herein by reference.
+
+ The author may be reached as becker@scyld.com, or C/O
+ Scyld Computing Corporation
+ 410 Severn Ave., Suite 210
+ Annapolis MD 21403
+
+
+ This is the chip-specific code for many 8390-based ethernet adaptors.
+ This is not a complete driver, it must be combined with board-specific
+ code such as ne.c, wd.c, 3c503.c, etc.
+
+ Seeing how at least eight drivers use this code (not counting the
+ PCMCIA ones), it is easy to break some card by what seems like
+ a simple innocent change. Please contact me or Donald if you think
+ you have found something that needs changing. -- PG
+
+
+ Changelog:
+
+ Paul Gortmaker : remove set_bit lock, other cleanups.
+ Paul Gortmaker : add ei_get_8390_hdr() so we can pass skb's to
+ ei_block_input() for eth_io_copy_and_sum().
+ Paul Gortmaker : exchange static int ei_pingpong for a #define,
+ also add better Tx error handling.
+ Paul Gortmaker : rewrite Rx overrun handling as per NS specs.
+ Alexey Kuznetsov : use the 8390's six bit hash multicast filter.
+ Paul Gortmaker : tweak ANK's above multicast changes a bit.
+ Paul Gortmaker : update packet statistics for v2.1.x
+ Alan Cox : support arbitrary stupid port mappings on the
+ 68K Macintosh. Support >16bit I/O spaces
+ Paul Gortmaker : add kmod support for auto-loading of the 8390
+ module by all drivers that require it.
+ Alan Cox : Spinlocking work, added 'BUG_83C690'
+ Paul Gortmaker : Separate out Tx timeout code from Tx path.
+
+ Sources:
+ The National Semiconductor LAN Databook, and the 3Com 3c503 databook.
+
+ */
+
+static const char version[] =
+ "8390.c:v1.10cvs 9/23/94 Donald Becker (becker@cesdis.gsfc.nasa.gov)\n";
+
+#include <xeno/module.h>
+#include <xeno/kernel.h>
+#include <xeno/sched.h>
+//#include <xeno/fs.h>
+#include <xeno/types.h>
+//#include <xeno/ptrace.h>
+#include <xeno/lib.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/bitops.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <xeno/delay.h>
+#include <xeno/errno.h>
+//#include <xeno/fcntl.h>
+#include <xeno/in.h>
+#include <xeno/interrupt.h>
+#include <xeno/init.h>
+
+#include <xeno/netdevice.h>
+#include <xeno/etherdevice.h>
+
+#define NS8390_CORE
+#include "8390.h"
+
+#define BUG_83C690
+
+/* These are the operational function interfaces to board-specific
+ routines.
+ void reset_8390(struct net_device *dev)
+ Resets the board associated with DEV, including a hardware reset of
+ the 8390. This is only called when there is a transmit timeout, and
+ it is always followed by 8390_init().
+ void block_output(struct net_device *dev, int count, const unsigned char *buf,
+ int start_page)
+ Write the COUNT bytes of BUF to the packet buffer at START_PAGE. The
+ "page" value uses the 8390's 256-byte pages.
+ void get_8390_hdr(struct net_device *dev, struct e8390_hdr *hdr, int ring_page)
+ Read the 4 byte, page aligned 8390 header. *If* there is a
+ subsequent read, it will be of the rest of the packet.
+ void block_input(struct net_device *dev, int count, struct sk_buff *skb, int ring_offset)
+ Read COUNT bytes from the packet buffer into the skb data area. Start
+ reading from RING_OFFSET, the address as the 8390 sees it. This will always
+ follow the read of the 8390 header.
+*/
+#define ei_reset_8390 (ei_local->reset_8390)
+#define ei_block_output (ei_local->block_output)
+#define ei_block_input (ei_local->block_input)
+#define ei_get_8390_hdr (ei_local->get_8390_hdr)
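+
+/* Roughly how a board driver wires these up in its probe routine (a
+ * sketch only; the my_* names are hypothetical):
+ *
+ *	struct ei_device *ei_local = (struct ei_device *)dev->priv;
+ *	ei_local->reset_8390   = &my_reset_8390;
+ *	ei_local->block_input  = &my_block_input;
+ *	ei_local->block_output = &my_block_output;
+ *	ei_local->get_8390_hdr = &my_get_8390_hdr;
+ */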
+
+/* use 0 for production, 1 for verification, >2 for debug */
+#ifndef ei_debug
+int ei_debug = 1;
+#endif
+
+/* Index to functions. */
+static void ei_tx_intr(struct net_device *dev);
+static void ei_tx_err(struct net_device *dev);
+static void ei_tx_timeout(struct net_device *dev);
+static void ei_receive(struct net_device *dev);
+static void ei_rx_overrun(struct net_device *dev);
+
+/* Routines generic to NS8390-based boards. */
+static void NS8390_trigger_send(struct net_device *dev, unsigned int length,
+ int start_page);
+static void set_multicast_list(struct net_device *dev);
+static void do_set_multicast_list(struct net_device *dev);
+
+/*
+ * SMP and the 8390 setup.
+ *
+ * The 8390 isn't exactly designed to be multithreaded on RX/TX. There is
+ * a page register that controls bank and packet buffer access. We guard
+ * this with ei_local->page_lock. Nobody should assume or set the page other
+ * than zero when the lock is not held. Lock holders must restore page 0
+ * before unlocking. Even pure readers must take the lock to protect in
+ * page 0.
+ *
+ * To make life difficult, the chip can also be very slow. We therefore can't
+ * just use spinlocks. For the longer lockups we disable the irq the device
+ * sits on and hold the lock. We must hold the lock because there is a dual
+ * processor case other than interrupts (get stats/set multicast list in
+ * parallel with each other and transmit).
+ *
+ * Note: in theory we can just disable the irq on the card _but_ there is
+ * a latency on SMP irq delivery. So we can easily go "disable irq" "sync irqs"
+ * enter lock, take the queued irq. So we waddle instead of flying.
+ *
+ * Finally by special arrangement for the purpose of being generally
+ * annoying the transmit function is called bh atomic. That places
+ * restrictions on the user context callers as disable_irq won't save
+ * them.
+ */
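+
+/* The slow-path pattern described above, as used by ei_tx_timeout()
+ * and ei_start_xmit() below:
+ *
+ *	disable_irq_nosync(dev->irq);	 (stop new IRQs, don't wait)
+ *	spin_lock(&ei_local->page_lock); (own the page register)
+ *	... slow 8390 programming, page 0 restored before unlock ...
+ *	spin_unlock(&ei_local->page_lock);
+ *	enable_irq(dev->irq);
+ */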
+
+
+
+/**
+ * ei_open - Open/initialize the board.
+ * @dev: network device to initialize
+ *
+ * This routine goes all-out, setting everything
+ * up anew at each open, even though many of these registers should only
+ * need to be set once at boot.
+ */
+int ei_open(struct net_device *dev)
+{
+ unsigned long flags;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+
+ /* This can't happen unless somebody forgot to call ethdev_init(). */
+ if (ei_local == NULL)
+ {
+ printk(KERN_EMERG "%s: ei_open passed a non-existent device!\n", dev->name);
+ return -ENXIO;
+ }
+
+ /* The card I/O part of the driver (e.g. 3c503) can hook a Tx timeout
+ wrapper that does e.g. media check & then calls ei_tx_timeout. */
+ if (dev->tx_timeout == NULL)
+ dev->tx_timeout = ei_tx_timeout;
+ if (dev->watchdog_timeo <= 0)
+ dev->watchdog_timeo = TX_TIMEOUT;
+
+ /*
+ * Grab the page lock so we own the register set, then call
+ * the init function.
+ */
+
+ spin_lock_irqsave(&ei_local->page_lock, flags);
+ NS8390_init(dev, 1);
+ /* Set the flag before we drop the lock. That way the IRQ arrives
+ after it's set and we get no silly warnings */
+ netif_start_queue(dev);
+ spin_unlock_irqrestore(&ei_local->page_lock, flags);
+ ei_local->irqlock = 0;
+ return 0;
+}
+
+/**
+ * ei_close - shut down network device
+ * @dev: network device to close
+ *
+ * Opposite of ei_open(). Only used when "ifconfig <devname> down" is done.
+ */
+int ei_close(struct net_device *dev)
+{
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ unsigned long flags;
+
+ /*
+ * Hold the page lock during close
+ */
+
+ spin_lock_irqsave(&ei_local->page_lock, flags);
+ NS8390_init(dev, 0);
+ spin_unlock_irqrestore(&ei_local->page_lock, flags);
+ netif_stop_queue(dev);
+ return 0;
+}
+
+/**
+ * ei_tx_timeout - handle transmit time out condition
+ * @dev: network device which has apparently fallen asleep
+ *
+ * Called by kernel when device never acknowledges a transmit has
+ * completed (or failed) - i.e. never posted a Tx related interrupt.
+ */
+
+void ei_tx_timeout(struct net_device *dev)
+{
+ long e8390_base = dev->base_addr;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ int txsr, isr, tickssofar = jiffies - dev->trans_start;
+ unsigned long flags;
+
+ ei_local->stat.tx_errors++;
+
+ spin_lock_irqsave(&ei_local->page_lock, flags);
+ txsr = inb(e8390_base+EN0_TSR);
+ isr = inb(e8390_base+EN0_ISR);
+ spin_unlock_irqrestore(&ei_local->page_lock, flags);
+
+ printk(KERN_DEBUG "%s: Tx timed out, %s TSR=%#2x, ISR=%#2x, t=%d.\n",
+ dev->name, (txsr & ENTSR_ABT) ? "excess collisions." :
+ (isr) ? "lost interrupt?" : "cable problem?", txsr, isr, tickssofar);
+
+ if (!isr && !ei_local->stat.tx_packets)
+ {
+ /* The 8390 probably hasn't gotten on the cable yet. */
+ ei_local->interface_num ^= 1; /* Try a different xcvr. */
+ }
+
+ /* Ugly but a reset can be slow, yet must be protected */
+
+ disable_irq_nosync(dev->irq);
+ spin_lock(&ei_local->page_lock);
+
+ /* Try to restart the card. Perhaps the user has fixed something. */
+ ei_reset_8390(dev);
+ NS8390_init(dev, 1);
+
+ spin_unlock(&ei_local->page_lock);
+ enable_irq(dev->irq);
+ netif_wake_queue(dev);
+}
+
+/**
+ * ei_start_xmit - begin packet transmission
+ * @skb: packet to be sent
+ * @dev: network device to which packet is sent
+ *
+ * Sends a packet to an 8390 network device.
+ */
+
+static int ei_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ long e8390_base = dev->base_addr;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ int length, send_length, output_page;
+ unsigned long flags;
+
+ length = skb->len;
+
+ /* Mask interrupts from the ethercard.
+ SMP: We have to grab the lock here, otherwise the IRQ handler
+ on another CPU can flip the window and race the IRQ mask set. We end
+ up trashing the mcast filter, not disabling irqs, if we don't lock */
+
+ spin_lock_irqsave(&ei_local->page_lock, flags);
+ outb_p(0x00, e8390_base + EN0_IMR);
+ spin_unlock_irqrestore(&ei_local->page_lock, flags);
+
+
+ /*
+ * Slow phase with lock held.
+ */
+
+ disable_irq_nosync(dev->irq);
+
+ spin_lock(&ei_local->page_lock);
+
+ ei_local->irqlock = 1;
+
+ send_length = ETH_ZLEN < length ? length : ETH_ZLEN;
+
+#ifdef EI_PINGPONG
+
+ /*
+ * We have two Tx slots available for use. Find the first free
+ * slot, and then perform some sanity checks. With two Tx bufs,
+ * you get very close to transmitting back-to-back packets. With
+ * only one Tx buf, the transmitter sits idle while you reload the
+ * card, leaving a substantial gap between each transmitted packet.
+ */
+
+ if (ei_local->tx1 == 0)
+ {
+ output_page = ei_local->tx_start_page;
+ ei_local->tx1 = send_length;
+ if (ei_debug && ei_local->tx2 > 0)
+ printk(KERN_DEBUG "%s: idle transmitter tx2=%d, lasttx=%d, txing=%d.\n",
+ dev->name, ei_local->tx2, ei_local->lasttx, ei_local->txing);
+ }
+ else if (ei_local->tx2 == 0)
+ {
+ output_page = ei_local->tx_start_page + TX_1X_PAGES;
+ ei_local->tx2 = send_length;
+ if (ei_debug && ei_local->tx1 > 0)
+ printk(KERN_DEBUG "%s: idle transmitter, tx1=%d, lasttx=%d, txing=%d.\n",
+ dev->name, ei_local->tx1, ei_local->lasttx, ei_local->txing);
+ }
+ else
+ { /* We should never get here. */
+ if (ei_debug)
+ printk(KERN_DEBUG "%s: No Tx buffers free! tx1=%d tx2=%d last=%d\n",
+ dev->name, ei_local->tx1, ei_local->tx2, ei_local->lasttx);
+ ei_local->irqlock = 0;
+ netif_stop_queue(dev);
+ outb_p(ENISR_ALL, e8390_base + EN0_IMR);
+ spin_unlock(&ei_local->page_lock);
+ enable_irq(dev->irq);
+ ei_local->stat.tx_errors++;
+ return 1;
+ }
+
+ /*
+ * Okay, now upload the packet and trigger a send if the transmitter
+ * isn't already sending. If it is busy, the interrupt handler will
+ * trigger the send later, upon receiving a Tx done interrupt.
+ */
+
+ ei_block_output(dev, length, skb->data, output_page);
+ if (! ei_local->txing)
+ {
+ ei_local->txing = 1;
+ NS8390_trigger_send(dev, send_length, output_page);
+ dev->trans_start = jiffies;
+ if (output_page == ei_local->tx_start_page)
+ {
+ ei_local->tx1 = -1;
+ ei_local->lasttx = -1;
+ }
+ else
+ {
+ ei_local->tx2 = -1;
+ ei_local->lasttx = -2;
+ }
+ }
+ else ei_local->txqueue++;
+
+ if (ei_local->tx1 && ei_local->tx2)
+ netif_stop_queue(dev);
+ else
+ netif_start_queue(dev);
+
+#else /* EI_PINGPONG */
+
+ /*
+ * Only one Tx buffer in use. You need two Tx bufs to come close to
+ * back-to-back transmits. Expect a 20 -> 25% performance hit on
+ * reasonable hardware if you only use one Tx buffer.
+ */
+
+ ei_block_output(dev, length, skb->data, ei_local->tx_start_page);
+ ei_local->txing = 1;
+ NS8390_trigger_send(dev, send_length, ei_local->tx_start_page);
+ dev->trans_start = jiffies;
+ netif_stop_queue(dev);
+
+#endif /* EI_PINGPONG */
+
+ /* Turn 8390 interrupts back on. */
+ ei_local->irqlock = 0;
+ outb_p(ENISR_ALL, e8390_base + EN0_IMR);
+
+ spin_unlock(&ei_local->page_lock);
+ enable_irq(dev->irq);
+
+ dev_kfree_skb (skb);
+ ei_local->stat.tx_bytes += send_length;
+
+ return 0;
+}
+
+/**
+ * ei_interrupt - handle the interrupts from an 8390
+ * @irq: interrupt number
+ * @dev_id: a pointer to the net_device
+ * @regs: unused
+ *
+ * Handle the ether interface interrupts. We pull packets from
+ * the 8390 via the card-specific functions and fire them at the networking
+ * stack. We also handle transmit completions and wake the transmit path if
+ * necessary, update the counters, and do other housekeeping as
+ * needed.
+ */
+
+void ei_interrupt(int irq, void *dev_id, struct pt_regs * regs)
+{
+ struct net_device *dev = dev_id;
+ long e8390_base;
+ int interrupts, nr_serviced = 0;
+ struct ei_device *ei_local;
+
+ if (dev == NULL)
+ {
+ printk ("net_interrupt(): irq %d for unknown device.\n", irq);
+ return;
+ }
+
+ e8390_base = dev->base_addr;
+ ei_local = (struct ei_device *) dev->priv;
+
+ /*
+ * Protect the irq test too.
+ */
+
+ spin_lock(&ei_local->page_lock);
+
+ if (ei_local->irqlock)
+ {
+#if 1 /* This might just be an interrupt for a PCI device sharing this line */
+ /* The "irqlock" check is only for testing. */
+ printk(ei_local->irqlock
+ ? "%s: Interrupted while interrupts are masked! isr=%#2x imr=%#2x.\n"
+ : "%s: Reentering the interrupt handler! isr=%#2x imr=%#2x.\n",
+ dev->name, inb_p(e8390_base + EN0_ISR),
+ inb_p(e8390_base + EN0_IMR));
+#endif
+ spin_unlock(&ei_local->page_lock);
+ return;
+ }
+
+ /* Change to page 0 and read the intr status reg. */
+ outb_p(E8390_NODMA+E8390_PAGE0, e8390_base + E8390_CMD);
+ if (ei_debug > 3)
+ printk(KERN_DEBUG "%s: interrupt(isr=%#2.2x).\n", dev->name,
+ inb_p(e8390_base + EN0_ISR));
+
+ /* !!Assumption!! -- we stay in page 0. Don't break this. */
+ while ((interrupts = inb_p(e8390_base + EN0_ISR)) != 0
+ && ++nr_serviced < MAX_SERVICE)
+ {
+ if (!netif_running(dev)) {
+ printk(KERN_WARNING "%s: interrupt from stopped card\n", dev->name);
+ /* rmk - acknowledge the interrupts */
+ outb_p(interrupts, e8390_base + EN0_ISR);
+ interrupts = 0;
+ break;
+ }
+ if (interrupts & ENISR_OVER)
+ ei_rx_overrun(dev);
+ else if (interrupts & (ENISR_RX+ENISR_RX_ERR))
+ {
+ /* Got a good (?) packet. */
+ ei_receive(dev);
+ }
+ /* Push the next to-transmit packet through. */
+ if (interrupts & ENISR_TX)
+ ei_tx_intr(dev);
+ else if (interrupts & ENISR_TX_ERR)
+ ei_tx_err(dev);
+
+ if (interrupts & ENISR_COUNTERS)
+ {
+ ei_local->stat.rx_frame_errors += inb_p(e8390_base + EN0_COUNTER0);
+ ei_local->stat.rx_crc_errors += inb_p(e8390_base + EN0_COUNTER1);
+ ei_local->stat.rx_missed_errors+= inb_p(e8390_base + EN0_COUNTER2);
+ outb_p(ENISR_COUNTERS, e8390_base + EN0_ISR); /* Ack intr. */
+ }
+
+ /* Ignore any RDC interrupts that make it back to here. */
+ if (interrupts & ENISR_RDC)
+ {
+ outb_p(ENISR_RDC, e8390_base + EN0_ISR);
+ }
+
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, e8390_base + E8390_CMD);
+ }
+
+ if (interrupts && ei_debug)
+ {
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, e8390_base + E8390_CMD);
+ if (nr_serviced >= MAX_SERVICE)
+ {
+ /* 0xFF is valid for a card removal */
+ if(interrupts!=0xFF)
+ printk(KERN_WARNING "%s: Too much work at interrupt, status %#2.2x\n",
+ dev->name, interrupts);
+ outb_p(ENISR_ALL, e8390_base + EN0_ISR); /* Ack. most intrs. */
+ } else {
+ printk(KERN_WARNING "%s: unknown interrupt %#2x\n", dev->name, interrupts);
+ outb_p(0xff, e8390_base + EN0_ISR); /* Ack. all intrs. */
+ }
+ }
+ spin_unlock(&ei_local->page_lock);
+ return;
+}
+
+/**
+ * ei_tx_err - handle transmitter error
+ * @dev: network device which threw the exception
+ *
+ * A transmitter error has happened. Most likely excess collisions (which
+ * is a fairly normal condition). If the error is one where the Tx will
+ * have been aborted, we try and send another one right away, instead of
+ * letting the failed packet sit and collect dust in the Tx buffer. This
+ * is a much better solution as it avoids kernel based Tx timeouts, and
+ * an unnecessary card reset.
+ *
+ * Called with lock held.
+ */
+
+static void ei_tx_err(struct net_device *dev)
+{
+ long e8390_base = dev->base_addr;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ unsigned char txsr = inb_p(e8390_base+EN0_TSR);
+ unsigned char tx_was_aborted = txsr & (ENTSR_ABT+ENTSR_FU);
+
+#ifdef VERBOSE_ERROR_DUMP
+ printk(KERN_DEBUG "%s: transmitter error (%#2x): ", dev->name, txsr);
+ if (txsr & ENTSR_ABT)
+ printk("excess-collisions ");
+ if (txsr & ENTSR_ND)
+ printk("non-deferral ");
+ if (txsr & ENTSR_CRS)
+ printk("lost-carrier ");
+ if (txsr & ENTSR_FU)
+ printk("FIFO-underrun ");
+ if (txsr & ENTSR_CDH)
+ printk("lost-heartbeat ");
+ printk("\n");
+#endif
+
+ outb_p(ENISR_TX_ERR, e8390_base + EN0_ISR); /* Ack intr. */
+
+ if (tx_was_aborted)
+ ei_tx_intr(dev);
+ else
+ {
+ ei_local->stat.tx_errors++;
+ if (txsr & ENTSR_CRS) ei_local->stat.tx_carrier_errors++;
+ if (txsr & ENTSR_CDH) ei_local->stat.tx_heartbeat_errors++;
+ if (txsr & ENTSR_OWC) ei_local->stat.tx_window_errors++;
+ }
+}
+
+/**
+ * ei_tx_intr - transmit interrupt handler
+ * @dev: network device for which tx intr is handled
+ *
+ * We have finished a transmit: check for errors and then trigger the next
+ * packet to be sent. Called with lock held.
+ */
+
+static void ei_tx_intr(struct net_device *dev)
+{
+ long e8390_base = dev->base_addr;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ int status = inb(e8390_base + EN0_TSR);
+
+ outb_p(ENISR_TX, e8390_base + EN0_ISR); /* Ack intr. */
+
+#ifdef EI_PINGPONG
+
+ /*
+ * There are two Tx buffers, see which one finished, and trigger
+ * the send of another one if it exists.
+ */
+ ei_local->txqueue--;
+
+ if (ei_local->tx1 < 0)
+ {
+ if (ei_local->lasttx != 1 && ei_local->lasttx != -1)
+ printk(KERN_ERR "%s: bogus last_tx_buffer %d, tx1=%d.\n",
+ ei_local->name, ei_local->lasttx, ei_local->tx1);
+ ei_local->tx1 = 0;
+ if (ei_local->tx2 > 0)
+ {
+ ei_local->txing = 1;
+ NS8390_trigger_send(dev, ei_local->tx2, ei_local->tx_start_page + 6);
+ dev->trans_start = jiffies;
+ ei_local->tx2 = -1,
+ ei_local->lasttx = 2;
+ }
+ else ei_local->lasttx = 20, ei_local->txing = 0;
+ }
+ else if (ei_local->tx2 < 0)
+ {
+ if (ei_local->lasttx != 2 && ei_local->lasttx != -2)
+ printk("%s: bogus last_tx_buffer %d, tx2=%d.\n",
+ ei_local->name, ei_local->lasttx, ei_local->tx2);
+ ei_local->tx2 = 0;
+ if (ei_local->tx1 > 0)
+ {
+ ei_local->txing = 1;
+ NS8390_trigger_send(dev, ei_local->tx1, ei_local->tx_start_page);
+ dev->trans_start = jiffies;
+ ei_local->tx1 = -1;
+ ei_local->lasttx = 1;
+ }
+ else
+ ei_local->lasttx = 10, ei_local->txing = 0;
+ }
+// else printk(KERN_WARNING "%s: unexpected TX-done interrupt, lasttx=%d.\n",
+// dev->name, ei_local->lasttx);
+
+#else /* EI_PINGPONG */
+ /*
+ * Single Tx buffer: mark it free so another packet can be loaded.
+ */
+ ei_local->txing = 0;
+#endif
+
+ /* Minimize Tx latency: update the statistics after we restart TXing. */
+ if (status & ENTSR_COL)
+ ei_local->stat.collisions++;
+ if (status & ENTSR_PTX)
+ ei_local->stat.tx_packets++;
+ else
+ {
+ ei_local->stat.tx_errors++;
+ if (status & ENTSR_ABT)
+ {
+ ei_local->stat.tx_aborted_errors++;
+ ei_local->stat.collisions += 16;
+ }
+ if (status & ENTSR_CRS)
+ ei_local->stat.tx_carrier_errors++;
+ if (status & ENTSR_FU)
+ ei_local->stat.tx_fifo_errors++;
+ if (status & ENTSR_CDH)
+ ei_local->stat.tx_heartbeat_errors++;
+ if (status & ENTSR_OWC)
+ ei_local->stat.tx_window_errors++;
+ }
+ netif_wake_queue(dev);
+}
+
+/**
+ * ei_receive - receive some packets
+ * @dev: network device with which receive will be run
+ *
+ * We have a good packet(s), get it/them out of the buffers.
+ * Called with lock held.
+ */
+
+static void ei_receive(struct net_device *dev)
+{
+ long e8390_base = dev->base_addr;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ unsigned char rxing_page, this_frame, next_frame;
+ unsigned short current_offset;
+ int rx_pkt_count = 0;
+ struct e8390_pkt_hdr rx_frame;
+ int num_rx_pages = ei_local->stop_page-ei_local->rx_start_page;
+
+ while (++rx_pkt_count < 10)
+ {
+ int pkt_len, pkt_stat;
+
+ /* Get the rx page (incoming packet pointer). */
+ outb_p(E8390_NODMA+E8390_PAGE1, e8390_base + E8390_CMD);
+ rxing_page = inb_p(e8390_base + EN1_CURPAG);
+ outb_p(E8390_NODMA+E8390_PAGE0, e8390_base + E8390_CMD);
+
+ /* Remove one frame from the ring. Boundary is always a page behind. */
+ this_frame = inb_p(e8390_base + EN0_BOUNDARY) + 1;
+ if (this_frame >= ei_local->stop_page)
+ this_frame = ei_local->rx_start_page;
+
+ /* Someday we'll omit the previous check, iff we never get this message.
+ (There is at least one clone claimed to have a problem.)
+
+ Keep quiet if it looks like a card removal. One problem here
+ is that some clones crash in roughly the same way.
+ */
+ if (ei_debug > 0 && this_frame != ei_local->current_page && (this_frame!=0x0 || rxing_page!=0xFF))
+ printk(KERN_ERR "%s: mismatched read page pointers %2x vs %2x.\n",
+ dev->name, this_frame, ei_local->current_page);
+
+ if (this_frame == rxing_page) /* Read all the frames? */
+ break; /* Done for now */
+
+ current_offset = this_frame << 8;
+ ei_get_8390_hdr(dev, &rx_frame, this_frame);
+
+ pkt_len = rx_frame.count - sizeof(struct e8390_pkt_hdr);
+ pkt_stat = rx_frame.status;
+
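+ /* The frame (4-byte 8390 header + data) spans 1 + ((pkt_len+4)>>8) 256-byte ring pages. */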
+ next_frame = this_frame + 1 + ((pkt_len+4)>>8);
+
+ /* Check for bogosity warned by 3c503 book: the status byte is never
+ written. This happened a lot during testing! This code should be
+ cleaned up someday. */
+ if (rx_frame.next != next_frame
+ && rx_frame.next != next_frame + 1
+ && rx_frame.next != next_frame - num_rx_pages
+ && rx_frame.next != next_frame + 1 - num_rx_pages) {
+ ei_local->current_page = rxing_page;
+ outb(ei_local->current_page-1, e8390_base+EN0_BOUNDARY);
+ ei_local->stat.rx_errors++;
+ continue;
+ }
+
+ if (pkt_len < 60 || pkt_len > 1518)
+ {
+ if (ei_debug)
+ printk(KERN_DEBUG "%s: bogus packet size: %d, status=%#2x nxpg=%#2x.\n",
+ dev->name, rx_frame.count, rx_frame.status,
+ rx_frame.next);
+ ei_local->stat.rx_errors++;
+ ei_local->stat.rx_length_errors++;
+ }
+ else if ((pkt_stat & 0x0F) == ENRSR_RXOK)
+ {
+ struct sk_buff *skb;
+
+ skb = dev_alloc_skb(pkt_len+2);
+ if (skb == NULL)
+ {
+ if (ei_debug > 1)
+ printk(KERN_DEBUG "%s: Couldn't allocate a sk_buff of size %d.\n",
+ dev->name, pkt_len);
+ ei_local->stat.rx_dropped++;
+ break;
+ }
+ else
+ {
+ skb_reserve(skb,2); /* IP headers on 16 byte boundaries */
+ skb->dev = dev;
+ skb_put(skb, pkt_len); /* Make room */
+ ei_block_input(dev, pkt_len, skb, current_offset + sizeof(rx_frame));
+ skb->protocol=eth_type_trans(skb,dev);
+ netif_rx(skb);
+ dev->last_rx = jiffies;
+ ei_local->stat.rx_packets++;
+ ei_local->stat.rx_bytes += pkt_len;
+ if (pkt_stat & ENRSR_PHY)
+ ei_local->stat.multicast++;
+ }
+ }
+ else
+ {
+ if (ei_debug)
+ printk(KERN_DEBUG "%s: bogus packet: status=%#2x nxpg=%#2x size=%d\n",
+ dev->name, rx_frame.status, rx_frame.next,
+ rx_frame.count);
+ ei_local->stat.rx_errors++;
+ /* NB: The NIC counts CRC, frame and missed errors. */
+ if (pkt_stat & ENRSR_FO)
+ ei_local->stat.rx_fifo_errors++;
+ }
+ next_frame = rx_frame.next;
+
+ /* This _should_ never happen: it's here for avoiding bad clones. */
+ if (next_frame >= ei_local->stop_page) {
+ printk("%s: next frame inconsistency, %#2x\n", dev->name,
+ next_frame);
+ next_frame = ei_local->rx_start_page;
+ }
+ ei_local->current_page = next_frame;
+ outb_p(next_frame-1, e8390_base+EN0_BOUNDARY);
+ }
+
+ /* We used to also ack ENISR_OVER here, but that would sometimes mask
+ a real overrun, leaving the 8390 in a stopped state with rec'vr off. */
+ outb_p(ENISR_RX+ENISR_RX_ERR, e8390_base+EN0_ISR);
+ return;
+}
+
+/**
+ * ei_rx_overrun - handle receiver overrun
+ * @dev: network device which threw exception
+ *
+ * We have a receiver overrun: we have to kick the 8390 to get it started
+ * again. Problem is that you have to kick it exactly as NS prescribes in
+ * the updated datasheets, or "the NIC may act in an unpredictable manner."
+ * This includes causing "the NIC to defer indefinitely when it is stopped
+ * on a busy network." Ugh.
+ * Called with lock held. Don't call this with the interrupts off or your
+ * computer will hate you - it takes 10ms or so.
+ */
+
+static void ei_rx_overrun(struct net_device *dev)
+{
+ long e8390_base = dev->base_addr;
+ unsigned char was_txing, must_resend = 0;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+
+ /*
+ * Record whether a Tx was in progress and then issue the
+ * stop command.
+ */
+ was_txing = inb_p(e8390_base+E8390_CMD) & E8390_TRANS;
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_STOP, e8390_base+E8390_CMD);
+
+ if (ei_debug > 1)
+ printk(KERN_DEBUG "%s: Receiver overrun.\n", dev->name);
+ ei_local->stat.rx_over_errors++;
+
+ /*
+ * Wait a full Tx time (1.2ms) + some guard time, NS says 1.6ms total.
+ * Early datasheets said to poll the reset bit, but now they say that
+ * it "is not a reliable indicator and subsequently should be ignored."
+ * We wait at least 10ms.
+ */
+
+ udelay(10*1000);
+
+ /*
+ * Reset RBCR[01] back to zero as per magic incantation.
+ */
+ outb_p(0x00, e8390_base+EN0_RCNTLO);
+ outb_p(0x00, e8390_base+EN0_RCNTHI);
+
+ /*
+ * See if any Tx was interrupted or not. According to NS, this
+ * step is vital, and skipping it will cause no end of havoc.
+ */
+
+ if (was_txing)
+ {
+ unsigned char tx_completed = inb_p(e8390_base+EN0_ISR) & (ENISR_TX+ENISR_TX_ERR);
+ if (!tx_completed)
+ must_resend = 1;
+ }
+
+ /*
+ * Have to enter loopback mode and then restart the NIC before
+ * you are allowed to slurp packets up off the ring.
+ */
+ outb_p(E8390_TXOFF, e8390_base + EN0_TXCR);
+ outb_p(E8390_NODMA + E8390_PAGE0 + E8390_START, e8390_base + E8390_CMD);
+
+ /*
+ * Clear the Rx ring of all the debris, and ack the interrupt.
+ */
+ ei_receive(dev);
+ outb_p(ENISR_OVER, e8390_base+EN0_ISR);
+
+ /*
+ * Leave loopback mode, and resend any packet that got stopped.
+ */
+ outb_p(E8390_TXCONFIG, e8390_base + EN0_TXCR);
+ if (must_resend)
+ outb_p(E8390_NODMA + E8390_PAGE0 + E8390_START + E8390_TRANS, e8390_base + E8390_CMD);
+}
+
+/*
+ * Collect the stats. This is called unlocked and from several contexts.
+ */
+
+static struct net_device_stats *get_stats(struct net_device *dev)
+{
+ long ioaddr = dev->base_addr;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ unsigned long flags;
+
+ /* If the card is stopped, just return the present stats. */
+ if (!netif_running(dev))
+ return &ei_local->stat;
+
+ spin_lock_irqsave(&ei_local->page_lock,flags);
+ /* Read the counter registers, assuming we are in page 0. */
+ ei_local->stat.rx_frame_errors += inb_p(ioaddr + EN0_COUNTER0);
+ ei_local->stat.rx_crc_errors += inb_p(ioaddr + EN0_COUNTER1);
+ ei_local->stat.rx_missed_errors+= inb_p(ioaddr + EN0_COUNTER2);
+ spin_unlock_irqrestore(&ei_local->page_lock, flags);
+
+ return &ei_local->stat;
+}
+
+/*
+ * Update the given Autodin II CRC value with another data byte.
+ */
+
+static inline u32 update_crc(u8 byte, u32 current_crc)
+{
+ int bit;
+ u8 ah = 0;
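+ /* Classic bit-serial CRC-32: on each step the feedback bit is the CRC
+ MSB XORed with the next data bit (data fed LSB-first, as on the
+ wire); ah merely ferries that bit into the polynomial test below. */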
+ for (bit=0; bit<8; bit++)
+ {
+ u8 carry = (current_crc>>31);
+ current_crc <<= 1;
+ ah = ((ah<<1) | carry) ^ byte;
+ if (ah&1)
+ current_crc ^= 0x04C11DB7; /* CRC polynomial */
+ ah >>= 1;
+ byte >>= 1;
+ }
+ return current_crc;
+}
+
+/*
+ * Form the 64 bit 8390 multicast table from the linked list of addresses
+ * associated with this dev structure.
+ */
+
+static inline void make_mc_bits(u8 *bits, struct net_device *dev)
+{
+ struct dev_mc_list *dmi;
+
+ for (dmi=dev->mc_list; dmi; dmi=dmi->next)
+ {
+ int i;
+ u32 crc;
+ if (dmi->dmi_addrlen != ETH_ALEN)
+ {
+ printk(KERN_INFO "%s: invalid multicast address length given.\n", dev->name);
+ continue;
+ }
+ crc = 0xffffffff; /* initial CRC value */
+ for (i=0; i<ETH_ALEN; i++)
+ crc = update_crc(dmi->dmi_addr[i], crc);
+ /*
+ * The 8390 uses the 6 most significant bits of the
+ * CRC to index the 64-bit multicast table: the top
+ * 3 bits select the byte, the next 3 the bit within it.
+ */
+ bits[crc>>29] |= (1<<((crc>>26)&7));
+ }
+}
+
+/**
+ * do_set_multicast_list - set/clear multicast filter
+ * @dev: net device for which multicast filter is adjusted
+ *
+ * Set or clear the multicast filter for this adaptor. May be called
+ * from a BH in 2.1.x. Must be called with lock held.
+ */
+
+static void do_set_multicast_list(struct net_device *dev)
+{
+ long e8390_base = dev->base_addr;
+ int i;
+ struct ei_device *ei_local = (struct ei_device*)dev->priv;
+
+ if (!(dev->flags&(IFF_PROMISC|IFF_ALLMULTI)))
+ {
+ memset(ei_local->mcfilter, 0, 8);
+ if (dev->mc_list)
+ make_mc_bits(ei_local->mcfilter, dev);
+ }
+ else
+ memset(ei_local->mcfilter, 0xFF, 8); /* mcast set to accept-all */
+
+ /*
+ * DP8390 manuals don't specify any magic sequence for altering
+ * the multicast regs on an already running card. To be safe, we
+ * ensure multicast mode is off prior to loading up the new hash
+ * table. If this proves to be not enough, we can always resort
+ * to stopping the NIC, loading the table and then restarting.
+ *
+ * Bug Alert! The MC regs on the SMC 83C690 (SMC Elite and SMC
+ * Elite16) appear to be write-only. The NS 8390 data sheet lists
+ * them as r/w so this is a bug. The SMC 83C790 (SMC Ultra and
+ * Ultra32 EISA) appears to have this bug fixed.
+ */
+
+ if (netif_running(dev))
+ outb_p(E8390_RXCONFIG, e8390_base + EN0_RXCR);
+ outb_p(E8390_NODMA + E8390_PAGE1, e8390_base + E8390_CMD);
+ for(i = 0; i < 8; i++)
+ {
+ outb_p(ei_local->mcfilter[i], e8390_base + EN1_MULT_SHIFT(i));
+#ifndef BUG_83C690
+ if(inb_p(e8390_base + EN1_MULT_SHIFT(i))!=ei_local->mcfilter[i])
+ printk(KERN_ERR "Multicast filter read/write mismap %d\n",i);
+#endif
+ }
+ outb_p(E8390_NODMA + E8390_PAGE0, e8390_base + E8390_CMD);
+
+ if(dev->flags&IFF_PROMISC)
+ outb_p(E8390_RXCONFIG | 0x18, e8390_base + EN0_RXCR);
+ else if(dev->flags&IFF_ALLMULTI || dev->mc_list)
+ outb_p(E8390_RXCONFIG | 0x08, e8390_base + EN0_RXCR);
+ else
+ outb_p(E8390_RXCONFIG, e8390_base + EN0_RXCR);
+ }
+
+/*
+ * Called without lock held. This is invoked from user context and may
+ * be parallel to just about everything else. It's also fairly quick and
+ * not called too often. Must protect against both bh and irq users.
+ */
+
+static void set_multicast_list(struct net_device *dev)
+{
+ unsigned long flags;
+ struct ei_device *ei_local = (struct ei_device*)dev->priv;
+
+ spin_lock_irqsave(&ei_local->page_lock, flags);
+ do_set_multicast_list(dev);
+ spin_unlock_irqrestore(&ei_local->page_lock, flags);
+}
+
+/**
+ * ethdev_init - init rest of 8390 device struct
+ * @dev: network device structure to init
+ *
+ * Initialize the rest of the 8390 device structure. Do NOT __init
+ * this, as it is used by 8390 based modular drivers too.
+ */
+
+int ethdev_init(struct net_device *dev)
+{
+ if (ei_debug > 1)
+ printk(version);
+
+ if (dev->priv == NULL)
+ {
+ struct ei_device *ei_local;
+
+ dev->priv = kmalloc(sizeof(struct ei_device), GFP_KERNEL);
+ if (dev->priv == NULL)
+ return -ENOMEM;
+ memset(dev->priv, 0, sizeof(struct ei_device));
+ ei_local = (struct ei_device *)dev->priv;
+ spin_lock_init(&ei_local->page_lock);
+ }
+
+ dev->hard_start_xmit = &ei_start_xmit;
+ dev->get_stats = get_stats;
+ dev->set_multicast_list = &set_multicast_list;
+
+ ether_setup(dev);
+
+ return 0;
+}
+
+
+
+/* This page of functions should be 8390 generic */
+/* Follow National Semi's recommendations for initializing the "NIC". */
+
+/**
+ * NS8390_init - initialize 8390 hardware
+ * @dev: network device to initialize
+ * @startp: boolean. non-zero value to initiate chip processing
+ *
+ * Must be called with lock held.
+ */
+
+void NS8390_init(struct net_device *dev, int startp)
+{
+ long e8390_base = dev->base_addr;
+ struct ei_device *ei_local = (struct ei_device *) dev->priv;
+ int i;
+ int endcfg = ei_local->word16
+ ? (0x48 | ENDCFG_WTS | (ei_local->bigendian ? ENDCFG_BOS : 0))
+ : 0x48;
+
+ if(sizeof(struct e8390_pkt_hdr)!=4)
+ panic("8390.c: header struct mispacked\n");
+ /* Follow National Semi's recommendations for initing the DP83902. */
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_STOP, e8390_base+E8390_CMD); /* 0x21 */
+ outb_p(endcfg, e8390_base + EN0_DCFG); /* 0x48 or 0x49 */
+ /* Clear the remote byte count registers. */
+ outb_p(0x00, e8390_base + EN0_RCNTLO);
+ outb_p(0x00, e8390_base + EN0_RCNTHI);
+ /* Set to monitor and loopback mode -- this is vital! */
+ outb_p(E8390_RXOFF, e8390_base + EN0_RXCR); /* 0x20 */
+ outb_p(E8390_TXOFF, e8390_base + EN0_TXCR); /* 0x02 */
+ /* Set the transmit page and receive ring. */
+ outb_p(ei_local->tx_start_page, e8390_base + EN0_TPSR);
+ ei_local->tx1 = ei_local->tx2 = 0;
+ outb_p(ei_local->rx_start_page, e8390_base + EN0_STARTPG);
+ outb_p(ei_local->stop_page-1, e8390_base + EN0_BOUNDARY); /* 3c503 says 0x3f,NS0x26*/
+ ei_local->current_page = ei_local->rx_start_page; /* assert boundary+1 */
+ outb_p(ei_local->stop_page, e8390_base + EN0_STOPPG);
+ /* Clear the pending interrupts and mask. */
+ outb_p(0xFF, e8390_base + EN0_ISR);
+ outb_p(0x00, e8390_base + EN0_IMR);
+
+ /* Copy the station address into the DS8390 registers. */
+
+ outb_p(E8390_NODMA + E8390_PAGE1 + E8390_STOP, e8390_base+E8390_CMD); /* 0x61 */
+ for(i = 0; i < 6; i++)
+ {
+ outb_p(dev->dev_addr[i], e8390_base + EN1_PHYS_SHIFT(i));
+ if(inb_p(e8390_base + EN1_PHYS_SHIFT(i))!=dev->dev_addr[i])
+ printk(KERN_ERR "Hw. address read/write mismap %d\n",i);
+ }
+
+ outb_p(ei_local->rx_start_page, e8390_base + EN1_CURPAG);
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_STOP, e8390_base+E8390_CMD);
+
+ netif_start_queue(dev);
+ ei_local->tx1 = ei_local->tx2 = 0;
+ ei_local->txing = 0;
+
+ if (startp)
+ {
+ outb_p(0xff, e8390_base + EN0_ISR);
+ outb_p(ENISR_ALL, e8390_base + EN0_IMR);
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, e8390_base+E8390_CMD);
+ outb_p(E8390_TXCONFIG, e8390_base + EN0_TXCR); /* xmit on. */
+ /* 3c503 TechMan says rxconfig only after the NIC is started. */
+ outb_p(E8390_RXCONFIG, e8390_base + EN0_RXCR); /* rx on, */
+ do_set_multicast_list(dev); /* (re)load the mcast table */
+ }
+}
+
+/* Trigger a transmit start, assuming the length is valid.
+ Always called with the page lock held. */
+
+static void NS8390_trigger_send(struct net_device *dev, unsigned int length,
+ int start_page)
+{
+ long e8390_base = dev->base_addr;
+ struct ei_device *ei_local __attribute__((unused)) = (struct ei_device *) dev->priv;
+
+ outb_p(E8390_NODMA+E8390_PAGE0, e8390_base+E8390_CMD);
+
+ if (inb_p(e8390_base) & E8390_TRANS)
+ {
+ printk(KERN_WARNING "%s: trigger_send() called with the transmitter busy.\n",
+ dev->name);
+ return;
+ }
+ outb_p(length & 0xff, e8390_base + EN0_TCNTLO);
+ outb_p(length >> 8, e8390_base + EN0_TCNTHI);
+ outb_p(start_page, e8390_base + EN0_TPSR);
+ outb_p(E8390_NODMA+E8390_TRANS+E8390_START, e8390_base+E8390_CMD);
+}
+
+EXPORT_SYMBOL(ei_open);
+EXPORT_SYMBOL(ei_close);
+EXPORT_SYMBOL(ei_interrupt);
+EXPORT_SYMBOL(ei_tx_timeout);
+EXPORT_SYMBOL(ethdev_init);
+EXPORT_SYMBOL(NS8390_init);
+
+#if defined(MODULE)
+
+int init_module(void)
+{
+ return 0;
+}
+
+void cleanup_module(void)
+{
+}
+
+#endif /* MODULE */
diff --git a/xen/drivers/net/ne/8390.h b/xen/drivers/net/ne/8390.h
new file mode 100644
index 0000000000..1a3be1775d
--- /dev/null
+++ b/xen/drivers/net/ne/8390.h
@@ -0,0 +1,197 @@
+/* Generic NS8390 register definitions. */
+/* This file is part of Donald Becker's 8390 drivers, and is distributed
+ under the same license. Auto-loading of 8390.o only in v2.2 - Paul G.
+ Some of these names and comments originated from the Crynwr
+ packet drivers, which are distributed under the GPL. */
+
+#ifndef _8390_h
+#define _8390_h
+
+#include <xeno/config.h>
+#include <xeno/if_ether.h>
+#include <xeno/ioport.h>
+#include <xeno/skbuff.h>
+
+#define TX_2X_PAGES 12
+#define TX_1X_PAGES 6
+
+/* Should always use two Tx slots to get back-to-back transmits. */
+#define EI_PINGPONG
+
+#ifdef EI_PINGPONG
+#define TX_PAGES TX_2X_PAGES
+#else
+#define TX_PAGES TX_1X_PAGES
+#endif
+
+#define ETHER_ADDR_LEN 6
+
+/* The 8390 specific per-packet-header format. */
+struct e8390_pkt_hdr {
+ unsigned char status; /* status */
+ unsigned char next; /* pointer to next packet. */
+ unsigned short count; /* header + packet length in bytes */
+};
+
+#ifdef notdef
+extern int ei_debug;
+#else
+#define ei_debug 1
+#endif
+
+#ifndef HAVE_AUTOIRQ
+/* From auto_irq.c */
+extern void autoirq_setup(int waittime);
+extern unsigned long autoirq_report(int waittime);
+#endif
+
+extern int ethdev_init(struct net_device *dev);
+extern void NS8390_init(struct net_device *dev, int startp);
+extern int ei_open(struct net_device *dev);
+extern int ei_close(struct net_device *dev);
+extern void ei_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+
+/* Most of these entries should be in 'struct net_device' (or most of the
+ things in there should be here!) */
+/* You have one of these per-board */
+struct ei_device {
+ const char *name;
+ void (*reset_8390)(struct net_device *);
+ void (*get_8390_hdr)(struct net_device *, struct e8390_pkt_hdr *, int);
+ void (*block_output)(struct net_device *, int, const unsigned char *, int);
+ void (*block_input)(struct net_device *, int, struct sk_buff *, int);
+ unsigned char mcfilter[8];
+ unsigned open:1;
+ unsigned word16:1; /* We have the 16-bit (vs 8-bit) version of the card. */
+ unsigned bigendian:1; /* 16-bit big endian mode. Do NOT */
+ /* set this on random 8390 clones! */
+ unsigned txing:1; /* Transmit Active */
+ unsigned irqlock:1; /* 8390's intrs disabled when '1'. */
+ unsigned dmaing:1; /* Remote DMA Active */
+ unsigned char tx_start_page, rx_start_page, stop_page;
+ unsigned char current_page; /* Read pointer in buffer */
+ unsigned char interface_num; /* Net port (AUI, 10bT.) to use. */
+ unsigned char txqueue; /* Tx Packet buffer queue length. */
+ short tx1, tx2; /* Packet lengths for ping-pong tx. */
+ short lasttx; /* Alpha version consistency check. */
+ unsigned char reg0; /* Register '0' in a WD8013 */
+ unsigned char reg5; /* Register '5' in a WD8013 */
+ unsigned char saved_irq; /* Original dev->irq value. */
+ struct net_device_stats stat; /* The new statistics table. */
+ u32 *reg_offset; /* Register mapping table */
+ spinlock_t page_lock; /* Page register locks */
+ unsigned long priv; /* Private field to store bus IDs etc. */
+};
+
+/* The maximum number of 8390 interrupt service routines called per IRQ. */
+#define MAX_SERVICE 12
+
+/* The maximum time waited (in jiffies) before assuming a Tx failed. (20ms) */
+#define TX_TIMEOUT (20*HZ/100)
+
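+/* Shorthand: access dev->priv as this device's ei_device structure. */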
+#define ei_status (*(struct ei_device *)(dev->priv))
+
+/* Some generic ethernet register configurations. */
+#define E8390_TX_IRQ_MASK 0xa /* For register EN0_ISR */
+#define E8390_RX_IRQ_MASK 0x5
+#define E8390_RXCONFIG 0x4 /* EN0_RXCR: broadcasts, no multicast, errors */
+#define E8390_RXOFF 0x20 /* EN0_RXCR: Accept no packets */
+#define E8390_TXCONFIG 0x00 /* EN0_TXCR: Normal transmit mode */
+#define E8390_TXOFF 0x02 /* EN0_TXCR: Transmitter off */
+
+/* Register accessed at EN_CMD, the 8390 base addr. */
+#define E8390_STOP 0x01 /* Stop and reset the chip */
+#define E8390_START 0x02 /* Start the chip, clear reset */
+#define E8390_TRANS 0x04 /* Transmit a frame */
+#define E8390_RREAD 0x08 /* Remote read */
+#define E8390_RWRITE 0x10 /* Remote write */
+#define E8390_NODMA 0x20 /* Remote DMA */
+#define E8390_PAGE0 0x00 /* Select page chip registers */
+#define E8390_PAGE1 0x40 /* using the two high-order bits */
+#define E8390_PAGE2 0x80 /* Page 3 is invalid. */
+
+/*
+ * Only generate indirect loads given a machine that needs them.
+ */
+
+#if defined(CONFIG_MAC) || defined(CONFIG_AMIGA_PCMCIA) || \
+ defined(CONFIG_ARIADNE2) || defined(CONFIG_ARIADNE2_MODULE) || \
+ defined(CONFIG_HYDRA) || defined(CONFIG_HYDRA_MODULE) || \
+ defined(CONFIG_ARM_ETHERH) || defined(CONFIG_ARM_ETHERH_MODULE)
+#define EI_SHIFT(x) (ei_local->reg_offset[x])
+#else
+#define EI_SHIFT(x) (x)
+#endif
+
+#define E8390_CMD EI_SHIFT(0x00) /* The command register (for all pages) */
+/* Page 0 register offsets. */
+#define EN0_CLDALO EI_SHIFT(0x01) /* Low byte of current local dma addr RD */
+#define EN0_STARTPG EI_SHIFT(0x01) /* Starting page of ring bfr WR */
+#define EN0_CLDAHI EI_SHIFT(0x02) /* High byte of current local dma addr RD */
+#define EN0_STOPPG EI_SHIFT(0x02) /* Ending page +1 of ring bfr WR */
+#define EN0_BOUNDARY EI_SHIFT(0x03) /* Boundary page of ring bfr RD WR */
+#define EN0_TSR EI_SHIFT(0x04) /* Transmit status reg RD */
+#define EN0_TPSR EI_SHIFT(0x04) /* Transmit starting page WR */
+#define EN0_NCR EI_SHIFT(0x05) /* Number of collision reg RD */
+#define EN0_TCNTLO EI_SHIFT(0x05) /* Low byte of tx byte count WR */
+#define EN0_FIFO EI_SHIFT(0x06) /* FIFO RD */
+#define EN0_TCNTHI EI_SHIFT(0x06) /* High byte of tx byte count WR */
+#define EN0_ISR EI_SHIFT(0x07) /* Interrupt status reg RD WR */
+#define EN0_CRDALO EI_SHIFT(0x08) /* low byte of current remote dma address RD */
+#define EN0_RSARLO EI_SHIFT(0x08) /* Remote start address reg 0 */
+#define EN0_CRDAHI EI_SHIFT(0x09) /* high byte, current remote dma address RD */
+#define EN0_RSARHI EI_SHIFT(0x09) /* Remote start address reg 1 */
+#define EN0_RCNTLO EI_SHIFT(0x0a) /* Remote byte count reg WR */
+#define EN0_RCNTHI EI_SHIFT(0x0b) /* Remote byte count reg WR */
+#define EN0_RSR EI_SHIFT(0x0c) /* rx status reg RD */
+#define EN0_RXCR EI_SHIFT(0x0c) /* RX configuration reg WR */
+#define EN0_TXCR EI_SHIFT(0x0d) /* TX configuration reg WR */
+#define EN0_COUNTER0 EI_SHIFT(0x0d) /* Rcv alignment error counter RD */
+#define EN0_DCFG EI_SHIFT(0x0e) /* Data configuration reg WR */
+#define EN0_COUNTER1 EI_SHIFT(0x0e) /* Rcv CRC error counter RD */
+#define EN0_IMR EI_SHIFT(0x0f) /* Interrupt mask reg WR */
+#define EN0_COUNTER2 EI_SHIFT(0x0f) /* Rcv missed frame error counter RD */
+
+/* Bits in EN0_ISR - Interrupt status register */
+#define ENISR_RX 0x01 /* Receiver, no error */
+#define ENISR_TX 0x02 /* Transmitter, no error */
+#define ENISR_RX_ERR 0x04 /* Receiver, with error */
+#define ENISR_TX_ERR 0x08 /* Transmitter, with error */
+#define ENISR_OVER 0x10 /* Receiver overwrote the ring */
+#define ENISR_COUNTERS 0x20 /* Counters need emptying */
+#define ENISR_RDC 0x40 /* remote dma complete */
+#define ENISR_RESET 0x80 /* Reset completed */
+#define ENISR_ALL 0x3f /* Interrupts we will enable */
+
+/* Bits in EN0_DCFG - Data config register */
+#define ENDCFG_WTS 0x01 /* word transfer mode selection */
+#define ENDCFG_BOS 0x02 /* byte order selection */
+
+/* Page 1 register offsets. */
+#define EN1_PHYS EI_SHIFT(0x01) /* This board's physical enet addr RD WR */
+#define EN1_PHYS_SHIFT(i) EI_SHIFT(i+1) /* Get and set mac address */
+#define EN1_CURPAG EI_SHIFT(0x07) /* Current memory page RD WR */
+#define EN1_MULT EI_SHIFT(0x08) /* Multicast filter mask array (8 bytes) RD WR */
+#define EN1_MULT_SHIFT(i) EI_SHIFT(8+i) /* Get and set multicast filter */
+
+/* Bits in received packet status byte and EN0_RSR*/
+#define ENRSR_RXOK 0x01 /* Received a good packet */
+#define ENRSR_CRC 0x02 /* CRC error */
+#define ENRSR_FAE 0x04 /* frame alignment error */
+#define ENRSR_FO 0x08 /* FIFO overrun */
+#define ENRSR_MPA 0x10 /* missed pkt */
+#define ENRSR_PHY 0x20 /* physical/multicast address */
+#define ENRSR_DIS 0x40 /* receiver disable. set in monitor mode */
+#define ENRSR_DEF 0x80 /* deferring */
+
+/* Transmitted packet status, EN0_TSR. */
+#define ENTSR_PTX 0x01 /* Packet transmitted without error */
+#define ENTSR_ND 0x02 /* The transmit wasn't deferred. */
+#define ENTSR_COL 0x04 /* The transmit collided at least once. */
+#define ENTSR_ABT 0x08 /* The transmit collided 16 times, and was deferred. */
+#define ENTSR_CRS 0x10 /* The carrier sense was lost. */
+#define ENTSR_FU 0x20 /* A "FIFO underrun" occurred during transmit. */
+#define ENTSR_CDH 0x40 /* The collision detect "heartbeat" signal was lost. */
+#define ENTSR_OWC 0x80 /* There was an out-of-window collision. */
+
+#endif /* _8390_h */
diff --git a/xen/drivers/net/ne/Makefile b/xen/drivers/net/ne/Makefile
new file mode 100644
index 0000000000..d1bcc12d87
--- /dev/null
+++ b/xen/drivers/net/ne/Makefile
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o ne_drv.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen/drivers/net/ne/ne.c b/xen/drivers/net/ne/ne.c
new file mode 100644
index 0000000000..f694fc107b
--- /dev/null
+++ b/xen/drivers/net/ne/ne.c
@@ -0,0 +1,685 @@
+/* ne.c: A general non-shared-memory NS8390 ethernet driver for linux. */
+/*
+ Written 1992-94 by Donald Becker.
+
+ Copyright 1993 United States Government as represented by the
+ Director, National Security Agency.
+
+ This software may be used and distributed according to the terms
+ of the GNU General Public License, incorporated herein by reference.
+
+ The author may be reached as becker@scyld.com, or C/O
+ Scyld Computing Corporation, 410 Severn Ave., Suite 210, Annapolis MD 21403
+
+ This driver should work with many programmed-I/O 8390-based ethernet
+ boards. Currently it supports the NE1000, NE2000, many clones,
+ and some Cabletron products.
+
+ Changelog:
+
+ Paul Gortmaker : use ENISR_RDC to monitor Tx PIO uploads, made
+ sanity checks and bad clone support optional.
+ Paul Gortmaker : new reset code, reset card after probe at boot.
+ Paul Gortmaker : multiple card support for module users.
+ Paul Gortmaker : Support for PCI ne2k clones, similar to lance.c
+ Paul Gortmaker : Allow users with bad cards to avoid full probe.
+ Paul Gortmaker : PCI probe changes, more PCI cards supported.
+ rjohnson@analogic.com : Changed init order so an interrupt will only
+ occur after memory is allocated for dev->priv. Deallocated memory
+ last in cleanup_module().
+ Richard Guenther : Added support for ISAPnP cards
+ Paul Gortmaker : Discontinued PCI support - use ne2k-pci.c instead.
+
+*/
+
+/* Routines for the NatSemi-based designs (NE[12]000). */
+
+static const char version1[] =
+"ne.c:v1.10 9/23/94 Donald Becker (becker@scyld.com)\n";
+static const char version2[] =
+"Last modified Nov 1, 2000 by Paul Gortmaker\n";
+
+
+#include <xeno/module.h>
+#include <xeno/kernel.h>
+#include <xeno/sched.h>
+#include <xeno/errno.h>
+#include <xeno/init.h>
+#include <xeno/delay.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+#include <xeno/netdevice.h>
+#include <xeno/etherdevice.h>
+#include "8390.h"
+
+/* Some defines that people can play with if so inclined. */
+
+/* Do we support clones that don't adhere to 14,15 of the SAprom? */
+#define SUPPORT_NE_BAD_CLONES
+
+/* Do we perform extra sanity checks on stuff ? */
+/* #define NE_SANITY_CHECK */
+
+/* Do we implement the read before write bugfix ? */
+/* #define NE_RW_BUGFIX */
+
+/* Do we have a non std. amount of memory? (in units of 256 byte pages) */
+/* #define PACKETBUF_MEMSIZE 0x40 */
+
+#ifdef SUPPORT_NE_BAD_CLONES
+/* A list of bad clones that we nonetheless recognize. */
+static struct { const char *name8, *name16; unsigned char SAprefix[4];}
+bad_clone_list[] __initdata = {
+ {"DE100", "DE200", {0x00, 0xDE, 0x01,}},
+ {"DE120", "DE220", {0x00, 0x80, 0xc8,}},
+ {"DFI1000", "DFI2000", {'D', 'F', 'I',}}, /* Original, eh? */
+ {"EtherNext UTP8", "EtherNext UTP16", {0x00, 0x00, 0x79}},
+ {"NE1000","NE2000-invalid", {0x00, 0x00, 0xd8}}, /* Ancient real NE1000. */
+ {"NN1000", "NN2000", {0x08, 0x03, 0x08}}, /* Outlaw no-name clone. */
+ {"4-DIM8","4-DIM16", {0x00,0x00,0x4d,}}, /* Outlaw 4-Dimension cards. */
+ {"Con-Intl_8", "Con-Intl_16", {0x00, 0x00, 0x24}}, /* Connect Int'nl */
+ {"ET-100","ET-200", {0x00, 0x45, 0x54}}, /* YANG and YA clone */
+ {"COMPEX","COMPEX16",{0x00,0x80,0x48}}, /* Broken ISA Compex cards */
+ {"E-LAN100", "E-LAN200", {0x00, 0x00, 0x5d}}, /* Broken ne1000 clones */
+ {"PCM-4823", "PCM-4823", {0x00, 0xc0, 0x6c}}, /* Broken Advantech MoBo */
+ {"REALTEK", "RTL8019", {0x00, 0x00, 0xe8}}, /* no-name with Realtek chip */
+ {"LCS-8834", "LCS-8836", {0x04, 0x04, 0x37}}, /* ShinyNet (SET) */
+ {0,}
+};
+#endif
+
+/* ---- No user-serviceable parts below ---- */
+
+#define NE_BASE (dev->base_addr)
+#define NE_CMD 0x00
+#define NE_DATAPORT 0x10 /* NatSemi-defined port window offset. */
+#define NE_RESET 0x1f /* Issue a read to reset, a write to clear. */
+#define NE_IO_EXTENT 0x20
+
+#define NE1SM_START_PG 0x20 /* First page of TX buffer */
+#define NE1SM_STOP_PG 0x40 /* Last page +1 of RX ring */
+#define NESM_START_PG 0x40 /* First page of TX buffer */
+#define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */
+
+int ne_probe(struct net_device *dev);
+static int ne_probe1(struct net_device *dev, int ioaddr);
+
+static int ne_open(struct net_device *dev);
+static int ne_close(struct net_device *dev);
+
+static void ne_reset_8390(struct net_device *dev);
+static void ne_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr,
+ int ring_page);
+static void ne_block_input(struct net_device *dev, int count,
+ struct sk_buff *skb, int ring_offset);
+static void ne_block_output(struct net_device *dev, const int count,
+ const unsigned char *buf, const int start_page);
+
+
+/* Probe for various non-shared-memory ethercards.
+
+ NEx000-clone boards have a Station Address PROM (SAPROM) in the packet
+ buffer memory space. NE2000 clones have 0x57,0x57 in bytes 0x0e,0x0f of
+ the SAPROM, while other supposed NE2000 clones must be detected by their
+ SA prefix.
+
+ Reading the SAPROM from a word-wide card with the 8390 set in byte-wide
+ mode results in doubled values, which can be detected and compensated for.
+
+ The probe is also responsible for initializing the card and filling
+ in the 'dev' and 'ei_status' structures.
+
+ We use the minimum memory size for some ethercard product lines, iff we can't
+ distinguish models. You can increase the packet buffer size by setting
+ PACKETBUF_MEMSIZE. Reported Cabletron packet buffer locations are:
+ E1010 starts at 0x100 and ends at 0x2000.
+ E1010-x starts at 0x100 and ends at 0x8000. ("-x" means "more memory")
+ E2010 starts at 0x100 and ends at 0x4000.
+ E2010-x starts at 0x100 and ends at 0xffff. */
+
+int __init ne_probe(struct net_device *dev)
+{
+ unsigned int base_addr = dev->base_addr;
+
+ SET_MODULE_OWNER(dev);
+
+ /* First check any supplied i/o locations. User knows best. <cough> */
+ if (base_addr > 0x1ff) /* Check a single specified location. */
+ return ne_probe1(dev, base_addr);
+
+ return -ENODEV;
+}
+
+static int __init ne_probe1(struct net_device *dev, int ioaddr)
+{
+ int i;
+ unsigned char SA_prom[32];
+ int wordlength = 2;
+ const char *name = NULL;
+ int start_page, stop_page;
+ int neX000, ctron, copam, bad_card;
+ int reg0, ret;
+ static unsigned version_printed;
+
+ if (!request_region(ioaddr, NE_IO_EXTENT, dev->name))
+ return -EBUSY;
+
+ reg0 = inb_p(ioaddr);
+ if (reg0 == 0xFF) {
+ ret = -ENODEV;
+ goto err_out;
+ }
+
+ /* Do a preliminary verification that we have a 8390. */
+ {
+ int regd;
+ outb_p(E8390_NODMA+E8390_PAGE1+E8390_STOP, ioaddr + E8390_CMD);
+ regd = inb_p(ioaddr + 0x0d);
+ outb_p(0xff, ioaddr + 0x0d);
+ outb_p(E8390_NODMA+E8390_PAGE0, ioaddr + E8390_CMD);
+ inb_p(ioaddr + EN0_COUNTER0); /* Clear the counter by reading. */
+ if (inb_p(ioaddr + EN0_COUNTER0) != 0) {
+ outb_p(reg0, ioaddr);
+ outb_p(regd, ioaddr + 0x0d); /* Restore the old values. */
+ ret = -ENODEV;
+ goto err_out;
+ }
+ }
+
+ if (ei_debug && version_printed++ == 0)
+ printk(KERN_INFO "%s" KERN_INFO "%s", version1, version2);
+
+ printk(KERN_INFO "NE*000 ethercard probe at %#3x:", ioaddr);
+
+ /* A user with a poor card that fails to ack the reset, or that
+ does not have a valid 0x57,0x57 signature can still use this
+ without having to recompile. Specifying an i/o address along
+ with an otherwise unused dev->mem_end value of "0xBAD" will
+ cause the driver to skip these parts of the probe. */
+
+ bad_card = ((dev->base_addr != 0) && (dev->mem_end == 0xbad));
+
+ /* Reset card. Who knows what dain-bramaged state it was left in. */
+
+ {
+ unsigned long reset_start_time = jiffies;
+
+ /* DON'T change these to inb_p/outb_p or reset will fail on clones. */
+ outb(inb(ioaddr + NE_RESET), ioaddr + NE_RESET);
+
+ while ((inb_p(ioaddr + EN0_ISR) & ENISR_RESET) == 0)
+ if (jiffies - reset_start_time > 2*HZ/100) {
+ if (bad_card) {
+ printk(" (warning: no reset ack)");
+ break;
+ } else {
+ printk(" not found (no reset ack).\n");
+ ret = -ENODEV;
+ goto err_out;
+ }
+ }
+
+ outb_p(0xff, ioaddr + EN0_ISR); /* Ack all intr. */
+ }
+
+ /* Read the 16 bytes of station address PROM.
+ We must first initialize registers, similar to NS8390_init(eifdev, 0).
+ We can't reliably read the SAPROM address without this.
+ (I learned the hard way!). */
+ {
+ struct {unsigned char value, offset; } program_seq[] =
+ {
+ {E8390_NODMA+E8390_PAGE0+E8390_STOP, E8390_CMD}, /* Select page 0*/
+ {0x48, EN0_DCFG}, /* Set byte-wide (0x48) access. */
+ {0x00, EN0_RCNTLO}, /* Clear the count regs. */
+ {0x00, EN0_RCNTHI},
+ {0x00, EN0_IMR}, /* Mask completion irq. */
+ {0xFF, EN0_ISR},
+ {E8390_RXOFF, EN0_RXCR}, /* 0x20 Set to monitor */
+ {E8390_TXOFF, EN0_TXCR}, /* 0x02 and loopback mode. */
+ {32, EN0_RCNTLO},
+ {0x00, EN0_RCNTHI},
+ {0x00, EN0_RSARLO}, /* DMA starting at 0x0000. */
+ {0x00, EN0_RSARHI},
+ {E8390_RREAD+E8390_START, E8390_CMD},
+ };
+
+ for (i = 0; i < sizeof(program_seq)/sizeof(program_seq[0]); i++)
+ outb_p(program_seq[i].value, ioaddr + program_seq[i].offset);
+
+ }
+ for(i = 0; i < 32 /*sizeof(SA_prom)*/; i+=2) {
+ SA_prom[i] = inb(ioaddr + NE_DATAPORT);
+ SA_prom[i+1] = inb(ioaddr + NE_DATAPORT);
+ if (SA_prom[i] != SA_prom[i+1])
+ wordlength = 1;
+ }
+
+ if (wordlength == 2)
+ {
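+ /* Byte-mode reads of a word-wide PROM return every byte doubled,
+ so the real byte i sits at offset i+i (not a typo for i+1). */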
+ for (i = 0; i < 16; i++)
+ SA_prom[i] = SA_prom[i+i];
+ /* We must set the 8390 for word mode. */
+ outb_p(0x49, ioaddr + EN0_DCFG);
+ start_page = NESM_START_PG;
+ stop_page = NESM_STOP_PG;
+ } else {
+ start_page = NE1SM_START_PG;
+ stop_page = NE1SM_STOP_PG;
+ }
+
+ neX000 = (SA_prom[14] == 0x57 && SA_prom[15] == 0x57);
+ ctron = (SA_prom[0] == 0x00 && SA_prom[1] == 0x00 && SA_prom[2] == 0x1d);
+ copam = (SA_prom[14] == 0x49 && SA_prom[15] == 0x00);
+
+ /* Set up the rest of the parameters. */
+ if (neX000 || bad_card || copam) {
+ name = (wordlength == 2) ? "NE2000" : "NE1000";
+ }
+ else if (ctron)
+ {
+ name = (wordlength == 2) ? "Ctron-8" : "Ctron-16";
+ start_page = 0x01;
+ stop_page = (wordlength == 2) ? 0x40 : 0x20;
+ }
+ else
+ {
+#ifdef SUPPORT_NE_BAD_CLONES
+ /* Ack! Well, there might be a *bad* NE*000 clone there.
+ Check for total bogus addresses. */
+ for (i = 0; bad_clone_list[i].name8; i++)
+ {
+ if (SA_prom[0] == bad_clone_list[i].SAprefix[0] &&
+ SA_prom[1] == bad_clone_list[i].SAprefix[1] &&
+ SA_prom[2] == bad_clone_list[i].SAprefix[2])
+ {
+ if (wordlength == 2)
+ {
+ name = bad_clone_list[i].name16;
+ } else {
+ name = bad_clone_list[i].name8;
+ }
+ break;
+ }
+ }
+ if (bad_clone_list[i].name8 == NULL)
+ {
+ printk(" not found (invalid signature %2.2x %2.2x).\n",
+ SA_prom[14], SA_prom[15]);
+ ret = -ENXIO;
+ goto err_out;
+ }
+#else
+ printk(" not found.\n");
+ ret = -ENXIO;
+ goto err_out;
+#endif
+ }
+
+ if (dev->irq < 2)
+ {
+ unsigned long cookie = probe_irq_on();
+ outb_p(0x50, ioaddr + EN0_IMR); /* Enable one interrupt. */
+ outb_p(0x00, ioaddr + EN0_RCNTLO);
+ outb_p(0x00, ioaddr + EN0_RCNTHI);
+ outb_p(E8390_RREAD+E8390_START, ioaddr); /* Trigger it... */
+ mdelay(10); /* wait 10ms for interrupt to propagate */
+ outb_p(0x00, ioaddr + EN0_IMR); /* Mask it again. */
+ dev->irq = probe_irq_off(cookie);
+ if (ei_debug > 2)
+ printk(" autoirq is %d\n", dev->irq);
+ } else if (dev->irq == 2)
+ /* Fixup for users that don't know that IRQ 2 is really IRQ 9,
+ or don't know which one to set. */
+ dev->irq = 9;
+
+ if (! dev->irq) {
+ printk(" failed to detect IRQ line.\n");
+ ret = -EAGAIN;
+ goto err_out;
+ }
+
+ /* Allocate dev->priv and fill in 8390 specific dev fields. */
+ if (ethdev_init(dev))
+ {
+ printk (" unable to get memory for dev->priv.\n");
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ /* Snarf the interrupt now. There's no point in waiting since we cannot
+ share and the board will usually be enabled. */
+ ret = request_irq(dev->irq, ei_interrupt, 0, name, dev);
+ if (ret) {
+ printk (" unable to get IRQ %d (errno=%d).\n", dev->irq, ret);
+ goto err_out_kfree;
+ }
+
+ dev->base_addr = ioaddr;
+
+ for(i = 0; i < ETHER_ADDR_LEN; i++) {
+ printk(" %2.2x", SA_prom[i]);
+ dev->dev_addr[i] = SA_prom[i];
+ }
+
+ printk("\n%s: %s found at %#x, using IRQ %d.\n",
+ dev->name, name, ioaddr, dev->irq);
+
+ ei_status.name = name;
+ ei_status.tx_start_page = start_page;
+ ei_status.stop_page = stop_page;
+ ei_status.word16 = (wordlength == 2);
+
+ ei_status.rx_start_page = start_page + TX_PAGES;
+#ifdef PACKETBUF_MEMSIZE
+ /* Allow the packet buffer size to be overridden by know-it-alls. */
+ ei_status.stop_page = ei_status.tx_start_page + PACKETBUF_MEMSIZE;
+#endif
+
+ ei_status.reset_8390 = &ne_reset_8390;
+ ei_status.block_input = &ne_block_input;
+ ei_status.block_output = &ne_block_output;
+ ei_status.get_8390_hdr = &ne_get_8390_hdr;
+ ei_status.priv = 0;
+ dev->open = &ne_open;
+ dev->stop = &ne_close;
+ NS8390_init(dev, 0);
+ return 0;
+
+err_out_kfree:
+ kfree(dev->priv);
+ dev->priv = NULL;
+err_out:
+ release_region(ioaddr, NE_IO_EXTENT);
+ return ret;
+}
+
+static int ne_open(struct net_device *dev)
+{
+ ei_open(dev);
+ return 0;
+}
+
+static int ne_close(struct net_device *dev)
+{
+ if (ei_debug > 1)
+ printk(KERN_DEBUG "%s: Shutting down ethercard.\n", dev->name);
+ ei_close(dev);
+ return 0;
+}
+
+/* Hard reset the card. This used to pause for the same period that a
+ 8390 reset command required, but that shouldn't be necessary. */
+
+static void ne_reset_8390(struct net_device *dev)
+{
+ unsigned long reset_start_time = jiffies;
+
+ if (ei_debug > 1)
+ printk(KERN_DEBUG "resetting the 8390 t=%ld...", jiffies);
+
+ /* DON'T change these to inb_p/outb_p or reset will fail on clones. */
+ outb(inb(NE_BASE + NE_RESET), NE_BASE + NE_RESET);
+
+ ei_status.txing = 0;
+ ei_status.dmaing = 0;
+
+ /* This check _should_not_ be necessary, omit eventually. */
+ while ((inb_p(NE_BASE+EN0_ISR) & ENISR_RESET) == 0)
+ if (jiffies - reset_start_time > 2*HZ/100) {
+ printk(KERN_WARNING "%s: ne_reset_8390() did not complete.\n", dev->name);
+ break;
+ }
+ outb_p(ENISR_RESET, NE_BASE + EN0_ISR); /* Ack intr. */
+}
+
+/* Grab the 8390 specific header. Similar to the block_input routine, but
+ we don't need to be concerned with ring wrap as the header will be at
+ the start of a page, so we optimize accordingly. */
+
+static void ne_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr, int ring_page)
+{
+ int nic_base = dev->base_addr;
+
+ /* This *shouldn't* happen. If it does, it's the last thing you'll see */
+
+ if (ei_status.dmaing)
+ {
+ printk(KERN_EMERG "%s: DMAing conflict in ne_get_8390_hdr "
+ "[DMAstat:%d][irqlock:%d].\n",
+ dev->name, ei_status.dmaing, ei_status.irqlock);
+ return;
+ }
+
+ ei_status.dmaing |= 0x01;
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, nic_base+ NE_CMD);
+ outb_p(sizeof(struct e8390_pkt_hdr), nic_base + EN0_RCNTLO);
+ outb_p(0, nic_base + EN0_RCNTHI);
+ outb_p(0, nic_base + EN0_RSARLO); /* On page boundary */
+ outb_p(ring_page, nic_base + EN0_RSARHI);
+ outb_p(E8390_RREAD+E8390_START, nic_base + NE_CMD);
+
+ if (ei_status.word16)
+ insw(NE_BASE + NE_DATAPORT, hdr, sizeof(struct e8390_pkt_hdr)>>1);
+ else
+ insb(NE_BASE + NE_DATAPORT, hdr, sizeof(struct e8390_pkt_hdr));
+
+ outb_p(ENISR_RDC, nic_base + EN0_ISR); /* Ack intr. */
+ ei_status.dmaing &= ~0x01;
+
+ le16_to_cpus(&hdr->count);
+}
+
+/* Block input and output, similar to the Crynwr packet driver. If you
+ are porting to a new ethercard, look at the packet driver source for hints.
+ The NEx000 doesn't share the on-board packet memory -- you have to put
+ the packet out through the "remote DMA" dataport using outb. */
+
+static void ne_block_input(struct net_device *dev, int count, struct sk_buff *skb, int ring_offset)
+{
+#ifdef NE_SANITY_CHECK
+ int xfer_count = count;
+#endif
+ int nic_base = dev->base_addr;
+ char *buf = skb->data;
+
+ /* This *shouldn't* happen. If it does, it's the last thing you'll see */
+ if (ei_status.dmaing)
+ {
+ printk(KERN_EMERG "%s: DMAing conflict in ne_block_input "
+ "[DMAstat:%d][irqlock:%d].\n",
+ dev->name, ei_status.dmaing, ei_status.irqlock);
+ return;
+ }
+ ei_status.dmaing |= 0x01;
+ outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, nic_base+ NE_CMD);
+ outb_p(count & 0xff, nic_base + EN0_RCNTLO);
+ outb_p(count >> 8, nic_base + EN0_RCNTHI);
+ outb_p(ring_offset & 0xff, nic_base + EN0_RSARLO);
+ outb_p(ring_offset >> 8, nic_base + EN0_RSARHI);
+ outb_p(E8390_RREAD+E8390_START, nic_base + NE_CMD);
+ if (ei_status.word16)
+ {
+ insw(NE_BASE + NE_DATAPORT,buf,count>>1);
+ if (count & 0x01)
+ {
+ buf[count-1] = inb(NE_BASE + NE_DATAPORT);
+#ifdef NE_SANITY_CHECK
+ xfer_count++;
+#endif
+ }
+ } else {
+ insb(NE_BASE + NE_DATAPORT, buf, count);
+ }
+
+#ifdef NE_SANITY_CHECK
+ /* This was for the ALPHA version only, but enough people have
+ been encountering problems so it is still here. If you see
+ this message you either 1) have a slightly incompatible clone
+ or 2) have noise/speed problems with your bus. */
+
+ if (ei_debug > 1)
+ {
+ /* DMA termination address check... */
+ int addr, tries = 20;
+ do {
+ /* DON'T check for 'inb_p(EN0_ISR) & ENISR_RDC' here
+ -- it's broken for Rx on some cards! */
+ int high = inb_p(nic_base + EN0_RSARHI);
+ int low = inb_p(nic_base + EN0_RSARLO);
+ addr = (high << 8) + low;
+ if (((ring_offset + xfer_count) & 0xff) == low)
+ break;
+ } while (--tries > 0);
+ if (tries <= 0)
+ printk(KERN_WARNING "%s: RX transfer address mismatch,"
+ "%#4.4x (expected) vs. %#4.4x (actual).\n",
+ dev->name, ring_offset + xfer_count, addr);
+ }
+#endif
+ outb_p(ENISR_RDC, nic_base + EN0_ISR); /* Ack intr. */
+ ei_status.dmaing &= ~0x01;
+}
+
+static void ne_block_output(struct net_device *dev, int count,
+ const unsigned char *buf, const int start_page)
+{
+ int nic_base = NE_BASE;
+ unsigned long dma_start;
+#ifdef NE_SANITY_CHECK
+ int retries = 0;
+#endif
+
+ /* Round the count up for word writes. Do we need to do this?
+ What effect will an odd byte count have on the 8390?
+ I should check someday. */
+
+ if (ei_status.word16 && (count & 0x01))
+ count++;
+
+ /* This *shouldn't* happen. If it does, it's the last thing you'll see */
+ if (ei_status.dmaing)
+ {
+ printk(KERN_EMERG "%s: DMAing conflict in ne_block_output."
+ "[DMAstat:%d][irqlock:%d]\n",
+ dev->name, ei_status.dmaing, ei_status.irqlock);
+ return;
+ }
+ ei_status.dmaing |= 0x01;
+ /* We should already be in page 0, but to be safe... */
+ outb_p(E8390_PAGE0+E8390_START+E8390_NODMA, nic_base + NE_CMD);
+
+#ifdef NE_SANITY_CHECK
+retry:
+#endif
+
+#ifdef NE8390_RW_BUGFIX
+ /* Handle the read-before-write bug the same way as the
+ Crynwr packet driver -- the NatSemi method doesn't work.
+ Actually this doesn't always work either, but if you have
+ problems with your NEx000 this is better than nothing! */
+
+ outb_p(0x42, nic_base + EN0_RCNTLO);
+ outb_p(0x00, nic_base + EN0_RCNTHI);
+ outb_p(0x42, nic_base + EN0_RSARLO);
+ outb_p(0x00, nic_base + EN0_RSARHI);
+ outb_p(E8390_RREAD+E8390_START, nic_base + NE_CMD);
+ /* Make certain that the dummy read has occurred. */
+ udelay(6);
+#endif
+
+ outb_p(ENISR_RDC, nic_base + EN0_ISR);
+
+ /* Now the normal output. */
+ outb_p(count & 0xff, nic_base + EN0_RCNTLO);
+ outb_p(count >> 8, nic_base + EN0_RCNTHI);
+ outb_p(0x00, nic_base + EN0_RSARLO);
+ outb_p(start_page, nic_base + EN0_RSARHI);
+
+ outb_p(E8390_RWRITE+E8390_START, nic_base + NE_CMD);
+ if (ei_status.word16) {
+ outsw(NE_BASE + NE_DATAPORT, buf, count>>1);
+ } else {
+ outsb(NE_BASE + NE_DATAPORT, buf, count);
+ }
+
+ dma_start = jiffies;
+
+#ifdef NE_SANITY_CHECK
+ /* This was for the ALPHA version only, but enough people have
+ been encountering problems so it is still here. */
+
+ if (ei_debug > 1)
+ {
+ /* DMA termination address check... */
+ int addr, tries = 20;
+ do {
+ int high = inb_p(nic_base + EN0_RSARHI);
+ int low = inb_p(nic_base + EN0_RSARLO);
+ addr = (high << 8) + low;
+ if ((start_page << 8) + count == addr)
+ break;
+ } while (--tries > 0);
+
+ if (tries <= 0)
+ {
+ printk(KERN_WARNING "%s: Tx packet transfer address mismatch,"
+ "%#4.4x (expected) vs. %#4.4x (actual).\n",
+ dev->name, (start_page << 8) + count, addr);
+ if (retries++ == 0)
+ goto retry;
+ }
+ }
+#endif
+
+ while ((inb_p(nic_base + EN0_ISR) & ENISR_RDC) == 0)
+ if (jiffies - dma_start > 2*HZ/100) { /* 20ms */
+ printk(KERN_WARNING "%s: timeout waiting for Tx RDC.\n", dev->name);
+ ne_reset_8390(dev);
+ NS8390_init(dev,1);
+ break;
+ }
+
+ outb_p(ENISR_RDC, nic_base + EN0_ISR); /* Ack intr. */
+ ei_status.dmaing &= ~0x01;
+ return;
+}
+
+static struct net_device dev_ne;
+
+static int __init init_module(void)
+{
+ struct net_device *dev = &dev_ne;
+ extern unsigned int opt_ne_base;
+
+ if ( opt_ne_base == 0 ) return 0;
+
+ dev->irq = 0;
+ dev->mem_end = 0;
+ dev->base_addr = opt_ne_base;
+ dev->init = ne_probe;
+
+ if ( register_netdev(dev) != 0 )
+ {
+ printk(KERN_WARNING "ne.c: No card found at io %#x\n", opt_ne_base);
+ }
+
+ return 0;
+}
+
+static void __exit cleanup_module(void)
+{
+ struct net_device *dev = &dev_ne;
+ if ( dev->priv != NULL )
+ {
+ void *priv = dev->priv;
+ free_irq(dev->irq, dev);
+ release_region(dev->base_addr, NE_IO_EXTENT);
+ unregister_netdev(dev);
+ kfree(priv);
+ }
+}
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xen/drivers/net/net_init.c b/xen/drivers/net/net_init.c
new file mode 100644
index 0000000000..3081ec22b3
--- /dev/null
+++ b/xen/drivers/net/net_init.c
@@ -0,0 +1,732 @@
+/* net_init.c: Initialization for network devices. */
+/*
+ Written 1993,1994,1995 by Donald Becker.
+
+ The author may be reached as becker@scyld.com, or C/O
+ Scyld Computing Corporation
+ 410 Severn Ave., Suite 210
+ Annapolis MD 21403
+
+ This file contains the initialization for the "pl14+" style ethernet
+ drivers. It should eventually replace most of drivers/net/Space.c.
+ Its primary advantage is that it's able to allocate low-memory buffers.
+ A secondary advantage is that the dangerous NE*000 netcards can reserve
+ their I/O port region before the SCSI probes start.
+
+ Modifications/additions by Bjorn Ekwall <bj0rn@blox.se>:
+ ethdev_index[MAX_ETH_CARDS]
+ register_netdev() / unregister_netdev()
+
+ Modifications by Wolfgang Walter
+ Use dev_close cleanly so we always shut things down tidily.
+
+ Changed 29/10/95, Alan Cox to pass sockaddr's around for mac addresses.
+
+ 14/06/96 - Paul Gortmaker: Add generic eth_change_mtu() function.
+ 24/09/96 - Paul Norton: Add token-ring variants of the netdev functions.
+
+ 08/11/99 - Alan Cox: Got fed up of the mess in this file and cleaned it
+ up. We now share common code and have regularised name
+ allocation setups. Abolished the 16 card limits.
+ 03/19/2000 - jgarzik and Urban Widmark: init_etherdev 32-byte align
+ 03/21/2001 - jgarzik: alloc_etherdev and friends
+
+*/
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+//#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/if_ether.h>
+#include <linux/lib.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+//#include <linux/fddidevice.h>
+//#include <linux/hippidevice.h>
+//#include <linux/trdevice.h>
+//#include <linux/fcdevice.h>
+//#include <linux/if_arp.h>
+//#include <linux/if_ltalk.h>
+//#include <linux/rtnetlink.h>
+//#include <net/neighbour.h>
+
+#define rtnl_lock() ((void)0)
+#define rtnl_unlock() ((void)0)
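+/* The rtnl lock is stubbed to a no-op above, presumably because this
+ * Xen port carries no rtnetlink core (note the rtnetlink.h include is
+ * commented out); the Linux code below can then be kept unchanged. */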
+
+/* The network devices currently exist only in the socket namespace, so these
+ entries are unused. The only ones that make sense are
+ open start the ethercard
+ close stop the ethercard
+ ioctl To get statistics, perhaps set the interface port (AUI, BNC, etc.)
+ One can also imagine getting raw packets using
+ read & write
+ but this is probably better handled by a raw packet socket.
+
+ Given that almost all of these functions are handled in the current
+ socket-based scheme, putting ethercard devices in /dev/ seems pointless.
+
+ [Removed all support for /dev network devices. When someone adds
+ streams then by magic we get them, but otherwise they are un-needed
+ and a space waste]
+*/
+
+
+static struct net_device *alloc_netdev(int sizeof_priv, const char *mask,
+ void (*setup)(struct net_device *))
+{
+ struct net_device *dev;
+ int alloc_size;
+
+ /* ensure 32-byte alignment of the private area */
+ alloc_size = sizeof (*dev) + sizeof_priv + 31;
+
+ dev = (struct net_device *) kmalloc (alloc_size, GFP_KERNEL);
+ if (dev == NULL)
+ {
+ printk(KERN_ERR "alloc_dev: Unable to allocate device memory.\n");
+ return NULL;
+ }
+
+ memset(dev, 0, alloc_size);
+
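+	/* Round the first address past the net_device structure up to a
+	 * 32-byte boundary: (p + 31) & ~31 is the smallest multiple of 32
+	 * that is >= p.  E.g. p == 0x1004 gives (0x1004 + 31) & ~31 == 0x1020. */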
+ if (sizeof_priv)
+ dev->priv = (void *) (((long)(dev + 1) + 31) & ~31);
+
+ setup(dev);
+ strcpy(dev->name, mask);
+
+ return dev;
+}
+
+static struct net_device *init_alloc_dev(int sizeof_priv)
+{
+ struct net_device *dev;
+ int alloc_size;
+
+ /* ensure 32-byte alignment of the private area */
+ alloc_size = sizeof (*dev) + sizeof_priv + 31;
+
+ dev = (struct net_device *) kmalloc (alloc_size, GFP_KERNEL);
+ if (dev == NULL)
+ {
+ printk(KERN_ERR "alloc_dev: Unable to allocate device memory.\n");
+ return NULL;
+ }
+
+ memset(dev, 0, alloc_size);
+
+ if (sizeof_priv)
+ dev->priv = (void *) (((long)(dev + 1) + 31) & ~31);
+
+ return dev;
+}
+
+/*
+ * Create and name a device from a prototype, then perform any needed
+ * setup.
+ */
+
+static struct net_device *init_netdev(struct net_device *dev, int sizeof_priv,
+ char *mask, void (*setup)(struct net_device *))
+{
+ int new_device = 0;
+
+ /*
+ * Allocate a device if one is not provided.
+ */
+
+ if (dev == NULL) {
+ dev=init_alloc_dev(sizeof_priv);
+ if(dev==NULL)
+ return NULL;
+ new_device = 1;
+ }
+
+ /*
+ * Allocate a name
+ */
+
+ if (dev->name[0] == '\0' || dev->name[0] == ' ') {
+ strcpy(dev->name, mask);
+ if (dev_alloc_name(dev, mask)<0) {
+ if (new_device)
+ kfree(dev);
+ return NULL;
+ }
+ }
+
+ //netdev_boot_setup_check(dev);
+
+ /*
+ * Configure via the caller provided setup function then
+ * register if needed.
+ */
+
+ setup(dev);
+
+ if (new_device) {
+ int err;
+
+ rtnl_lock();
+ err = register_netdevice(dev);
+ rtnl_unlock();
+
+ if (err < 0) {
+ kfree(dev);
+ dev = NULL;
+ }
+ }
+ return dev;
+}
+
+#if defined(CONFIG_HIPPI) || defined(CONFIG_TR) || defined(CONFIG_NET_FC)
+static int __register_netdev(struct net_device *dev)
+{
+ if (dev->init && dev->init(dev) != 0) {
+ unregister_netdev(dev);
+ return -EIO;
+ }
+ return 0;
+}
+#endif
+
+/**
+ * init_etherdev - Register ethernet device
+ * @dev: An ethernet device structure to be filled in, or %NULL if a new
+ * struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this ethernet device
+ *
+ * Fill in the fields of the device structure with ethernet-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv. A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_etherdev(struct net_device *dev, int sizeof_priv)
+{
+ return init_netdev(dev, sizeof_priv, "eth%d", ether_setup);
+}
+
+/**
+ * alloc_etherdev - Allocates and sets up an ethernet device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this ethernet device
+ *
+ * Fill in the fields of the device structure with ethernet-generic
+ * values. Basically does everything except registering the device.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_etherdev(int sizeof_priv)
+{
+ return alloc_netdev(sizeof_priv, "eth%d", ether_setup);
+}
+
+EXPORT_SYMBOL(init_etherdev);
+EXPORT_SYMBOL(alloc_etherdev);
+
+static int eth_mac_addr(struct net_device *dev, void *p)
+{
+ struct sockaddr *addr=p;
+ if (netif_running(dev))
+ return -EBUSY;
+ memcpy(dev->dev_addr, addr->sa_data,dev->addr_len);
+ return 0;
+}
+
+static int eth_change_mtu(struct net_device *dev, int new_mtu)
+{
+ if ((new_mtu < 68) || (new_mtu > 1500))
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+#ifdef CONFIG_FDDI
+
+/**
+ * init_fddidev - Register FDDI device
+ * @dev: A FDDI device structure to be filled in, or %NULL if a new
+ * struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this ethernet device
+ *
+ * Fill in the fields of the device structure with FDDI-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv. A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_fddidev(struct net_device *dev, int sizeof_priv)
+{
+ return init_netdev(dev, sizeof_priv, "fddi%d", fddi_setup);
+}
+
+/**
+ * alloc_fddidev - Register FDDI device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this FDDI device
+ *
+ * Fill in the fields of the device structure with FDDI-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_fddidev(int sizeof_priv)
+{
+ return alloc_netdev(sizeof_priv, "fddi%d", fddi_setup);
+}
+
+EXPORT_SYMBOL(init_fddidev);
+EXPORT_SYMBOL(alloc_fddidev);
+
+static int fddi_change_mtu(struct net_device *dev, int new_mtu)
+{
+ if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
+ return(-EINVAL);
+ dev->mtu = new_mtu;
+ return(0);
+}
+
+#endif /* CONFIG_FDDI */
+
+#ifdef CONFIG_HIPPI
+
+static int hippi_change_mtu(struct net_device *dev, int new_mtu)
+{
+ /*
+ * HIPPI's got these nice large MTUs.
+ */
+ if ((new_mtu < 68) || (new_mtu > 65280))
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return(0);
+}
+
+
+/*
+ * For HIPPI we will actually use the lower 4 bytes of the hardware
+ * address as the I-FIELD rather than the actual hardware address.
+ */
+static int hippi_mac_addr(struct net_device *dev, void *p)
+{
+ struct sockaddr *addr = p;
+ if (netif_running(dev))
+ return -EBUSY;
+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ return 0;
+}
+
+
+/**
+ * init_hippi_dev - Register HIPPI device
+ * @dev: A HIPPI device structure to be filled in, or %NULL if a new
+ * struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this ethernet device
+ *
+ * Fill in the fields of the device structure with HIPPI-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv. A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_hippi_dev(struct net_device *dev, int sizeof_priv)
+{
+ return init_netdev(dev, sizeof_priv, "hip%d", hippi_setup);
+}
+
+/**
+ * alloc_hippi_dev - Register HIPPI device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this HIPPI device
+ *
+ * Fill in the fields of the device structure with HIPPI-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_hippi_dev(int sizeof_priv)
+{
+ return alloc_netdev(sizeof_priv, "hip%d", hippi_setup);
+}
+
+int register_hipdev(struct net_device *dev)
+{
+ return __register_netdev(dev);
+}
+
+void unregister_hipdev(struct net_device *dev)
+{
+ unregister_netdev(dev);
+}
+
+EXPORT_SYMBOL(init_hippi_dev);
+EXPORT_SYMBOL(alloc_hippi_dev);
+EXPORT_SYMBOL(register_hipdev);
+EXPORT_SYMBOL(unregister_hipdev);
+
+static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p)
+{
+ /* Never send broadcast/multicast ARP messages */
+ p->mcast_probes = 0;
+
+ /* In IPv6 unicast probes are valid even on NBMA,
+ * because they are encapsulated in normal IPv6 protocol.
+ * Should be a generic flag.
+ */
+ if (p->tbl->family != AF_INET6)
+ p->ucast_probes = 0;
+ return 0;
+}
+
+#endif /* CONFIG_HIPPI */
+
+void ether_setup(struct net_device *dev)
+{
+ /* Fill in the fields of the device structure with ethernet-generic values.
+ This should be in a common file instead of per-driver. */
+
+ dev->change_mtu = eth_change_mtu;
+ dev->hard_header = eth_header;
+ dev->rebuild_header = eth_rebuild_header;
+ dev->set_mac_address = eth_mac_addr;
+ dev->hard_header_cache = eth_header_cache;
+ dev->header_cache_update= eth_header_cache_update;
+ dev->hard_header_parse = eth_header_parse;
+
+ dev->type = 0; //ARPHRD_ETHER;
+ dev->hard_header_len = ETH_HLEN;
+ dev->mtu = 1500; /* eth_mtu */
+ dev->addr_len = ETH_ALEN;
+
+ memset(dev->broadcast,0xFF, ETH_ALEN);
+
+ /* New-style flags. */
+ dev->flags = IFF_BROADCAST|IFF_MULTICAST;
+}
+EXPORT_SYMBOL(ether_setup);
+
+#ifdef CONFIG_FDDI
+
+void fddi_setup(struct net_device *dev)
+{
+ /*
+ * Fill in the fields of the device structure with FDDI-generic values.
+ * This should be in a common file instead of per-driver.
+ */
+
+ dev->change_mtu = fddi_change_mtu;
+ dev->hard_header = fddi_header;
+ dev->rebuild_header = fddi_rebuild_header;
+
+ dev->type = ARPHRD_FDDI;
+ dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */
+ dev->mtu = FDDI_K_SNAP_DLEN; /* Assume max payload of 802.2 SNAP frame */
+ dev->addr_len = FDDI_K_ALEN;
+
+ memset(dev->broadcast, 0xFF, FDDI_K_ALEN);
+
+ /* New-style flags */
+ dev->flags = IFF_BROADCAST | IFF_MULTICAST;
+}
+EXPORT_SYMBOL(fddi_setup);
+
+#endif /* CONFIG_FDDI */
+
+#ifdef CONFIG_HIPPI
+void hippi_setup(struct net_device *dev)
+{
+ dev->set_multicast_list = NULL;
+ dev->change_mtu = hippi_change_mtu;
+ dev->hard_header = hippi_header;
+ dev->rebuild_header = hippi_rebuild_header;
+ dev->set_mac_address = hippi_mac_addr;
+ dev->hard_header_parse = NULL;
+ dev->hard_header_cache = NULL;
+ dev->header_cache_update = NULL;
+ dev->neigh_setup = hippi_neigh_setup_dev;
+
+ /*
+ * We don't support HIPPI `ARP' for the time being, and probably
+ * never will unless someone else implements it. However we
+ * still need a fake ARPHRD to make ifconfig and friends play ball.
+ */
+ dev->type = ARPHRD_HIPPI;
+ dev->hard_header_len = HIPPI_HLEN;
+ dev->mtu = 65280;
+ dev->addr_len = HIPPI_ALEN;
+
+ memset(dev->broadcast, 0xFF, HIPPI_ALEN);
+
+ /*
+ * HIPPI doesn't support broadcast+multicast and we only use
+ * static ARP tables. ARP is disabled by hippi_neigh_setup_dev.
+ */
+ dev->flags = 0;
+}
+EXPORT_SYMBOL(hippi_setup);
+#endif /* CONFIG_HIPPI */
+
+#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
+
+static int ltalk_change_mtu(struct net_device *dev, int mtu)
+{
+ return -EINVAL;
+}
+
+static int ltalk_mac_addr(struct net_device *dev, void *addr)
+{
+ return -EINVAL;
+}
+
+
+void ltalk_setup(struct net_device *dev)
+{
+ /* Fill in the fields of the device structure with localtalk-generic values. */
+
+ dev->change_mtu = ltalk_change_mtu;
+ dev->hard_header = NULL;
+ dev->rebuild_header = NULL;
+ dev->set_mac_address = ltalk_mac_addr;
+ dev->hard_header_cache = NULL;
+ dev->header_cache_update= NULL;
+
+ dev->type = ARPHRD_LOCALTLK;
+ dev->hard_header_len = LTALK_HLEN;
+ dev->mtu = LTALK_MTU;
+ dev->addr_len = LTALK_ALEN;
+
+ dev->broadcast[0] = 0xFF;
+
+ dev->flags = IFF_BROADCAST|IFF_MULTICAST|IFF_NOARP;
+}
+EXPORT_SYMBOL(ltalk_setup);
+
+#endif /* CONFIG_ATALK || CONFIG_ATALK_MODULE */
+
+int register_netdev(struct net_device *dev)
+{
+ int err;
+
+ rtnl_lock();
+
+ /*
+ * If the name is a format string the caller wants us to
+ * do a name allocation
+ */
+
+ if (strchr(dev->name, '%'))
+ {
+ err = dev_alloc_name(dev, dev->name);
+ if (err < 0)
+ goto out;
+ }
+
+ /*
+ * Back compatibility hook. Kill this one in 2.5
+ */
+
+ if (dev->name[0]==0 || dev->name[0]==' ')
+ {
+ err = dev_alloc_name(dev, "eth%d");
+ if (err < 0)
+ goto out;
+ }
+
+ err = register_netdevice(dev);
+
+out:
+ rtnl_unlock();
+ return err;
+}
+
+void unregister_netdev(struct net_device *dev)
+{
+ rtnl_lock();
+ unregister_netdevice(dev);
+ rtnl_unlock();
+}
+
+EXPORT_SYMBOL(register_netdev);
+EXPORT_SYMBOL(unregister_netdev);
+
+#ifdef CONFIG_TR
+
+void tr_setup(struct net_device *dev)
+{
+ /*
+ * Configure and register
+ */
+
+ dev->hard_header = tr_header;
+ dev->rebuild_header = tr_rebuild_header;
+
+ dev->type = ARPHRD_IEEE802_TR;
+ dev->hard_header_len = TR_HLEN;
+ dev->mtu = 2000;
+ dev->addr_len = TR_ALEN;
+
+ memset(dev->broadcast,0xFF, TR_ALEN);
+
+ /* New-style flags. */
+ dev->flags = IFF_BROADCAST | IFF_MULTICAST ;
+}
+
+/**
+ * init_trdev - Register token ring device
+ * @dev: A token ring device structure to be filled in, or %NULL if a new
+ * struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this ethernet device
+ *
+ * Fill in the fields of the device structure with token ring-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv. A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_trdev(struct net_device *dev, int sizeof_priv)
+{
+ return init_netdev(dev, sizeof_priv, "tr%d", tr_setup);
+}
+
+/**
+ * alloc_trdev - Register token ring device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this token ring device
+ *
+ * Fill in the fields of the device structure with token ring-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_trdev(int sizeof_priv)
+{
+ return alloc_netdev(sizeof_priv, "tr%d", tr_setup);
+}
+
+int register_trdev(struct net_device *dev)
+{
+ return __register_netdev(dev);
+}
+
+void unregister_trdev(struct net_device *dev)
+{
+ unregister_netdev(dev);
+}
+
+EXPORT_SYMBOL(tr_setup);
+EXPORT_SYMBOL(init_trdev);
+EXPORT_SYMBOL(alloc_trdev);
+EXPORT_SYMBOL(register_trdev);
+EXPORT_SYMBOL(unregister_trdev);
+
+#endif /* CONFIG_TR */
+
+
+#ifdef CONFIG_NET_FC
+
+void fc_setup(struct net_device *dev)
+{
+ dev->hard_header = fc_header;
+ dev->rebuild_header = fc_rebuild_header;
+
+ dev->type = ARPHRD_IEEE802;
+ dev->hard_header_len = FC_HLEN;
+ dev->mtu = 2024;
+ dev->addr_len = FC_ALEN;
+
+ memset(dev->broadcast,0xFF, FC_ALEN);
+
+ /* New-style flags. */
+ dev->flags = IFF_BROADCAST;
+}
+
+/**
+ * init_fcdev - Register fibre channel device
+ * @dev: A fibre channel device structure to be filled in, or %NULL if a new
+ * struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this ethernet device
+ *
+ * Fill in the fields of the device structure with fibre channel-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv. A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_fcdev(struct net_device *dev, int sizeof_priv)
+{
+ return init_netdev(dev, sizeof_priv, "fc%d", fc_setup);
+}
+
+/**
+ * alloc_fcdev - Register fibre channel device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ * for this fibre channel device
+ *
+ * Fill in the fields of the device structure with fibre channel-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_fcdev(int sizeof_priv)
+{
+ return alloc_netdev(sizeof_priv, "fc%d", fc_setup);
+}
+
+int register_fcdev(struct net_device *dev)
+{
+ return __register_netdev(dev);
+}
+
+void unregister_fcdev(struct net_device *dev)
+{
+ unregister_netdev(dev);
+}
+
+EXPORT_SYMBOL(fc_setup);
+EXPORT_SYMBOL(init_fcdev);
+EXPORT_SYMBOL(alloc_fcdev);
+EXPORT_SYMBOL(register_fcdev);
+EXPORT_SYMBOL(unregister_fcdev);
+
+#endif /* CONFIG_NET_FC */
+
diff --git a/xen/drivers/net/setup.c b/xen/drivers/net/setup.c
new file mode 100644
index 0000000000..1352a1cb4b
--- /dev/null
+++ b/xen/drivers/net/setup.c
@@ -0,0 +1,173 @@
+
+/*
+ * New style setup code for the network devices
+ */
+
+#include <linux/config.h>
+#include <linux/netdevice.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+//#include <linux/netlink.h>
+
+extern int slip_init_ctrl_dev(void);
+extern int x25_asy_init_ctrl_dev(void);
+
+extern int dmascc_init(void);
+
+extern int awc4500_pci_probe(void);
+extern int awc4500_isa_probe(void);
+extern int awc4500_pnp_probe(void);
+extern int awc4500_365_probe(void);
+extern int arcnet_init(void);
+extern int scc_enet_init(void);
+extern int fec_enet_init(void);
+extern int dlci_setup(void);
+extern int sdla_setup(void);
+extern int sdla_c_setup(void);
+extern int comx_init(void);
+extern int lmc_setup(void);
+
+extern int madgemc_probe(void);
+extern int uml_net_probe(void);
+
+/* Pad device name to IFNAMSIZ=16. E.g. __PAD6 is a string of 9 zeros. */
+#define __PAD6 "\0\0\0\0\0\0\0\0\0"
+#define __PAD5 __PAD6 "\0"
+#define __PAD4 __PAD5 "\0"
+#define __PAD3 __PAD4 "\0"
+#define __PAD2 __PAD3 "\0"
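+/* Each __PADn suits an n-character name: n characters, the (15 - n)
+ * explicit zeros from the macro, plus the literal's implicit NUL make
+ * exactly 16 bytes.  E.g. "cm0" __PAD3 is 3 + 12 + 1 = 16. */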
+
+
+/*
+ * Devices in this list must do new-style probing.  That is, they must
+ * allocate their own device objects and do their own bus scans.
+ */
+
+struct net_probe
+{
+ int (*probe)(void);
+ int status; /* non-zero if autoprobe has failed */
+};
+
+static struct net_probe pci_probes[] __initdata = {
+ /*
+ * Early setup devices
+ */
+
+#if defined(CONFIG_DMASCC)
+ {dmascc_init, 0},
+#endif
+#if defined(CONFIG_DLCI)
+ {dlci_setup, 0},
+#endif
+#if defined(CONFIG_SDLA)
+ {sdla_c_setup, 0},
+#endif
+#if defined(CONFIG_ARCNET)
+ {arcnet_init, 0},
+#endif
+#if defined(CONFIG_SCC_ENET)
+ {scc_enet_init, 0},
+#endif
+#if defined(CONFIG_FEC_ENET)
+ {fec_enet_init, 0},
+#endif
+#if defined(CONFIG_COMX)
+ {comx_init, 0},
+#endif
+
+#if defined(CONFIG_LANMEDIA)
+ {lmc_setup, 0},
+#endif
+
+/*
+ * Wireless non-HAM
+ */
+#ifdef CONFIG_AIRONET4500_NONCS
+
+#ifdef CONFIG_AIRONET4500_PCI
+ {awc4500_pci_probe,0},
+#endif
+
+#ifdef CONFIG_AIRONET4500_PNP
+ {awc4500_pnp_probe,0},
+#endif
+
+#endif
+
+/*
+ * Token Ring Drivers
+ */
+#ifdef CONFIG_MADGEMC
+ {madgemc_probe, 0},
+#endif
+#ifdef CONFIG_UML_NET
+ {uml_net_probe, 0},
+#endif
+
+ {NULL, 0},
+};
+
+
+/*
+ * Run the updated device probes. These do not need a device passed
+ * into them.
+ */
+
+static void __init network_probe(void)
+{
+ struct net_probe *p = pci_probes;
+
+ while (p->probe != NULL)
+ {
+ p->status = p->probe();
+ p++;
+ }
+}
+
+
+/*
+ * Initialise the line discipline drivers
+ */
+
+static void __init network_ldisc_init(void)
+{
+#if defined(CONFIG_SLIP)
+ slip_init_ctrl_dev();
+#endif
+#if defined(CONFIG_X25_ASY)
+ x25_asy_init_ctrl_dev();
+#endif
+}
+
+
+static void __init special_device_init(void)
+{
+#ifdef CONFIG_NET_SB1000
+ {
+ extern int sb1000_probe(struct net_device *dev);
+ static struct net_device sb1000_dev =
+ {
+ "cm0" __PAD3, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, sb1000_probe
+ };
+ register_netdev(&sb1000_dev);
+ }
+#endif
+}
+
+/*
+ * Initialise network devices
+ */
+
+void __init net_device_init(void)
+{
+ /* Devices supporting the new probing API */
+ network_probe();
+ /* Line disciplines */
+ network_ldisc_init();
+ /* Special devices */
+ special_device_init();
+ /* That kicks off the legacy init functions */
+}
diff --git a/xen/drivers/net/tg3.c b/xen/drivers/net/tg3.c
new file mode 100644
index 0000000000..41f680904b
--- /dev/null
+++ b/xen/drivers/net/tg3.c
@@ -0,0 +1,6884 @@
+/* $Id: tg3.c,v 1.43.2.80 2002/03/14 00:10:04 davem Exp $
+ * tg3.c: Broadcom Tigon3 ethernet driver.
+ *
+ * Copyright (C) 2001, 2002 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2001, 2002 Jeff Garzik (jgarzik@pobox.com)
+ */
+
+#include <linux/config.h>
+
+#include <linux/module.h>
+
+//#include <linux/kernel.h>
+#include <linux/types.h>
+//#include <linux/compiler.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/byteorder.h>
+#include <asm/uaccess.h>
+
+#ifndef PCI_DMA_BUS_IS_PHYS
+#define PCI_DMA_BUS_IS_PHYS 1
+#endif
+
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#define TG3_VLAN_TAG_USED 1
+#else
+#define TG3_VLAN_TAG_USED 0
+#endif
+
+#ifdef NETIF_F_TSO
+/* XXX a bug in the TSO firmware hangs the TX cpu, so TSO stays
+ * disabled (note both branches define 0) until that is fixed. */
+#define TG3_DO_TSO 0
+#else
+#define TG3_DO_TSO 0
+#endif
+
+#include "tg3.h"
+
+#define DRV_MODULE_NAME "tg3"
+#define PFX DRV_MODULE_NAME ": "
+#define DRV_MODULE_VERSION "1.2a"
+#define DRV_MODULE_RELDATE "Dec 9, 2002"
+
+#define TG3_DEF_MAC_MODE 0
+#define TG3_DEF_RX_MODE 0
+#define TG3_DEF_TX_MODE 0
+#define TG3_DEF_MSG_ENABLE \
+ (NETIF_MSG_DRV | \
+ NETIF_MSG_PROBE | \
+ NETIF_MSG_LINK | \
+ NETIF_MSG_TIMER | \
+ NETIF_MSG_IFDOWN | \
+ NETIF_MSG_IFUP | \
+ NETIF_MSG_RX_ERR | \
+ NETIF_MSG_TX_ERR)
+
+/* length of time before we decide the hardware is borked,
+ * and dev->tx_timeout() should be called to fix the problem
+ */
+#define TG3_TX_TIMEOUT (5 * HZ)
+
+/* hardware minimum and maximum for a single frame's data payload */
+#define TG3_MIN_MTU 60
+#define TG3_MAX_MTU 9000
+
+/* These numbers seem to be hard coded in the NIC firmware somehow.
+ * You can't change the ring sizes, but you can change where you place
+ * them in the NIC onboard memory.
+ */
+#define TG3_RX_RING_SIZE 512
+#define TG3_DEF_RX_RING_PENDING 200
+#define TG3_RX_JUMBO_RING_SIZE 256
+#define TG3_DEF_RX_JUMBO_RING_PENDING 100
+#define TG3_RX_RCB_RING_SIZE 1024
+#define TG3_TX_RING_SIZE 512
+#define TG3_DEF_TX_RING_PENDING (TG3_TX_RING_SIZE - 1)
+
+#define TG3_RX_RING_BYTES (sizeof(struct tg3_rx_buffer_desc) * \
+ TG3_RX_RING_SIZE)
+#define TG3_RX_JUMBO_RING_BYTES (sizeof(struct tg3_rx_buffer_desc) * \
+ TG3_RX_JUMBO_RING_SIZE)
+#define TG3_RX_RCB_RING_BYTES (sizeof(struct tg3_rx_buffer_desc) * \
+ TG3_RX_RCB_RING_SIZE)
+#define TG3_TX_RING_BYTES (sizeof(struct tg3_tx_buffer_desc) * \
+ TG3_TX_RING_SIZE)
+#define TX_RING_GAP(TP) \
+ (TG3_TX_RING_SIZE - (TP)->tx_pending)
+#define TX_BUFFS_AVAIL(TP) \
+ (((TP)->tx_cons <= (TP)->tx_prod) ? \
+ (TP)->tx_cons + (TP)->tx_pending - (TP)->tx_prod : \
+ (TP)->tx_cons - (TP)->tx_prod - TX_RING_GAP(TP))
+#define NEXT_TX(N) (((N) + 1) & (TG3_TX_RING_SIZE - 1))
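+
+/* Worked example for TX_BUFFS_AVAIL with TG3_TX_RING_SIZE 512 and
+ * tx_pending 511: cons 4, prod 10 takes the first branch,
+ * 4 + 511 - 10 = 505 free slots; cons 500, prod 3 (producer wrapped)
+ * takes the second, 500 - 3 - (512 - 511) = 496. */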
+
+#define RX_PKT_BUF_SZ (1536 + tp->rx_offset + 64)
+#define RX_JUMBO_PKT_BUF_SZ (9046 + tp->rx_offset + 64)
+
+/* minimum number of free TX descriptors required to wake up TX process */
+#define TG3_TX_WAKEUP_THRESH (TG3_TX_RING_SIZE / 4)
+
+static char version[] __devinitdata =
+ DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+
+MODULE_AUTHOR("David S. Miller (davem@redhat.com) and Jeff Garzik (jgarzik@pobox.com)");
+MODULE_DESCRIPTION("Broadcom Tigon3 ethernet driver");
+MODULE_LICENSE("GPL");
+MODULE_PARM(tg3_debug, "i");
+MODULE_PARM_DESC(tg3_debug, "Tigon3 bitmapped debugging message enable value");
+
+static int tg3_debug = -1; /* -1 == use TG3_DEF_MSG_ENABLE as value */
+
+static struct pci_device_id tg3_pci_tbl[] __devinitdata = {
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5700,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5701,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702FE,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702X,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703X,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_SYSKONNECT, 0x4400,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC9100,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, tg3_pci_tbl);
+
+static void tg3_write_indirect_reg32(struct tg3 *tp, u32 off, u32 val)
+{
+ if ((tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) != 0) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&tp->indirect_lock, flags);
+ pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off);
+ pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val);
+ spin_unlock_irqrestore(&tp->indirect_lock, flags);
+ } else {
+ writel(val, tp->regs + off);
+ }
+}
+
+#define tw32(reg,val) tg3_write_indirect_reg32(tp,(reg),(val))
+#define tw32_mailbox(reg, val) writel(((val) & 0xffffffff), tp->regs + (reg))
+#define tw16(reg,val) writew(((val) & 0xffff), tp->regs + (reg))
+#define tw8(reg,val) writeb(((val) & 0xff), tp->regs + (reg))
+#define tr32(reg) readl(tp->regs + (reg))
+#define tr16(reg) readw(tp->regs + (reg))
+#define tr8(reg) readb(tp->regs + (reg))
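+
+/* A note on the recurring "tw32(...); tr32(...); udelay(...)" pattern
+ * throughout this driver: the read-back forces the preceding posted
+ * PCI write out to the chip before the delay starts, so the chip is
+ * guaranteed to have seen the write when the settling time elapses. */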
+
+static void tg3_write_mem(struct tg3 *tp, u32 off, u32 val)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&tp->indirect_lock, flags);
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off);
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val);
+
+ /* Always leave this as zero. */
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0);
+ spin_unlock_irqrestore(&tp->indirect_lock, flags);
+}
+
+static void tg3_read_mem(struct tg3 *tp, u32 off, u32 *val)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&tp->indirect_lock, flags);
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off);
+ pci_read_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val);
+
+ /* Always leave this as zero. */
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0);
+ spin_unlock_irqrestore(&tp->indirect_lock, flags);
+}
+
+static void tg3_disable_ints(struct tg3 *tp)
+{
+ tw32(TG3PCI_MISC_HOST_CTRL,
+ (tp->misc_host_ctrl | MISC_HOST_CTRL_MASK_PCI_INT));
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001);
+ tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+}
+
+static void tg3_enable_ints(struct tg3 *tp)
+{
+ tw32(TG3PCI_MISC_HOST_CTRL,
+ (tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT));
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000000);
+ tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+
+ if (tp->hw_status->status & SD_STATUS_UPDATED)
+ tw32(GRC_LOCAL_CTRL,
+ tp->grc_local_ctrl | GRC_LCLCTRL_SETINT);
+}
+
+static void tg3_switch_clocks(struct tg3 *tp)
+{
+ if (tr32(TG3PCI_CLOCK_CTRL) & CLOCK_CTRL_44MHZ_CORE) {
+ tw32(TG3PCI_CLOCK_CTRL,
+ (CLOCK_CTRL_44MHZ_CORE | CLOCK_CTRL_ALTCLK));
+ tr32(TG3PCI_CLOCK_CTRL);
+ udelay(40);
+ tw32(TG3PCI_CLOCK_CTRL,
+ (CLOCK_CTRL_ALTCLK));
+ tr32(TG3PCI_CLOCK_CTRL);
+ udelay(40);
+ }
+ tw32(TG3PCI_CLOCK_CTRL, 0);
+ tr32(TG3PCI_CLOCK_CTRL);
+ udelay(40);
+}
+
+#define PHY_BUSY_LOOPS 5000
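+
+/* MII management access: a frame is composed in MAC_MI_COM (PHY
+ * address, register number, read or write command, MI_COM_START),
+ * then MI_COM_BUSY is polled -- up to PHY_BUSY_LOOPS iterations of
+ * roughly 10us each -- until the PHY completes the transaction. */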
+
+static int tg3_readphy(struct tg3 *tp, int reg, u32 *val)
+{
+ u32 frame_val;
+ int loops, ret;
+
+ if ((tp->mi_mode & MAC_MI_MODE_AUTO_POLL) != 0) {
+ tw32(MAC_MI_MODE,
+ (tp->mi_mode & ~MAC_MI_MODE_AUTO_POLL));
+ tr32(MAC_MI_MODE);
+ udelay(40);
+ }
+
+ *val = 0xffffffff;
+
+ frame_val = ((PHY_ADDR << MI_COM_PHY_ADDR_SHIFT) &
+ MI_COM_PHY_ADDR_MASK);
+ frame_val |= ((reg << MI_COM_REG_ADDR_SHIFT) &
+ MI_COM_REG_ADDR_MASK);
+ frame_val |= (MI_COM_CMD_READ | MI_COM_START);
+
+ tw32(MAC_MI_COM, frame_val);
+ tr32(MAC_MI_COM);
+
+ loops = PHY_BUSY_LOOPS;
+ while (loops-- > 0) {
+ udelay(10);
+ frame_val = tr32(MAC_MI_COM);
+
+ if ((frame_val & MI_COM_BUSY) == 0) {
+ udelay(5);
+ frame_val = tr32(MAC_MI_COM);
+ break;
+ }
+ }
+
+ ret = -EBUSY;
+ if (loops > 0) {
+ *val = frame_val & MI_COM_DATA_MASK;
+ ret = 0;
+ }
+
+ if ((tp->mi_mode & MAC_MI_MODE_AUTO_POLL) != 0) {
+ tw32(MAC_MI_MODE, tp->mi_mode);
+ tr32(MAC_MI_MODE);
+ udelay(40);
+ }
+
+ return ret;
+}
+
+static int tg3_writephy(struct tg3 *tp, int reg, u32 val)
+{
+ u32 frame_val;
+ int loops, ret;
+
+ if ((tp->mi_mode & MAC_MI_MODE_AUTO_POLL) != 0) {
+ tw32(MAC_MI_MODE,
+ (tp->mi_mode & ~MAC_MI_MODE_AUTO_POLL));
+ tr32(MAC_MI_MODE);
+ udelay(40);
+ }
+
+ frame_val = ((PHY_ADDR << MI_COM_PHY_ADDR_SHIFT) &
+ MI_COM_PHY_ADDR_MASK);
+ frame_val |= ((reg << MI_COM_REG_ADDR_SHIFT) &
+ MI_COM_REG_ADDR_MASK);
+ frame_val |= (val & MI_COM_DATA_MASK);
+ frame_val |= (MI_COM_CMD_WRITE | MI_COM_START);
+
+ tw32(MAC_MI_COM, frame_val);
+ tr32(MAC_MI_COM);
+
+ loops = PHY_BUSY_LOOPS;
+ while (loops-- > 0) {
+ udelay(10);
+ frame_val = tr32(MAC_MI_COM);
+ if ((frame_val & MI_COM_BUSY) == 0) {
+ udelay(5);
+ frame_val = tr32(MAC_MI_COM);
+ break;
+ }
+ }
+
+ ret = -EBUSY;
+ if (loops > 0)
+ ret = 0;
+
+ if ((tp->mi_mode & MAC_MI_MODE_AUTO_POLL) != 0) {
+ tw32(MAC_MI_MODE, tp->mi_mode);
+ tr32(MAC_MI_MODE);
+ udelay(40);
+ }
+
+ return ret;
+}
+
+/* Reset the tigon3 PHY.  If FORCE is zero and there is already a
+ * valid link, the reset is skipped.
+ */
+static int tg3_phy_reset(struct tg3 *tp, int force)
+{
+ u32 phy_status, phy_control;
+ int err, limit;
+
+ err = tg3_readphy(tp, MII_BMSR, &phy_status);
+ err |= tg3_readphy(tp, MII_BMSR, &phy_status);
+ if (err != 0)
+ return -EBUSY;
+
+ /* If we have link, and not forcing a reset, then nothing
+ * to do.
+ */
+ if ((phy_status & BMSR_LSTATUS) != 0 && (force == 0))
+ return 0;
+
+ /* OK, reset it, and poll the BMCR_RESET bit until it
+ * clears or we time out.
+ */
+ phy_control = BMCR_RESET;
+ err = tg3_writephy(tp, MII_BMCR, phy_control);
+ if (err != 0)
+ return -EBUSY;
+
+ limit = 5000;
+ while (limit--) {
+ err = tg3_readphy(tp, MII_BMCR, &phy_control);
+ if (err != 0)
+ return -EBUSY;
+
+ if ((phy_control & BMCR_RESET) == 0) {
+ udelay(40);
+ return 0;
+ }
+ udelay(10);
+ }
+
+ return -EBUSY;
+}
+
+static int tg3_setup_phy(struct tg3 *);
+static int tg3_halt(struct tg3 *);
+
+static int tg3_set_power_state(struct tg3 *tp, int state)
+{
+ u32 misc_host_ctrl;
+ u16 power_control, power_caps;
+ int pm = tp->pm_cap;
+
+ /* Make sure register accesses (indirect or otherwise)
+ * will function correctly.
+ */
+ pci_write_config_dword(tp->pdev,
+ TG3PCI_MISC_HOST_CTRL,
+ tp->misc_host_ctrl);
+
+ pci_read_config_word(tp->pdev,
+ pm + PCI_PM_CTRL,
+ &power_control);
+ power_control |= PCI_PM_CTRL_PME_STATUS;
+ power_control &= ~(PCI_PM_CTRL_STATE_MASK);
+ switch (state) {
+ case 0:
+ power_control |= 0;
+ pci_write_config_word(tp->pdev,
+ pm + PCI_PM_CTRL,
+ power_control);
+ tw32(GRC_LOCAL_CTRL, tp->grc_local_ctrl);
+ tr32(GRC_LOCAL_CTRL);
+ udelay(100);
+
+ return 0;
+
+ case 1:
+ power_control |= 1;
+ break;
+
+ case 2:
+ power_control |= 2;
+ break;
+
+ case 3:
+ power_control |= 3;
+ break;
+
+ default:
+ printk(KERN_WARNING PFX "%s: Invalid power state (%d) "
+ "requested.\n",
+ tp->dev->name, state);
+ return -EINVAL;
+ };
+
+ power_control |= PCI_PM_CTRL_PME_ENABLE;
+
+ misc_host_ctrl = tr32(TG3PCI_MISC_HOST_CTRL);
+ tw32(TG3PCI_MISC_HOST_CTRL,
+ misc_host_ctrl | MISC_HOST_CTRL_MASK_PCI_INT);
+
+ if (tp->link_config.phy_is_low_power == 0) {
+ tp->link_config.phy_is_low_power = 1;
+ tp->link_config.orig_speed = tp->link_config.speed;
+ tp->link_config.orig_duplex = tp->link_config.duplex;
+ tp->link_config.orig_autoneg = tp->link_config.autoneg;
+ }
+
+ if (tp->phy_id != PHY_ID_SERDES) {
+ tp->link_config.speed = SPEED_10;
+ tp->link_config.duplex = DUPLEX_HALF;
+ tp->link_config.autoneg = AUTONEG_ENABLE;
+ tg3_setup_phy(tp);
+ }
+
+ tg3_halt(tp);
+
+ pci_read_config_word(tp->pdev, pm + PCI_PM_PMC, &power_caps);
+
+ if (tp->tg3_flags & TG3_FLAG_WOL_ENABLE) {
+ u32 mac_mode;
+
+ if (tp->phy_id != PHY_ID_SERDES) {
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x5a);
+ udelay(40);
+
+ mac_mode = MAC_MODE_PORT_MODE_MII;
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700 ||
+ !(tp->tg3_flags & TG3_FLAG_WOL_SPEED_100MB))
+ mac_mode |= MAC_MODE_LINK_POLARITY;
+ } else {
+ mac_mode = MAC_MODE_PORT_MODE_TBI;
+ }
+
+
+ if (((power_caps & PCI_PM_CAP_PME_D3cold) &&
+ (tp->tg3_flags & TG3_FLAG_WOL_ENABLE)))
+ mac_mode |= MAC_MODE_MAGIC_PKT_ENABLE;
+
+ tw32(MAC_MODE, mac_mode);
+ tr32(MAC_MODE);
+ udelay(100);
+
+ tw32(MAC_RX_MODE, RX_MODE_ENABLE);
+ tr32(MAC_RX_MODE);
+ udelay(10);
+ }
+
+ if (tp->tg3_flags & TG3_FLAG_WOL_SPEED_100MB) {
+ u32 base_val;
+
+ base_val = 0;
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701)
+ base_val |= (CLOCK_CTRL_RXCLK_DISABLE |
+ CLOCK_CTRL_TXCLK_DISABLE);
+
+ tw32(TG3PCI_CLOCK_CTRL, base_val |
+ CLOCK_CTRL_ALTCLK);
+ tr32(TG3PCI_CLOCK_CTRL);
+ udelay(40);
+
+ tw32(TG3PCI_CLOCK_CTRL, base_val |
+ CLOCK_CTRL_ALTCLK |
+ CLOCK_CTRL_44MHZ_CORE);
+ tr32(TG3PCI_CLOCK_CTRL);
+ udelay(40);
+
+ tw32(TG3PCI_CLOCK_CTRL, base_val |
+ CLOCK_CTRL_44MHZ_CORE);
+ tr32(TG3PCI_CLOCK_CTRL);
+ udelay(40);
+ } else {
+ u32 base_val;
+
+ base_val = 0;
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701)
+ base_val |= (CLOCK_CTRL_RXCLK_DISABLE |
+ CLOCK_CTRL_TXCLK_DISABLE);
+
+ tw32(TG3PCI_CLOCK_CTRL, base_val |
+ CLOCK_CTRL_ALTCLK |
+ CLOCK_CTRL_PWRDOWN_PLL133);
+ tr32(TG3PCI_CLOCK_CTRL);
+ udelay(40);
+ }
+
+ if (!(tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT) &&
+ (tp->tg3_flags & TG3_FLAG_WOL_ENABLE)) {
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701) {
+ tw32(GRC_LOCAL_CTRL,
+ (GRC_LCLCTRL_GPIO_OE0 |
+ GRC_LCLCTRL_GPIO_OE1 |
+ GRC_LCLCTRL_GPIO_OE2 |
+ GRC_LCLCTRL_GPIO_OUTPUT0 |
+ GRC_LCLCTRL_GPIO_OUTPUT1));
+ tr32(GRC_LOCAL_CTRL);
+ udelay(100);
+ } else {
+ tw32(GRC_LOCAL_CTRL,
+ (GRC_LCLCTRL_GPIO_OE0 |
+ GRC_LCLCTRL_GPIO_OE1 |
+ GRC_LCLCTRL_GPIO_OE2 |
+ GRC_LCLCTRL_GPIO_OUTPUT1 |
+ GRC_LCLCTRL_GPIO_OUTPUT2));
+ tr32(GRC_LOCAL_CTRL);
+ udelay(100);
+
+ tw32(GRC_LOCAL_CTRL,
+ (GRC_LCLCTRL_GPIO_OE0 |
+ GRC_LCLCTRL_GPIO_OE1 |
+ GRC_LCLCTRL_GPIO_OE2 |
+ GRC_LCLCTRL_GPIO_OUTPUT0 |
+ GRC_LCLCTRL_GPIO_OUTPUT1 |
+ GRC_LCLCTRL_GPIO_OUTPUT2));
+ tr32(GRC_LOCAL_CTRL);
+ udelay(100);
+
+ tw32(GRC_LOCAL_CTRL,
+ (GRC_LCLCTRL_GPIO_OE0 |
+ GRC_LCLCTRL_GPIO_OE1 |
+ GRC_LCLCTRL_GPIO_OE2 |
+ GRC_LCLCTRL_GPIO_OUTPUT0 |
+ GRC_LCLCTRL_GPIO_OUTPUT1));
+ tr32(GRC_LOCAL_CTRL);
+ udelay(100);
+ }
+ }
+
+ /* Finally, set the new power state. */
+ pci_write_config_word(tp->pdev, pm + PCI_PM_CTRL, power_control);
+
+ return 0;
+}
+
+static void tg3_link_report(struct tg3 *tp)
+{
+ if (!netif_carrier_ok(tp->dev)) {
+ printk(KERN_INFO PFX "%s: Link is down.\n", tp->dev->name);
+ } else {
+ printk(KERN_INFO PFX "%s: Link is up at %d Mbps, %s duplex.\n",
+ tp->dev->name,
+ (tp->link_config.active_speed == SPEED_1000 ?
+ 1000 :
+ (tp->link_config.active_speed == SPEED_100 ?
+ 100 : 10)),
+ (tp->link_config.active_duplex == DUPLEX_FULL ?
+ "full" : "half"));
+
+ printk(KERN_INFO PFX "%s: Flow control is %s for TX and "
+ "%s for RX.\n",
+ tp->dev->name,
+ (tp->tg3_flags & TG3_FLAG_TX_PAUSE) ? "on" : "off",
+ (tp->tg3_flags & TG3_FLAG_RX_PAUSE) ? "on" : "off");
+ }
+}
+
+static void tg3_setup_flow_control(struct tg3 *tp, u32 local_adv, u32 remote_adv)
+{
+ u32 new_tg3_flags = 0;
+
+ if (local_adv & ADVERTISE_PAUSE_CAP) {
+ if (local_adv & ADVERTISE_PAUSE_ASYM) {
+ if (remote_adv & LPA_PAUSE_CAP)
+ new_tg3_flags |=
+ (TG3_FLAG_RX_PAUSE |
+ TG3_FLAG_TX_PAUSE);
+ else if (remote_adv & LPA_PAUSE_ASYM)
+ new_tg3_flags |=
+ (TG3_FLAG_RX_PAUSE);
+ } else {
+ if (remote_adv & LPA_PAUSE_CAP)
+ new_tg3_flags |=
+ (TG3_FLAG_RX_PAUSE |
+ TG3_FLAG_TX_PAUSE);
+ }
+ } else if (local_adv & ADVERTISE_PAUSE_ASYM) {
+ if ((remote_adv & LPA_PAUSE_CAP) &&
+ (remote_adv & LPA_PAUSE_ASYM))
+ new_tg3_flags |= TG3_FLAG_TX_PAUSE;
+ }
+
+ tp->tg3_flags &= ~(TG3_FLAG_RX_PAUSE | TG3_FLAG_TX_PAUSE);
+ tp->tg3_flags |= new_tg3_flags;
+
+ if (new_tg3_flags & TG3_FLAG_RX_PAUSE)
+ tp->rx_mode |= RX_MODE_FLOW_CTRL_ENABLE;
+ else
+ tp->rx_mode &= ~RX_MODE_FLOW_CTRL_ENABLE;
+
+ if (new_tg3_flags & TG3_FLAG_TX_PAUSE)
+ tp->tx_mode |= TX_MODE_FLOW_CTRL_ENABLE;
+ else
+ tp->tx_mode &= ~TX_MODE_FLOW_CTRL_ENABLE;
+}
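+
+/* The resolution above matches the usual 802.3x pause scheme:
+ * symmetric pause on both ends enables flow control in both
+ * directions; a local asym-only advertisement yields TX-only pause
+ * when the partner advertises cap+asym; local cap+asym against a
+ * partner advertising asym-only yields RX-only pause. */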
+
+static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u16 *speed, u8 *duplex)
+{
+ switch (val & MII_TG3_AUX_STAT_SPDMASK) {
+ case MII_TG3_AUX_STAT_10HALF:
+ *speed = SPEED_10;
+ *duplex = DUPLEX_HALF;
+ break;
+
+ case MII_TG3_AUX_STAT_10FULL:
+ *speed = SPEED_10;
+ *duplex = DUPLEX_FULL;
+ break;
+
+ case MII_TG3_AUX_STAT_100HALF:
+ *speed = SPEED_100;
+ *duplex = DUPLEX_HALF;
+ break;
+
+ case MII_TG3_AUX_STAT_100FULL:
+ *speed = SPEED_100;
+ *duplex = DUPLEX_FULL;
+ break;
+
+ case MII_TG3_AUX_STAT_1000HALF:
+ *speed = SPEED_1000;
+ *duplex = DUPLEX_HALF;
+ break;
+
+ case MII_TG3_AUX_STAT_1000FULL:
+ *speed = SPEED_1000;
+ *duplex = DUPLEX_FULL;
+ break;
+
+ default:
+ *speed = SPEED_INVALID;
+ *duplex = DUPLEX_INVALID;
+ break;
+ };
+}
+
+static int tg3_phy_copper_begin(struct tg3 *tp, int wait_for_link)
+{
+ u32 new_adv;
+ int i;
+
+ if (tp->link_config.phy_is_low_power) {
+ /* Entering low power mode. Disable gigabit and
+ * 100baseT advertisements.
+ */
+ tg3_writephy(tp, MII_TG3_CTRL, 0);
+
+ new_adv = (ADVERTISE_10HALF | ADVERTISE_10FULL |
+ ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP);
+ if (tp->tg3_flags & TG3_FLAG_WOL_SPEED_100MB)
+ new_adv |= (ADVERTISE_100HALF | ADVERTISE_100FULL);
+
+ tg3_writephy(tp, MII_ADVERTISE, new_adv);
+ } else if (tp->link_config.speed == SPEED_INVALID) {
+ tp->link_config.advertising =
+ (ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full |
+ ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full |
+ ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full |
+ ADVERTISED_Autoneg | ADVERTISED_MII);
+
+ if (tp->tg3_flags & TG3_FLAG_10_100_ONLY)
+ tp->link_config.advertising &=
+ ~(ADVERTISED_1000baseT_Half |
+ ADVERTISED_1000baseT_Full);
+
+ new_adv = (ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP);
+ if (tp->link_config.advertising & ADVERTISED_10baseT_Half)
+ new_adv |= ADVERTISE_10HALF;
+ if (tp->link_config.advertising & ADVERTISED_10baseT_Full)
+ new_adv |= ADVERTISE_10FULL;
+ if (tp->link_config.advertising & ADVERTISED_100baseT_Half)
+ new_adv |= ADVERTISE_100HALF;
+ if (tp->link_config.advertising & ADVERTISED_100baseT_Full)
+ new_adv |= ADVERTISE_100FULL;
+ tg3_writephy(tp, MII_ADVERTISE, new_adv);
+
+ if (tp->link_config.advertising &
+ (ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full)) {
+ new_adv = 0;
+ if (tp->link_config.advertising & ADVERTISED_1000baseT_Half)
+ new_adv |= MII_TG3_CTRL_ADV_1000_HALF;
+ if (tp->link_config.advertising & ADVERTISED_1000baseT_Full)
+ new_adv |= MII_TG3_CTRL_ADV_1000_FULL;
+ if (!(tp->tg3_flags & TG3_FLAG_10_100_ONLY) &&
+ (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5701_B0))
+ new_adv |= (MII_TG3_CTRL_AS_MASTER |
+ MII_TG3_CTRL_ENABLE_AS_MASTER);
+ tg3_writephy(tp, MII_TG3_CTRL, new_adv);
+ } else {
+ tg3_writephy(tp, MII_TG3_CTRL, 0);
+ }
+ } else {
+ /* Asking for a specific link mode. */
+ if (tp->link_config.speed == SPEED_1000) {
+ new_adv = ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP;
+ tg3_writephy(tp, MII_ADVERTISE, new_adv);
+
+ if (tp->link_config.duplex == DUPLEX_FULL)
+ new_adv = MII_TG3_CTRL_ADV_1000_FULL;
+ else
+ new_adv = MII_TG3_CTRL_ADV_1000_HALF;
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5701_B0)
+ new_adv |= (MII_TG3_CTRL_AS_MASTER |
+ MII_TG3_CTRL_ENABLE_AS_MASTER);
+ tg3_writephy(tp, MII_TG3_CTRL, new_adv);
+ } else {
+ tg3_writephy(tp, MII_TG3_CTRL, 0);
+
+ new_adv = ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP;
+ if (tp->link_config.speed == SPEED_100) {
+ if (tp->link_config.duplex == DUPLEX_FULL)
+ new_adv |= ADVERTISE_100FULL;
+ else
+ new_adv |= ADVERTISE_100HALF;
+ } else {
+ if (tp->link_config.duplex == DUPLEX_FULL)
+ new_adv |= ADVERTISE_10FULL;
+ else
+ new_adv |= ADVERTISE_10HALF;
+ }
+ tg3_writephy(tp, MII_ADVERTISE, new_adv);
+ }
+ }
+
+ if (tp->link_config.autoneg == AUTONEG_DISABLE &&
+ tp->link_config.speed != SPEED_INVALID) {
+ u32 bmcr, orig_bmcr;
+
+ tp->link_config.active_speed = tp->link_config.speed;
+ tp->link_config.active_duplex = tp->link_config.duplex;
+
+ bmcr = 0;
+ switch (tp->link_config.speed) {
+ default:
+ case SPEED_10:
+ break;
+
+ case SPEED_100:
+ bmcr |= BMCR_SPEED100;
+ break;
+
+ case SPEED_1000:
+ bmcr |= TG3_BMCR_SPEED1000;
+ break;
+ };
+
+ if (tp->link_config.duplex == DUPLEX_FULL)
+ bmcr |= BMCR_FULLDPLX;
+
+ tg3_readphy(tp, MII_BMCR, &orig_bmcr);
+ if (bmcr != orig_bmcr) {
+ tg3_writephy(tp, MII_BMCR, BMCR_LOOPBACK);
+ for (i = 0; i < 15000; i++) {
+ u32 tmp;
+
+ udelay(10);
+ tg3_readphy(tp, MII_BMSR, &tmp);
+ tg3_readphy(tp, MII_BMSR, &tmp);
+ if (!(tmp & BMSR_LSTATUS)) {
+ udelay(40);
+ break;
+ }
+ }
+ tg3_writephy(tp, MII_BMCR, bmcr);
+ udelay(40);
+ }
+ } else {
+ tg3_writephy(tp, MII_BMCR,
+ BMCR_ANENABLE | BMCR_ANRESTART);
+ }
+
+ if (wait_for_link) {
+ tp->link_config.active_speed = SPEED_INVALID;
+ tp->link_config.active_duplex = DUPLEX_INVALID;
+ for (i = 0; i < 300000; i++) {
+ u32 tmp;
+
+ udelay(10);
+ tg3_readphy(tp, MII_BMSR, &tmp);
+ tg3_readphy(tp, MII_BMSR, &tmp);
+ if (!(tmp & BMSR_LSTATUS))
+ continue;
+
+ tg3_readphy(tp, MII_TG3_AUX_STAT, &tmp);
+ tg3_aux_stat_to_speed_duplex(tp, tmp,
+ &tp->link_config.active_speed,
+ &tp->link_config.active_duplex);
+ }
+ if (tp->link_config.active_speed == SPEED_INVALID)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int tg3_init_5401phy_dsp(struct tg3 *tp)
+{
+ int err;
+
+ /* Turn off tap power management. */
+ err = tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0c20);
+
+ err |= tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x0012);
+ err |= tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x1804);
+
+ err |= tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x0013);
+ err |= tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x1204);
+
+ err |= tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x8006);
+ err |= tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x0132);
+
+ err |= tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x8006);
+ err |= tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x0232);
+
+ err |= tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x201f);
+ err |= tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x0a20);
+
+ udelay(40);
+
+ return err;
+}
+
+static int tg3_setup_copper_phy(struct tg3 *tp)
+{
+ int current_link_up;
+ u32 bmsr, dummy;
+ u16 current_speed;
+ u8 current_duplex;
+ int i, err;
+
+ tw32(MAC_STATUS,
+ (MAC_STATUS_SYNC_CHANGED |
+ MAC_STATUS_CFG_CHANGED));
+ tr32(MAC_STATUS);
+ udelay(40);
+
+ tp->mi_mode = MAC_MI_MODE_BASE;
+ tw32(MAC_MI_MODE, tp->mi_mode);
+ tr32(MAC_MI_MODE);
+ udelay(40);
+
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x02);
+
+ if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5401) {
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+
+ if (!(tp->tg3_flags & TG3_FLAG_INIT_COMPLETE))
+ bmsr = 0;
+
+ if (!(bmsr & BMSR_LSTATUS)) {
+ err = tg3_init_5401phy_dsp(tp);
+ if (err)
+ return err;
+
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+ for (i = 0; i < 1000; i++) {
+ udelay(10);
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+ if (bmsr & BMSR_LSTATUS) {
+ udelay(40);
+ break;
+ }
+ }
+
+ if ((tp->phy_id & PHY_ID_REV_MASK) == PHY_REV_BCM5401_B0 &&
+ !(bmsr & BMSR_LSTATUS) &&
+ tp->link_config.active_speed == SPEED_1000) {
+ err = tg3_phy_reset(tp, 1);
+ if (!err)
+ err = tg3_init_5401phy_dsp(tp);
+ if (err)
+ return err;
+ }
+ }
+ } else if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5701_B0) {
+ /* 5701 {A0,B0} CRC bug workaround */
+ tg3_writephy(tp, 0x15, 0x0a75);
+ tg3_writephy(tp, 0x1c, 0x8c68);
+ tg3_writephy(tp, 0x1c, 0x8d68);
+ tg3_writephy(tp, 0x1c, 0x8c68);
+ }
+
+ /* Clear pending interrupts... */
+ tg3_readphy(tp, MII_TG3_ISTAT, &dummy);
+ tg3_readphy(tp, MII_TG3_ISTAT, &dummy);
+
+ if (tp->tg3_flags & TG3_FLAG_USE_MI_INTERRUPT)
+ tg3_writephy(tp, MII_TG3_IMASK, ~MII_TG3_INT_LINKCHG);
+ else
+ tg3_writephy(tp, MII_TG3_IMASK, ~0);
+
+ if (tp->led_mode == led_mode_three_link)
+ tg3_writephy(tp, MII_TG3_EXT_CTRL,
+ MII_TG3_EXT_CTRL_LNK3_LED_MODE);
+ else
+ tg3_writephy(tp, MII_TG3_EXT_CTRL, 0);
+
+ current_link_up = 0;
+ current_speed = SPEED_INVALID;
+ current_duplex = DUPLEX_INVALID;
+
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+ tg3_readphy(tp, MII_BMSR, &bmsr);
+
+ if (bmsr & BMSR_LSTATUS) {
+ u32 aux_stat, bmcr;
+
+ tg3_readphy(tp, MII_TG3_AUX_STAT, &aux_stat);
+ for (i = 0; i < 2000; i++) {
+ udelay(10);
+ tg3_readphy(tp, MII_TG3_AUX_STAT, &aux_stat);
+ if (aux_stat)
+ break;
+ }
+
+ tg3_aux_stat_to_speed_duplex(tp, aux_stat,
+ &current_speed,
+ &current_duplex);
+ tg3_readphy(tp, MII_BMCR, &bmcr);
+ tg3_readphy(tp, MII_BMCR, &bmcr);
+ if (tp->link_config.autoneg == AUTONEG_ENABLE) {
+ if (bmcr & BMCR_ANENABLE) {
+ u32 gig_ctrl;
+
+ current_link_up = 1;
+
+ /* Force autoneg restart if we are exiting
+ * low power mode.
+ */
+ tg3_readphy(tp, MII_TG3_CTRL, &gig_ctrl);
+ if (!(gig_ctrl & (MII_TG3_CTRL_ADV_1000_HALF |
+ MII_TG3_CTRL_ADV_1000_FULL))) {
+ current_link_up = 0;
+ }
+ } else {
+ current_link_up = 0;
+ }
+ } else {
+ if (!(bmcr & BMCR_ANENABLE) &&
+ tp->link_config.speed == current_speed &&
+ tp->link_config.duplex == current_duplex) {
+ current_link_up = 1;
+ } else {
+ current_link_up = 0;
+ }
+ }
+
+ tp->link_config.active_speed = current_speed;
+ tp->link_config.active_duplex = current_duplex;
+ }
+
+ if (current_link_up == 1 &&
+ (tp->link_config.active_duplex == DUPLEX_FULL) &&
+ (tp->link_config.autoneg == AUTONEG_ENABLE)) {
+ u32 local_adv, remote_adv;
+
+ tg3_readphy(tp, MII_ADVERTISE, &local_adv);
+ local_adv &= (ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM);
+
+ tg3_readphy(tp, MII_LPA, &remote_adv);
+ remote_adv &= (LPA_PAUSE_CAP | LPA_PAUSE_ASYM);
+
+ /* If we are not advertising full pause capability,
+ * something is wrong. Bring the link down and reconfigure.
+ */
+ if (local_adv != ADVERTISE_PAUSE_CAP) {
+ current_link_up = 0;
+ } else {
+ tg3_setup_flow_control(tp, local_adv, remote_adv);
+ }
+ }
+
+ if (current_link_up == 0) {
+ u32 tmp;
+
+ tg3_phy_copper_begin(tp, 0);
+
+ tg3_readphy(tp, MII_BMSR, &tmp);
+ tg3_readphy(tp, MII_BMSR, &tmp);
+ if (tmp & BMSR_LSTATUS)
+ current_link_up = 1;
+ }
+
+ tp->mac_mode &= ~MAC_MODE_PORT_MODE_MASK;
+ if (current_link_up == 1) {
+ if (tp->link_config.active_speed == SPEED_100 ||
+ tp->link_config.active_speed == SPEED_10)
+ tp->mac_mode |= MAC_MODE_PORT_MODE_MII;
+ else
+ tp->mac_mode |= MAC_MODE_PORT_MODE_GMII;
+ } else
+ tp->mac_mode |= MAC_MODE_PORT_MODE_GMII;
+
+ tp->mac_mode &= ~MAC_MODE_HALF_DUPLEX;
+ if (tp->link_config.active_duplex == DUPLEX_HALF)
+ tp->mac_mode |= MAC_MODE_HALF_DUPLEX;
+
+ tp->mac_mode &= ~MAC_MODE_LINK_POLARITY;
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700) {
+ if ((tp->led_mode == led_mode_link10) ||
+ (current_link_up == 1 &&
+ tp->link_config.active_speed == SPEED_10))
+ tp->mac_mode |= MAC_MODE_LINK_POLARITY;
+ } else {
+ if (current_link_up == 1)
+ tp->mac_mode |= MAC_MODE_LINK_POLARITY;
+ tw32(MAC_LED_CTRL, LED_CTRL_PHY_MODE_1);
+ }
+
+ /* ??? Without this setting Netgear GA302T PHY does not
+ * ??? send/receive packets...
+ */
+ if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5411 &&
+ tp->pci_chip_rev_id == CHIPREV_ID_5700_ALTIMA) {
+ tp->mi_mode |= MAC_MI_MODE_AUTO_POLL;
+ tw32(MAC_MI_MODE, tp->mi_mode);
+ tr32(MAC_MI_MODE);
+ udelay(40);
+ }
+
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ if (tp->tg3_flags &
+ (TG3_FLAG_USE_LINKCHG_REG |
+ TG3_FLAG_POLL_SERDES)) {
+ /* Polled via timer. */
+ tw32(MAC_EVENT, 0);
+ } else {
+ tw32(MAC_EVENT, MAC_EVENT_LNKSTATE_CHANGED);
+ }
+ tr32(MAC_EVENT);
+ udelay(40);
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 &&
+ current_link_up == 1 &&
+ tp->link_config.active_speed == SPEED_1000 &&
+ ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) ||
+ (tp->tg3_flags & TG3_FLAG_PCI_HIGH_SPEED))) {
+ udelay(120);
+ tw32(MAC_STATUS,
+ (MAC_STATUS_SYNC_CHANGED |
+ MAC_STATUS_CFG_CHANGED));
+ tr32(MAC_STATUS);
+ udelay(40);
+ tg3_write_mem(tp,
+ NIC_SRAM_FIRMWARE_MBOX,
+ NIC_SRAM_FIRMWARE_MBOX_MAGIC2);
+ }
+
+ if (current_link_up != netif_carrier_ok(tp->dev)) {
+ if (current_link_up)
+ netif_carrier_on(tp->dev);
+ else
+ netif_carrier_off(tp->dev);
+ tg3_link_report(tp);
+ }
+
+ return 0;
+}
+
+struct tg3_fiber_aneginfo {
+ int state;
+#define ANEG_STATE_UNKNOWN 0
+#define ANEG_STATE_AN_ENABLE 1
+#define ANEG_STATE_RESTART_INIT 2
+#define ANEG_STATE_RESTART 3
+#define ANEG_STATE_DISABLE_LINK_OK 4
+#define ANEG_STATE_ABILITY_DETECT_INIT 5
+#define ANEG_STATE_ABILITY_DETECT 6
+#define ANEG_STATE_ACK_DETECT_INIT 7
+#define ANEG_STATE_ACK_DETECT 8
+#define ANEG_STATE_COMPLETE_ACK_INIT 9
+#define ANEG_STATE_COMPLETE_ACK 10
+#define ANEG_STATE_IDLE_DETECT_INIT 11
+#define ANEG_STATE_IDLE_DETECT 12
+#define ANEG_STATE_LINK_OK 13
+#define ANEG_STATE_NEXT_PAGE_WAIT_INIT 14
+#define ANEG_STATE_NEXT_PAGE_WAIT 15
+
+ u32 flags;
+#define MR_AN_ENABLE 0x00000001
+#define MR_RESTART_AN 0x00000002
+#define MR_AN_COMPLETE 0x00000004
+#define MR_PAGE_RX 0x00000008
+#define MR_NP_LOADED 0x00000010
+#define MR_TOGGLE_TX 0x00000020
+#define MR_LP_ADV_FULL_DUPLEX 0x00000040
+#define MR_LP_ADV_HALF_DUPLEX 0x00000080
+#define MR_LP_ADV_SYM_PAUSE 0x00000100
+#define MR_LP_ADV_ASYM_PAUSE 0x00000200
+#define MR_LP_ADV_REMOTE_FAULT1 0x00000400
+#define MR_LP_ADV_REMOTE_FAULT2 0x00000800
+#define MR_LP_ADV_NEXT_PAGE 0x00001000
+#define MR_TOGGLE_RX 0x00002000
+#define MR_NP_RX 0x00004000
+
+#define MR_LINK_OK 0x80000000
+
+ unsigned long link_time, cur_time;
+
+ u32 ability_match_cfg;
+ int ability_match_count;
+
+ char ability_match, idle_match, ack_match;
+
+ u32 txconfig, rxconfig;
+#define ANEG_CFG_NP 0x00000080
+#define ANEG_CFG_ACK 0x00000040
+#define ANEG_CFG_RF2 0x00000020
+#define ANEG_CFG_RF1 0x00000010
+#define ANEG_CFG_PS2 0x00000001
+#define ANEG_CFG_PS1 0x00008000
+#define ANEG_CFG_HD 0x00004000
+#define ANEG_CFG_FD 0x00002000
+#define ANEG_CFG_INVAL 0x00001f06
+
+};
+#define ANEG_OK 0
+#define ANEG_DONE 1
+#define ANEG_TIMER_ENAB 2
+#define ANEG_FAILED -1
+
+#define ANEG_STATE_SETTLE_TIME 10000
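+
+/* The state machine below is, in effect, a software rendition of
+ * IEEE 802.3z clause-37 style auto-negotiation for the fiber/TBI
+ * port: exchange config code-words (txconfig/rxconfig), wait for the
+ * partner's ability match and acknowledge, latch the link-partner
+ * flags, then settle into LINK_OK once idle is detected.  The caller
+ * drives it repeatedly, honouring ANEG_TIMER_ENAB, until it returns
+ * ANEG_DONE or ANEG_FAILED. */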
+
+static int tg3_fiber_aneg_smachine(struct tg3 *tp,
+ struct tg3_fiber_aneginfo *ap)
+{
+ unsigned long delta;
+ u32 rx_cfg_reg;
+ int ret;
+
+ if (ap->state == ANEG_STATE_UNKNOWN) {
+ ap->rxconfig = 0;
+ ap->link_time = 0;
+ ap->cur_time = 0;
+ ap->ability_match_cfg = 0;
+ ap->ability_match_count = 0;
+ ap->ability_match = 0;
+ ap->idle_match = 0;
+ ap->ack_match = 0;
+ }
+ ap->cur_time++;
+
+ if (tr32(MAC_STATUS) & MAC_STATUS_RCVD_CFG) {
+ rx_cfg_reg = tr32(MAC_RX_AUTO_NEG);
+
+ if (rx_cfg_reg != ap->ability_match_cfg) {
+ ap->ability_match_cfg = rx_cfg_reg;
+ ap->ability_match = 0;
+ ap->ability_match_count = 0;
+ } else {
+ if (++ap->ability_match_count > 1) {
+ ap->ability_match = 1;
+ ap->ability_match_cfg = rx_cfg_reg;
+ }
+ }
+ if (rx_cfg_reg & ANEG_CFG_ACK)
+ ap->ack_match = 1;
+ else
+ ap->ack_match = 0;
+
+ ap->idle_match = 0;
+ } else {
+ ap->idle_match = 1;
+ ap->ability_match_cfg = 0;
+ ap->ability_match_count = 0;
+ ap->ability_match = 0;
+ ap->ack_match = 0;
+
+ rx_cfg_reg = 0;
+ }
+
+ ap->rxconfig = rx_cfg_reg;
+ ret = ANEG_OK;
+
+ switch(ap->state) {
+ case ANEG_STATE_UNKNOWN:
+ if (ap->flags & (MR_AN_ENABLE | MR_RESTART_AN))
+ ap->state = ANEG_STATE_AN_ENABLE;
+
+ /* fallthru */
+ case ANEG_STATE_AN_ENABLE:
+ ap->flags &= ~(MR_AN_COMPLETE | MR_PAGE_RX);
+ if (ap->flags & MR_AN_ENABLE) {
+ ap->link_time = 0;
+ ap->cur_time = 0;
+ ap->ability_match_cfg = 0;
+ ap->ability_match_count = 0;
+ ap->ability_match = 0;
+ ap->idle_match = 0;
+ ap->ack_match = 0;
+
+ ap->state = ANEG_STATE_RESTART_INIT;
+ } else {
+ ap->state = ANEG_STATE_DISABLE_LINK_OK;
+ }
+ break;
+
+ case ANEG_STATE_RESTART_INIT:
+ ap->link_time = ap->cur_time;
+ ap->flags &= ~(MR_NP_LOADED);
+ ap->txconfig = 0;
+ tw32(MAC_TX_AUTO_NEG, 0);
+ tp->mac_mode |= MAC_MODE_SEND_CONFIGS;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ ret = ANEG_TIMER_ENAB;
+ ap->state = ANEG_STATE_RESTART;
+
+ /* fallthru */
+ case ANEG_STATE_RESTART:
+ delta = ap->cur_time - ap->link_time;
+ if (delta > ANEG_STATE_SETTLE_TIME) {
+ ap->state = ANEG_STATE_ABILITY_DETECT_INIT;
+ } else {
+ ret = ANEG_TIMER_ENAB;
+ }
+ break;
+
+ case ANEG_STATE_DISABLE_LINK_OK:
+ ret = ANEG_DONE;
+ break;
+
+ case ANEG_STATE_ABILITY_DETECT_INIT:
+ ap->flags &= ~(MR_TOGGLE_TX);
+ ap->txconfig = (ANEG_CFG_FD | ANEG_CFG_PS1);
+ tw32(MAC_TX_AUTO_NEG, ap->txconfig);
+ tp->mac_mode |= MAC_MODE_SEND_CONFIGS;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ ap->state = ANEG_STATE_ABILITY_DETECT;
+ break;
+
+ case ANEG_STATE_ABILITY_DETECT:
+ if (ap->ability_match != 0 && ap->rxconfig != 0) {
+ ap->state = ANEG_STATE_ACK_DETECT_INIT;
+ }
+ break;
+
+ case ANEG_STATE_ACK_DETECT_INIT:
+ ap->txconfig |= ANEG_CFG_ACK;
+ tw32(MAC_TX_AUTO_NEG, ap->txconfig);
+ tp->mac_mode |= MAC_MODE_SEND_CONFIGS;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ ap->state = ANEG_STATE_ACK_DETECT;
+
+ /* fallthru */
+ case ANEG_STATE_ACK_DETECT:
+ if (ap->ack_match != 0) {
+ if ((ap->rxconfig & ~ANEG_CFG_ACK) ==
+ (ap->ability_match_cfg & ~ANEG_CFG_ACK)) {
+ ap->state = ANEG_STATE_COMPLETE_ACK_INIT;
+ } else {
+ ap->state = ANEG_STATE_AN_ENABLE;
+ }
+ } else if (ap->ability_match != 0 &&
+ ap->rxconfig == 0) {
+ ap->state = ANEG_STATE_AN_ENABLE;
+ }
+ break;
+
+ case ANEG_STATE_COMPLETE_ACK_INIT:
+ if (ap->rxconfig & ANEG_CFG_INVAL) {
+ ret = ANEG_FAILED;
+ break;
+ }
+ ap->flags &= ~(MR_LP_ADV_FULL_DUPLEX |
+ MR_LP_ADV_HALF_DUPLEX |
+ MR_LP_ADV_SYM_PAUSE |
+ MR_LP_ADV_ASYM_PAUSE |
+ MR_LP_ADV_REMOTE_FAULT1 |
+ MR_LP_ADV_REMOTE_FAULT2 |
+ MR_LP_ADV_NEXT_PAGE |
+ MR_TOGGLE_RX |
+ MR_NP_RX);
+ if (ap->rxconfig & ANEG_CFG_FD)
+ ap->flags |= MR_LP_ADV_FULL_DUPLEX;
+ if (ap->rxconfig & ANEG_CFG_HD)
+ ap->flags |= MR_LP_ADV_HALF_DUPLEX;
+ if (ap->rxconfig & ANEG_CFG_PS1)
+ ap->flags |= MR_LP_ADV_SYM_PAUSE;
+ if (ap->rxconfig & ANEG_CFG_PS2)
+ ap->flags |= MR_LP_ADV_ASYM_PAUSE;
+ if (ap->rxconfig & ANEG_CFG_RF1)
+ ap->flags |= MR_LP_ADV_REMOTE_FAULT1;
+ if (ap->rxconfig & ANEG_CFG_RF2)
+ ap->flags |= MR_LP_ADV_REMOTE_FAULT2;
+ if (ap->rxconfig & ANEG_CFG_NP)
+ ap->flags |= MR_LP_ADV_NEXT_PAGE;
+
+ ap->link_time = ap->cur_time;
+
+ ap->flags ^= (MR_TOGGLE_TX);
+ if (ap->rxconfig & 0x0008)
+ ap->flags |= MR_TOGGLE_RX;
+ if (ap->rxconfig & ANEG_CFG_NP)
+ ap->flags |= MR_NP_RX;
+ ap->flags |= MR_PAGE_RX;
+
+ ap->state = ANEG_STATE_COMPLETE_ACK;
+ ret = ANEG_TIMER_ENAB;
+ break;
+
+ case ANEG_STATE_COMPLETE_ACK:
+ if (ap->ability_match != 0 &&
+ ap->rxconfig == 0) {
+ ap->state = ANEG_STATE_AN_ENABLE;
+ break;
+ }
+ delta = ap->cur_time - ap->link_time;
+ if (delta > ANEG_STATE_SETTLE_TIME) {
+ if (!(ap->flags & (MR_LP_ADV_NEXT_PAGE))) {
+ ap->state = ANEG_STATE_IDLE_DETECT_INIT;
+ } else {
+ if ((ap->txconfig & ANEG_CFG_NP) == 0 &&
+ !(ap->flags & MR_NP_RX)) {
+ ap->state = ANEG_STATE_IDLE_DETECT_INIT;
+ } else {
+ ret = ANEG_FAILED;
+ }
+ }
+ }
+ break;
+
+ case ANEG_STATE_IDLE_DETECT_INIT:
+ ap->link_time = ap->cur_time;
+ tp->mac_mode &= ~MAC_MODE_SEND_CONFIGS;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ ap->state = ANEG_STATE_IDLE_DETECT;
+ ret = ANEG_TIMER_ENAB;
+ break;
+
+ case ANEG_STATE_IDLE_DETECT:
+ if (ap->ability_match != 0 &&
+ ap->rxconfig == 0) {
+ ap->state = ANEG_STATE_AN_ENABLE;
+ break;
+ }
+ delta = ap->cur_time - ap->link_time;
+ if (delta > ANEG_STATE_SETTLE_TIME) {
+ /* XXX another gem from the Broadcom driver :( */
+ ap->state = ANEG_STATE_LINK_OK;
+ }
+ break;
+
+ case ANEG_STATE_LINK_OK:
+ ap->flags |= (MR_AN_COMPLETE | MR_LINK_OK);
+ ret = ANEG_DONE;
+ break;
+
+ case ANEG_STATE_NEXT_PAGE_WAIT_INIT:
+ /* ??? unimplemented */
+ break;
+
+ case ANEG_STATE_NEXT_PAGE_WAIT:
+ /* ??? unimplemented */
+ break;
+
+ default:
+ ret = ANEG_FAILED;
+ break;
+	}
+
+ return ret;
+}
+
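+/* tg3_setup_fiber_phy() brings up the TBI (fiber) link. When
+ * autonegotiation is enabled it runs tg3_fiber_aneg_smachine()
+ * above in a polling loop until that returns ANEG_DONE or
+ * ANEG_FAILED. The state machine appears to follow the
+ * 1000BASE-X autonegotiation model (IEEE 802.3 clause 37),
+ * though the driver itself does not say so.
+ */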
+static int tg3_setup_fiber_phy(struct tg3 *tp)
+{
+ u32 orig_pause_cfg;
+ u16 orig_active_speed;
+ u8 orig_active_duplex;
+ int current_link_up;
+ int i;
+
+ orig_pause_cfg =
+ (tp->tg3_flags & (TG3_FLAG_RX_PAUSE |
+ TG3_FLAG_TX_PAUSE));
+ orig_active_speed = tp->link_config.active_speed;
+ orig_active_duplex = tp->link_config.active_duplex;
+
+ tp->mac_mode &= ~(MAC_MODE_PORT_MODE_MASK | MAC_MODE_HALF_DUPLEX);
+ tp->mac_mode |= MAC_MODE_PORT_MODE_TBI;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+	/* Reset when initializing for the first time, or when we have a link. */
+ if (!(tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) ||
+ (tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED)) {
+ /* Set PLL lock range. */
+ tg3_writephy(tp, 0x16, 0x8007);
+
+ /* SW reset */
+ tg3_writephy(tp, MII_BMCR, BMCR_RESET);
+
+ /* Wait for reset to complete. */
+ /* XXX schedule_timeout() ... */
+ for (i = 0; i < 500; i++)
+ udelay(10);
+
+ /* Config mode; select PMA/Ch 1 regs. */
+ tg3_writephy(tp, 0x10, 0x8411);
+
+ /* Enable auto-lock and comdet, select txclk for tx. */
+ tg3_writephy(tp, 0x11, 0x0a10);
+
+ tg3_writephy(tp, 0x18, 0x00a0);
+ tg3_writephy(tp, 0x16, 0x41ff);
+
+ /* Assert and deassert POR. */
+ tg3_writephy(tp, 0x13, 0x0400);
+ udelay(40);
+ tg3_writephy(tp, 0x13, 0x0000);
+
+ tg3_writephy(tp, 0x11, 0x0a50);
+ udelay(40);
+ tg3_writephy(tp, 0x11, 0x0a10);
+
+ /* Wait for signal to stabilize */
+ /* XXX schedule_timeout() ... */
+ for (i = 0; i < 15000; i++)
+ udelay(10);
+
+ /* Deselect the channel register so we can read the PHYID
+ * later.
+ */
+ tg3_writephy(tp, 0x10, 0x8011);
+ }
+
+ /* Enable link change interrupt unless serdes polling. */
+ if (!(tp->tg3_flags & TG3_FLAG_POLL_SERDES))
+ tw32(MAC_EVENT, MAC_EVENT_LNKSTATE_CHANGED);
+ else
+ tw32(MAC_EVENT, 0);
+ tr32(MAC_EVENT);
+ udelay(40);
+
+ current_link_up = 0;
+ if (tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED) {
+ if (tp->link_config.autoneg == AUTONEG_ENABLE &&
+ !(tp->tg3_flags & TG3_FLAG_GOT_SERDES_FLOWCTL)) {
+ struct tg3_fiber_aneginfo aninfo;
+ int status = ANEG_FAILED;
+ unsigned int tick;
+ u32 tmp;
+
+ memset(&aninfo, 0, sizeof(aninfo));
+ aninfo.flags |= (MR_AN_ENABLE);
+
+ tw32(MAC_TX_AUTO_NEG, 0);
+
+ tmp = tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK;
+ tw32(MAC_MODE, tmp | MAC_MODE_PORT_MODE_GMII);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ tw32(MAC_MODE, tp->mac_mode | MAC_MODE_SEND_CONFIGS);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ aninfo.state = ANEG_STATE_UNKNOWN;
+ aninfo.cur_time = 0;
+ tick = 0;
+ while (++tick < 195000) {
+ status = tg3_fiber_aneg_smachine(tp, &aninfo);
+ if (status == ANEG_DONE ||
+ status == ANEG_FAILED)
+ break;
+
+ udelay(1);
+ }
+
+ tp->mac_mode &= ~MAC_MODE_SEND_CONFIGS;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ if (status == ANEG_DONE &&
+ (aninfo.flags &
+ (MR_AN_COMPLETE | MR_LINK_OK |
+ MR_LP_ADV_FULL_DUPLEX))) {
+ u32 local_adv, remote_adv;
+
+ local_adv = ADVERTISE_PAUSE_CAP;
+ remote_adv = 0;
+ if (aninfo.flags & MR_LP_ADV_SYM_PAUSE)
+ remote_adv |= LPA_PAUSE_CAP;
+ if (aninfo.flags & MR_LP_ADV_ASYM_PAUSE)
+ remote_adv |= LPA_PAUSE_ASYM;
+
+ tg3_setup_flow_control(tp, local_adv, remote_adv);
+
+ tp->tg3_flags |=
+ TG3_FLAG_GOT_SERDES_FLOWCTL;
+ current_link_up = 1;
+ }
+ for (i = 0; i < 60; i++) {
+ udelay(20);
+ tw32(MAC_STATUS,
+ (MAC_STATUS_SYNC_CHANGED |
+ MAC_STATUS_CFG_CHANGED));
+ tr32(MAC_STATUS);
+ udelay(40);
+ if ((tr32(MAC_STATUS) &
+ (MAC_STATUS_SYNC_CHANGED |
+ MAC_STATUS_CFG_CHANGED)) == 0)
+ break;
+ }
+ if (current_link_up == 0 &&
+ (tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED)) {
+ current_link_up = 1;
+ }
+ } else {
+ /* Forcing 1000FD link up. */
+ current_link_up = 1;
+ }
+ }
+
+ tp->mac_mode &= ~MAC_MODE_LINK_POLARITY;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ tp->hw_status->status =
+ (SD_STATUS_UPDATED |
+ (tp->hw_status->status & ~SD_STATUS_LINK_CHG));
+
+ for (i = 0; i < 100; i++) {
+ udelay(20);
+ tw32(MAC_STATUS,
+ (MAC_STATUS_SYNC_CHANGED |
+ MAC_STATUS_CFG_CHANGED));
+ tr32(MAC_STATUS);
+ udelay(40);
+ if ((tr32(MAC_STATUS) &
+ (MAC_STATUS_SYNC_CHANGED |
+ MAC_STATUS_CFG_CHANGED)) == 0)
+ break;
+ }
+
+ if ((tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED) == 0)
+ current_link_up = 0;
+
+ if (current_link_up == 1) {
+ tp->link_config.active_speed = SPEED_1000;
+ tp->link_config.active_duplex = DUPLEX_FULL;
+ } else {
+ tp->link_config.active_speed = SPEED_INVALID;
+ tp->link_config.active_duplex = DUPLEX_INVALID;
+ }
+
+ if (current_link_up != netif_carrier_ok(tp->dev)) {
+ if (current_link_up)
+ netif_carrier_on(tp->dev);
+ else
+ netif_carrier_off(tp->dev);
+ tg3_link_report(tp);
+ } else {
+ u32 now_pause_cfg =
+ tp->tg3_flags & (TG3_FLAG_RX_PAUSE |
+ TG3_FLAG_TX_PAUSE);
+ if (orig_pause_cfg != now_pause_cfg ||
+ orig_active_speed != tp->link_config.active_speed ||
+ orig_active_duplex != tp->link_config.active_duplex)
+ tg3_link_report(tp);
+ }
+
+ if ((tr32(MAC_STATUS) & MAC_STATUS_PCS_SYNCED) == 0) {
+ tw32(MAC_MODE, tp->mac_mode | MAC_MODE_LINK_POLARITY);
+ tr32(MAC_MODE);
+ udelay(40);
+ if (tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) {
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+ }
+ }
+
+ return 0;
+}
+
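+/* Dispatch on PHY type, then widen the transmit slot time for
+ * half-duplex gigabit (0xff instead of the usual 32) via the
+ * MAC_TX_LENGTHS writes below.
+ */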
+static int tg3_setup_phy(struct tg3 *tp)
+{
+ int err;
+
+ if (tp->phy_id == PHY_ID_SERDES) {
+ err = tg3_setup_fiber_phy(tp);
+ } else {
+ err = tg3_setup_copper_phy(tp);
+ }
+
+ if (tp->link_config.active_speed == SPEED_1000 &&
+ tp->link_config.active_duplex == DUPLEX_HALF)
+ tw32(MAC_TX_LENGTHS,
+ ((2 << TX_LENGTHS_IPG_CRS_SHIFT) |
+ (6 << TX_LENGTHS_IPG_SHIFT) |
+ (0xff << TX_LENGTHS_SLOT_TIME_SHIFT)));
+ else
+ tw32(MAC_TX_LENGTHS,
+ ((2 << TX_LENGTHS_IPG_CRS_SHIFT) |
+ (6 << TX_LENGTHS_IPG_SHIFT) |
+ (32 << TX_LENGTHS_SLOT_TIME_SHIFT)));
+
+ return err;
+}
+
+/* Tigon3 never reports partial packet sends, so we do not
+ * need special logic to handle SKBs that have not had all
+ * of their frags sent yet, like SunGEM does.
+ */
+static void tg3_tx(struct tg3 *tp)
+{
+ u32 hw_idx = tp->hw_status->idx[0].tx_consumer;
+ u32 sw_idx = tp->tx_cons;
+
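+	/* hw_idx is the consumer index the chip last reported in the
+	 * status block; unmap and free every tx buffer between our
+	 * software consumer index and it.
+	 */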
+ while (sw_idx != hw_idx) {
+ struct tx_ring_info *ri = &tp->tx_buffers[sw_idx];
+ struct sk_buff *skb = ri->skb;
+ int i;
+
+ if (unlikely(skb == NULL))
+ BUG();
+
+ pci_unmap_single(tp->pdev,
+ pci_unmap_addr(ri, mapping),
+ (skb->len - skb->data_len),
+ PCI_DMA_TODEVICE);
+
+ ri->skb = NULL;
+
+ sw_idx = NEXT_TX(sw_idx);
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ if (unlikely(sw_idx == hw_idx))
+ BUG();
+
+ ri = &tp->tx_buffers[sw_idx];
+ if (unlikely(ri->skb != NULL))
+ BUG();
+
+ pci_unmap_page(tp->pdev,
+ pci_unmap_addr(ri, mapping),
+ skb_shinfo(skb)->frags[i].size,
+ PCI_DMA_TODEVICE);
+
+ sw_idx = NEXT_TX(sw_idx);
+ }
+
+ dev_kfree_skb_irq(skb);
+ }
+
+ tp->tx_cons = sw_idx;
+
+ if (netif_queue_stopped(tp->dev) &&
+ (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH))
+ netif_wake_queue(tp->dev);
+}
+
+/* Returns size of skb allocated or < 0 on error.
+ *
+ * We only need to fill in the address because the other members
+ * of the RX descriptor are invariant, see tg3_init_rings.
+ *
+ * Note the purposeful asymmetry of cpu vs. chip accesses.  For
+ * posting buffers we only dirty the first cache line of the RX
+ * descriptor (containing the address). Whereas for the RX status
+ * buffers the cpu only reads the last cacheline of the RX descriptor
+ * (to fetch the error flags, vlan tag, checksum, and opaque cookie).
+ */
+static int tg3_alloc_rx_skb(struct tg3 *tp, u32 opaque_key,
+ int src_idx, u32 dest_idx_unmasked)
+{
+ struct tg3_rx_buffer_desc *desc;
+ struct ring_info *map, *src_map;
+ struct sk_buff *skb;
+ dma_addr_t mapping;
+ int skb_size, dest_idx;
+
+ src_map = NULL;
+ switch (opaque_key) {
+ case RXD_OPAQUE_RING_STD:
+ dest_idx = dest_idx_unmasked % TG3_RX_RING_SIZE;
+ desc = &tp->rx_std[dest_idx];
+ map = &tp->rx_std_buffers[dest_idx];
+ if (src_idx >= 0)
+ src_map = &tp->rx_std_buffers[src_idx];
+ skb_size = RX_PKT_BUF_SZ;
+ break;
+
+ case RXD_OPAQUE_RING_JUMBO:
+ dest_idx = dest_idx_unmasked % TG3_RX_JUMBO_RING_SIZE;
+ desc = &tp->rx_jumbo[dest_idx];
+ map = &tp->rx_jumbo_buffers[dest_idx];
+ if (src_idx >= 0)
+ src_map = &tp->rx_jumbo_buffers[src_idx];
+ skb_size = RX_JUMBO_PKT_BUF_SZ;
+ break;
+
+ default:
+ return -EINVAL;
+	}
+
+ /* Do not overwrite any of the map or rp information
+ * until we are sure we can commit to a new buffer.
+ *
+ * Callers depend upon this behavior and assume that
+ * we leave everything unchanged if we fail.
+ */
+ skb = dev_alloc_skb(skb_size);
+ if (skb == NULL)
+ return -ENOMEM;
+
+ skb->dev = tp->dev;
+ skb_reserve(skb, tp->rx_offset);
+
+ mapping = pci_map_single(tp->pdev, skb->data,
+ skb_size - tp->rx_offset,
+ PCI_DMA_FROMDEVICE);
+
+ map->skb = skb;
+ pci_unmap_addr_set(map, mapping, mapping);
+
+ if (src_map != NULL)
+ src_map->skb = NULL;
+
+ desc->addr_hi = ((u64)mapping >> 32);
+ desc->addr_lo = ((u64)mapping & 0xffffffff);
+
+ return skb_size;
+}
+
+/* We only need to move over in the address because the other
+ * members of the RX descriptor are invariant. See notes above
+ * tg3_alloc_rx_skb for full details.
+ */
+static void tg3_recycle_rx(struct tg3 *tp, u32 opaque_key,
+ int src_idx, u32 dest_idx_unmasked)
+{
+ struct tg3_rx_buffer_desc *src_desc, *dest_desc;
+ struct ring_info *src_map, *dest_map;
+ int dest_idx;
+
+ switch (opaque_key) {
+ case RXD_OPAQUE_RING_STD:
+ dest_idx = dest_idx_unmasked % TG3_RX_RING_SIZE;
+ dest_desc = &tp->rx_std[dest_idx];
+ dest_map = &tp->rx_std_buffers[dest_idx];
+ src_desc = &tp->rx_std[src_idx];
+ src_map = &tp->rx_std_buffers[src_idx];
+ break;
+
+ case RXD_OPAQUE_RING_JUMBO:
+ dest_idx = dest_idx_unmasked % TG3_RX_JUMBO_RING_SIZE;
+ dest_desc = &tp->rx_jumbo[dest_idx];
+ dest_map = &tp->rx_jumbo_buffers[dest_idx];
+ src_desc = &tp->rx_jumbo[src_idx];
+ src_map = &tp->rx_jumbo_buffers[src_idx];
+ break;
+
+ default:
+ return;
+	}
+
+ dest_map->skb = src_map->skb;
+ pci_unmap_addr_set(dest_map, mapping,
+ pci_unmap_addr(src_map, mapping));
+ dest_desc->addr_hi = src_desc->addr_hi;
+ dest_desc->addr_lo = src_desc->addr_lo;
+
+ src_map->skb = NULL;
+}
+
+#if TG3_VLAN_TAG_USED
+static int tg3_vlan_rx(struct tg3 *tp, struct sk_buff *skb, u16 vlan_tag)
+{
+ return vlan_hwaccel_receive_skb(skb, tp->vlgrp, vlan_tag);
+}
+#endif
+
+/* The RX ring scheme is composed of multiple rings which post fresh
+ * buffers to the chip, and one special ring the chip uses to report
+ * status back to the host.
+ *
+ * The special ring reports the status of received packets to the
+ * host. The chip does not write into the original descriptor the
+ * RX buffer was obtained from. The chip simply takes the original
+ * descriptor as provided by the host, updates the status and length
+ * field, then writes this into the next status ring entry.
+ *
+ * Each ring the host uses to post buffers to the chip is described
+ * by a TG3_BDINFO entry in the chip's SRAM area.  When a packet arrives,
+ * it is first placed into the on-chip ram.  Once the packet's length
+ * is known, the chip walks down the TG3_BDINFO entries to select the
+ * ring: each TG3_BDINFO specifies a MAXLEN field, and the first entry
+ * whose MAXLEN covers the new packet's length is chosen.
+ *
+ * The "separate ring for rx status" scheme may sound queer, but it makes
+ * sense from a cache coherency perspective. If only the host writes
+ * to the buffer post rings, and only the chip writes to the rx status
+ * rings, then cache lines never move beyond shared-modified state.
+ * If both the host and chip were to write into the same ring, cache line
+ * eviction could occur since both entities want it in an exclusive state.
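+ *
+ * Roughly (a sketch, not a register-level diagram):
+ *
+ *   host --> std posting ring ---\
+ *   host --> jumbo posting ring --+--> chip --> status ring --> host
+ *
+ * The host is the only writer of the posting rings; the chip is
+ * the only writer of the status ring.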
+ */
+static int tg3_rx(struct tg3 *tp, int budget)
+{
+ u32 work_mask;
+ u32 rx_rcb_ptr = tp->rx_rcb_ptr;
+ u16 hw_idx, sw_idx;
+ int received;
+
+ hw_idx = tp->hw_status->idx[0].rx_producer;
+ sw_idx = rx_rcb_ptr % TG3_RX_RCB_RING_SIZE;
+ work_mask = 0;
+ received = 0;
+ while (sw_idx != hw_idx && budget > 0) {
+ struct tg3_rx_buffer_desc *desc = &tp->rx_rcb[sw_idx];
+ unsigned int len;
+ struct sk_buff *skb;
+ dma_addr_t dma_addr;
+ u32 opaque_key, desc_idx, *post_ptr;
+
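+		/* The opaque field is the cookie we wrote into the
+		 * descriptor at posting time: the ring selector lives in
+		 * the high bits and the posting-ring index in the low bits.
+		 */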
+ desc_idx = desc->opaque & RXD_OPAQUE_INDEX_MASK;
+ opaque_key = desc->opaque & RXD_OPAQUE_RING_MASK;
+ if (opaque_key == RXD_OPAQUE_RING_STD) {
+ dma_addr = pci_unmap_addr(&tp->rx_std_buffers[desc_idx],
+ mapping);
+ skb = tp->rx_std_buffers[desc_idx].skb;
+ post_ptr = &tp->rx_std_ptr;
+ } else if (opaque_key == RXD_OPAQUE_RING_JUMBO) {
+ dma_addr = pci_unmap_addr(&tp->rx_jumbo_buffers[desc_idx],
+ mapping);
+ skb = tp->rx_jumbo_buffers[desc_idx].skb;
+ post_ptr = &tp->rx_jumbo_ptr;
+ }
+ else {
+ goto next_pkt_nopost;
+ }
+
+ work_mask |= opaque_key;
+
+ if ((desc->err_vlan & RXD_ERR_MASK) != 0 &&
+ (desc->err_vlan != RXD_ERR_ODD_NIBBLE_RCVD_MII)) {
+ drop_it:
+ tg3_recycle_rx(tp, opaque_key,
+ desc_idx, *post_ptr);
+ drop_it_no_recycle:
+			/* The card keeps track of the other statistics. */
+ tp->net_stats.rx_dropped++;
+ goto next_pkt;
+ }
+
+ len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT) - 4; /* omit crc */
+
+ if (len > RX_COPY_THRESHOLD) {
+ int skb_size;
+
+ skb_size = tg3_alloc_rx_skb(tp, opaque_key,
+ desc_idx, *post_ptr);
+ if (skb_size < 0)
+ goto drop_it;
+
+ pci_unmap_single(tp->pdev, dma_addr,
+ skb_size - tp->rx_offset,
+ PCI_DMA_FROMDEVICE);
+
+ skb_put(skb, len);
+ } else {
+ struct sk_buff *copy_skb;
+
+ tg3_recycle_rx(tp, opaque_key,
+ desc_idx, *post_ptr);
+
+ copy_skb = dev_alloc_skb(len + 2);
+ if (copy_skb == NULL)
+ goto drop_it_no_recycle;
+
+ copy_skb->dev = tp->dev;
+ skb_reserve(copy_skb, 2);
+ skb_put(copy_skb, len);
+ pci_dma_sync_single(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
+ memcpy(copy_skb->data, skb->data, len);
+
+ /* We'll reuse the original ring buffer. */
+ skb = copy_skb;
+ }
+
+ if ((tp->tg3_flags & TG3_FLAG_RX_CHECKSUMS) &&
+ (desc->type_flags & RXD_FLAG_TCPUDP_CSUM)) {
+ skb->csum = htons((desc->ip_tcp_csum & RXD_TCPCSUM_MASK)
+ >> RXD_TCPCSUM_SHIFT);
+ skb->ip_summed = CHECKSUM_HW;
+ } else {
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+
+ skb->protocol = eth_type_trans(skb, tp->dev);
+#if TG3_VLAN_TAG_USED
+ if (tp->vlgrp != NULL &&
+ desc->type_flags & RXD_FLAG_VLAN) {
+ tg3_vlan_rx(tp, skb,
+ desc->err_vlan & RXD_VLAN_MASK);
+ } else
+#endif
+#ifdef NAPI
+ netif_receive_skb(skb);
+#else
+ netif_rx(skb);
+#endif
+ tp->dev->last_rx = jiffies;
+ received++;
+ budget--;
+
+next_pkt:
+ (*post_ptr)++;
+next_pkt_nopost:
+ rx_rcb_ptr++;
+ sw_idx = rx_rcb_ptr % TG3_RX_RCB_RING_SIZE;
+ }
+
+ /* ACK the status ring. */
+ tp->rx_rcb_ptr = rx_rcb_ptr;
+ tw32_mailbox(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW,
+ (rx_rcb_ptr % TG3_RX_RCB_RING_SIZE));
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW);
+
+ /* Refill RX ring(s). */
+ if (work_mask & RXD_OPAQUE_RING_STD) {
+ sw_idx = tp->rx_std_ptr % TG3_RX_RING_SIZE;
+ tw32_mailbox(MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW,
+ sw_idx);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW);
+ }
+ if (work_mask & RXD_OPAQUE_RING_JUMBO) {
+ sw_idx = tp->rx_jumbo_ptr % TG3_RX_JUMBO_RING_SIZE;
+ tw32_mailbox(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW,
+ sw_idx);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW);
+ }
+
+ return received;
+}
+
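+/* tg3_poll() is compiled two ways: with NAPI defined it is the
+ * ->poll handler and honors the budget; without NAPI it is called
+ * directly from the interrupt handler with a large fixed budget.
+ */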
+static int tg3_poll(struct net_device *netdev, int *budget)
+{
+ struct tg3 *tp = netdev->priv;
+ struct tg3_hw_status *sblk = tp->hw_status;
+ int done;
+#ifdef NAPI
+ unsigned long flags;
+ spin_lock_irqsave(&tp->lock, flags);
+#endif
+ if (!(tp->tg3_flags &
+ (TG3_FLAG_USE_LINKCHG_REG |
+ TG3_FLAG_POLL_SERDES))) {
+ if (sblk->status & SD_STATUS_LINK_CHG) {
+ sblk->status = SD_STATUS_UPDATED |
+ (sblk->status & ~SD_STATUS_LINK_CHG);
+ tg3_setup_phy(tp);
+ }
+ }
+
+ if (sblk->idx[0].tx_consumer != tp->tx_cons) {
+ spin_lock(&tp->tx_lock);
+ tg3_tx(tp);
+ spin_unlock(&tp->tx_lock);
+ }
+
+ done = 1;
+ if (sblk->idx[0].rx_producer != tp->rx_rcb_ptr) {
+ int work_done;
+#ifdef NAPI
+ int orig_budget = *budget;
+ if (orig_budget > netdev->quota)
+ orig_budget = netdev->quota;
+
+ work_done = tg3_rx(tp, orig_budget);
+
+ *budget -= work_done;
+ netdev->quota -= work_done;
+
+ if (work_done >= orig_budget)
+ done = 0;
+#else
+ work_done = tg3_rx(tp, 1000);
+#endif
+ }
+#ifdef NAPI
+ if (done) {
+ netif_rx_complete(netdev);
+ tg3_enable_ints(tp);
+ }
+
+ spin_unlock_irqrestore(&tp->lock, flags);
+#endif
+ return (done ? 0 : 1);
+}
+
+static inline unsigned int tg3_has_work(struct net_device *dev, struct tg3 *tp)
+{
+ struct tg3_hw_status *sblk = tp->hw_status;
+ unsigned int work_exists = 0;
+
+ if (!(tp->tg3_flags &
+ (TG3_FLAG_USE_LINKCHG_REG |
+ TG3_FLAG_POLL_SERDES))) {
+ if (sblk->status & SD_STATUS_LINK_CHG)
+ work_exists = 1;
+ }
+ if (sblk->idx[0].tx_consumer != tp->tx_cons ||
+ sblk->idx[0].rx_producer != tp->rx_rcb_ptr)
+ work_exists = 1;
+
+ return work_exists;
+}
+
+static void tg3_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct net_device *dev = dev_id;
+ struct tg3 *tp = dev->priv;
+#ifdef NAPI
+ struct tg3_hw_status *sblk = tp->hw_status;
+#endif
+ unsigned long flags;
+
+ spin_lock_irqsave(&tp->lock, flags);
+#ifdef NAPI
+ if (sblk->status & SD_STATUS_UPDATED) {
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+ 0x00000001);
+ tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+ sblk->status &= ~SD_STATUS_UPDATED;
+
+ if (likely(tg3_has_work(dev, tp)))
+ netif_rx_schedule(dev);
+ else {
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+ 0x00000000);
+ tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+ }
+ }
+#else
+ {
+ int budget = 1000;
+		tg3_poll(dev, &budget);
+ }
+#endif
+
+ spin_unlock_irqrestore(&tp->lock, flags);
+}
+
+static void tg3_init_rings(struct tg3 *);
+static int tg3_init_hw(struct tg3 *);
+
+static void tg3_tx_timeout(struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+
+ printk(KERN_ERR PFX "%s: transmit timed out, resetting\n",
+ dev->name);
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tg3_halt(tp);
+ tg3_init_rings(tp);
+ tg3_init_hw(tp);
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ netif_wake_queue(dev);
+}
+
+#if !PCI_DMA_BUS_IS_PHYS
+static void tg3_set_txd_addr(struct tg3 *tp, int entry, dma_addr_t mapping)
+{
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+ struct tg3_tx_buffer_desc *txd = &tp->tx_ring[entry];
+
+ txd->addr_hi = ((u64) mapping >> 32);
+ txd->addr_lo = ((u64) mapping & 0xffffffff);
+ } else {
+ unsigned long txd;
+
+ txd = (tp->regs +
+ NIC_SRAM_WIN_BASE +
+ NIC_SRAM_TX_BUFFER_DESC);
+ txd += (entry * TXD_SIZE);
+
+ if (sizeof(dma_addr_t) != sizeof(u32))
+ writel(((u64) mapping >> 32),
+ txd + TXD_ADDR + TG3_64BIT_REG_HIGH);
+
+ writel(((u64) mapping & 0xffffffff),
+ txd + TXD_ADDR + TG3_64BIT_REG_LOW);
+ }
+}
+#endif
+
+static void tg3_set_txd(struct tg3 *, int, dma_addr_t, int, u32, u32);
+
+static int tigon3_4gb_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb,
+ u32 guilty_entry, int guilty_len,
+ u32 last_plus_one, u32 *start, u32 mss)
+{
+ dma_addr_t new_addr;
+ u32 entry = *start;
+ int i;
+
+#if !PCI_DMA_BUS_IS_PHYS
+	/* With an IOMMU, just map the guilty area again; the new
+	 * mapping is guaranteed to use different bus addresses.
+	 */
+
+ i = 0;
+ while (entry != guilty_entry) {
+ entry = NEXT_TX(entry);
+ i++;
+ }
+ if (i == 0) {
+ new_addr = pci_map_single(tp->pdev, skb->data, guilty_len,
+ PCI_DMA_TODEVICE);
+ } else {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
+
+ new_addr = pci_map_page(tp->pdev,
+ frag->page, frag->page_offset,
+ guilty_len, PCI_DMA_TODEVICE);
+ }
+ pci_unmap_single(tp->pdev, pci_unmap_addr(&tp->tx_buffers[guilty_entry],
+ mapping),
+ guilty_len, PCI_DMA_TODEVICE);
+ tg3_set_txd_addr(tp, guilty_entry, new_addr);
+ pci_unmap_addr_set(&tp->tx_buffers[guilty_entry], mapping,
+ new_addr);
+ *start = last_plus_one;
+#else
+ /* Oh well, no IOMMU, have to allocate a whole new SKB. */
+ struct sk_buff *new_skb = skb_copy(skb, GFP_ATOMIC);
+
+ if (!new_skb) {
+ dev_kfree_skb(skb);
+ return -1;
+ }
+
+	/* NOTE: Broadcom's driver botches this case up really badly.
+ * This is especially true if any of the frag pages
+ * are in highmem. It will instantly oops in that case.
+ */
+
+	/* New SKB is guaranteed to be linear. */
+ entry = *start;
+ new_addr = pci_map_single(tp->pdev, new_skb->data, new_skb->len,
+ PCI_DMA_TODEVICE);
+ tg3_set_txd(tp, entry, new_addr, new_skb->len,
+ (skb->ip_summed == CHECKSUM_HW) ?
+ TXD_FLAG_TCPUDP_CSUM : 0, 1 | (mss << 1));
+ *start = NEXT_TX(entry);
+
+ /* Now clean up the sw ring entries. */
+ i = 0;
+ while (entry != last_plus_one) {
+ int len;
+
+ if (i == 0)
+ len = skb->len - skb->data_len;
+ else
+ len = skb_shinfo(skb)->frags[i-1].size;
+ pci_unmap_single(tp->pdev,
+ pci_unmap_addr(&tp->tx_buffers[entry], mapping),
+ len, PCI_DMA_TODEVICE);
+ if (i == 0) {
+ tp->tx_buffers[entry].skb = new_skb;
+ pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, new_addr);
+ } else {
+ tp->tx_buffers[entry].skb = NULL;
+ }
+ entry = NEXT_TX(entry);
+ }
+
+ dev_kfree_skb(skb);
+#endif
+
+ return 0;
+}
+
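+/* mss_and_is_end packs two values: bit 0 is the "last fragment"
+ * flag and bits 31:1 carry the TSO MSS, matching the
+ * "(i == last) | (mss << 1)" expressions at the call sites.
+ */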
+static void tg3_set_txd(struct tg3 *tp, int entry,
+ dma_addr_t mapping, int len, u32 flags,
+ u32 mss_and_is_end)
+{
+ int is_end = (mss_and_is_end & 0x1);
+ u32 mss = (mss_and_is_end >> 1);
+ u32 vlan_tag = 0;
+
+ if (is_end)
+ flags |= TXD_FLAG_END;
+ if (flags & TXD_FLAG_VLAN) {
+ vlan_tag = flags >> 16;
+ flags &= 0xffff;
+ }
+ vlan_tag |= (mss << TXD_MSS_SHIFT);
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+ struct tg3_tx_buffer_desc *txd = &tp->tx_ring[entry];
+
+ txd->addr_hi = ((u64) mapping >> 32);
+ txd->addr_lo = ((u64) mapping & 0xffffffff);
+ txd->len_flags = (len << TXD_LEN_SHIFT) | flags;
+ txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT;
+ } else {
+ struct tx_ring_info *txr = &tp->tx_buffers[entry];
+ unsigned long txd;
+
+ txd = (tp->regs +
+ NIC_SRAM_WIN_BASE +
+ NIC_SRAM_TX_BUFFER_DESC);
+ txd += (entry * TXD_SIZE);
+
+ /* Save some PIOs */
+ if (sizeof(dma_addr_t) != sizeof(u32))
+ writel(((u64) mapping >> 32),
+ txd + TXD_ADDR + TG3_64BIT_REG_HIGH);
+
+ writel(((u64) mapping & 0xffffffff),
+ txd + TXD_ADDR + TG3_64BIT_REG_LOW);
+ writel(len << TXD_LEN_SHIFT | flags, txd + TXD_LEN_FLAGS);
+ if (txr->prev_vlan_tag != vlan_tag) {
+ writel(vlan_tag << TXD_VLAN_TAG_SHIFT, txd + TXD_VLAN_TAG);
+ txr->prev_vlan_tag = vlan_tag;
+ }
+ }
+}
+
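+/* Test for the (apparent) hardware erratum worked around above: a
+ * TX buffer whose last byte would wrap past a 4GB boundary.  True
+ * when the high 32 bits of the address are zero, the low 32 bits
+ * sit in the last ~9KB below 4GB, and base + len + 8 wraps around;
+ * the 8 bytes of slack are presumably chip prefetch margin.
+ */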
+static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len)
+{
+ u32 base = (u32) mapping & 0xffffffff;
+
+ return ((base > 0xffffdcc0) &&
+ ((u64) mapping >> 32) == 0 &&
+ (base + len + 8 < base));
+}
+
+static int tg3_start_xmit_4gbug(struct sk_buff *skb, struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+ dma_addr_t mapping;
+ unsigned int i;
+ u32 len, entry, base_flags, mss;
+ int would_hit_hwbug;
+ unsigned long flags;
+
+ len = (skb->len - skb->data_len);
+
+ /* No BH disabling for tx_lock here. We are running in BH disabled
+ * context and TX reclaim runs via tp->poll inside of a software
+ * interrupt. Rejoice!
+ *
+ * Actually, things are not so simple. If we are to take a hw
+ * IRQ here, we can deadlock, consider:
+ *
+ * CPU1 CPU2
+ * tg3_start_xmit
+ * take tp->tx_lock
+ * tg3_timer
+ * take tp->lock
+ * tg3_interrupt
+ * spin on tp->lock
+ * spin on tp->tx_lock
+ *
+ * So we really do need to disable interrupts when taking
+ * tx_lock here.
+ */
+ spin_lock_irqsave(&tp->tx_lock, flags);
+
+ /* This is a hard error, log it. */
+ if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
+ netif_stop_queue(dev);
+ spin_unlock_irqrestore(&tp->tx_lock, flags);
+ printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
+ dev->name);
+ return 1;
+ }
+
+ entry = tp->tx_prod;
+ base_flags = 0;
+ if (skb->ip_summed == CHECKSUM_HW)
+ base_flags |= TXD_FLAG_TCPUDP_CSUM;
+#if TG3_DO_TSO != 0
+ if ((mss = skb_shinfo(skb)->tso_size) != 0)
+ base_flags |= (TXD_FLAG_CPU_PRE_DMA |
+ TXD_FLAG_CPU_POST_DMA);
+#else
+ mss = 0;
+#endif
+#if TG3_VLAN_TAG_USED
+ if (tp->vlgrp != NULL && vlan_tx_tag_present(skb))
+ base_flags |= (TXD_FLAG_VLAN |
+ (vlan_tx_tag_get(skb) << 16));
+#endif
+
+ /* Queue skb data, a.k.a. the main skb fragment. */
+ mapping = pci_map_single(tp->pdev, skb->data, len, PCI_DMA_TODEVICE);
+
+ tp->tx_buffers[entry].skb = skb;
+ pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, mapping);
+
+ would_hit_hwbug = 0;
+
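+	/* would_hit_hwbug stores (guilty entry + 1) so that zero can
+	 * mean "no hit".
+	 */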
+ if (tg3_4g_overflow_test(mapping, len))
+ would_hit_hwbug = entry + 1;
+
+ tg3_set_txd(tp, entry, mapping, len, base_flags,
+ (skb_shinfo(skb)->nr_frags == 0) | (mss << 1));
+
+ entry = NEXT_TX(entry);
+
+ /* Now loop through additional data fragments, and queue them. */
+ if (skb_shinfo(skb)->nr_frags > 0) {
+ unsigned int i, last;
+
+ last = skb_shinfo(skb)->nr_frags - 1;
+ for (i = 0; i <= last; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ len = frag->size;
+ mapping = pci_map_page(tp->pdev,
+ frag->page,
+ frag->page_offset,
+ len, PCI_DMA_TODEVICE);
+
+ tp->tx_buffers[entry].skb = NULL;
+ pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, mapping);
+
+ if (tg3_4g_overflow_test(mapping, len)) {
+ /* Only one should match. */
+ if (would_hit_hwbug)
+ BUG();
+ would_hit_hwbug = entry + 1;
+ }
+
+ tg3_set_txd(tp, entry, mapping, len,
+ base_flags, (i == last) | (mss << 1));
+
+ entry = NEXT_TX(entry);
+ }
+ }
+
+ if (would_hit_hwbug) {
+ u32 last_plus_one = entry;
+ u32 start;
+ unsigned int len = 0;
+
+ would_hit_hwbug -= 1;
+ entry = entry - 1 - skb_shinfo(skb)->nr_frags;
+ entry &= (TG3_TX_RING_SIZE - 1);
+ start = entry;
+ i = 0;
+ while (entry != last_plus_one) {
+ if (i == 0)
+ len = skb->len - skb->data_len;
+ else
+ len = skb_shinfo(skb)->frags[i-1].size;
+
+ if (entry == would_hit_hwbug)
+ break;
+
+ i++;
+ entry = NEXT_TX(entry);
+		}
+
+ /* If the workaround fails due to memory/mapping
+ * failure, silently drop this packet.
+ */
+ if (tigon3_4gb_hwbug_workaround(tp, skb,
+ entry, len,
+ last_plus_one,
+ &start, mss))
+ goto out_unlock;
+
+ entry = start;
+ }
+
+	/* Packets are ready, update the Tx producer index locally and on the card. */
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+ tw32_mailbox((MAILBOX_SNDHOST_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW), entry);
+ if (tp->tg3_flags & TG3_FLAG_TXD_MBOX_HWBUG)
+ tw32_mailbox((MAILBOX_SNDHOST_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW), entry);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_SNDHOST_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW);
+ } else {
+ /* First, make sure tg3 sees last descriptor fully
+ * in SRAM.
+ */
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_SNDNIC_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW);
+
+ tw32_mailbox((MAILBOX_SNDNIC_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW), entry);
+ if (tp->tg3_flags & TG3_FLAG_TXD_MBOX_HWBUG)
+ tw32_mailbox((MAILBOX_SNDNIC_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW), entry);
+
+ /* Now post the mailbox write itself. */
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_SNDNIC_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW);
+ }
+
+ tp->tx_prod = entry;
+ if (TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))
+ netif_stop_queue(dev);
+
+out_unlock:
+ spin_unlock_irqrestore(&tp->tx_lock, flags);
+
+ dev->trans_start = jiffies;
+
+ return 0;
+}
+
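+/* Fast-path version of the above for chips without the 4GB
+ * boundary erratum: same flow, minus the overflow tests and the
+ * double mailbox write (see the comment near the producer update
+ * below).
+ */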
+static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+ dma_addr_t mapping;
+ u32 len, entry, base_flags, mss;
+ unsigned long flags;
+
+ len = (skb->len - skb->data_len);
+
+ /* No BH disabling for tx_lock here. We are running in BH disabled
+ * context and TX reclaim runs via tp->poll inside of a software
+ * interrupt. Rejoice!
+ *
+ * Actually, things are not so simple. If we are to take a hw
+ * IRQ here, we can deadlock, consider:
+ *
+ * CPU1 CPU2
+ * tg3_start_xmit
+ * take tp->tx_lock
+ * tg3_timer
+ * take tp->lock
+ * tg3_interrupt
+ * spin on tp->lock
+ * spin on tp->tx_lock
+ *
+ * So we really do need to disable interrupts when taking
+ * tx_lock here.
+ */
+ spin_lock_irqsave(&tp->tx_lock, flags);
+
+ /* This is a hard error, log it. */
+ if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
+ netif_stop_queue(dev);
+ spin_unlock_irqrestore(&tp->tx_lock, flags);
+ printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
+ dev->name);
+ return 1;
+ }
+
+ entry = tp->tx_prod;
+ base_flags = 0;
+ if (skb->ip_summed == CHECKSUM_HW)
+ base_flags |= TXD_FLAG_TCPUDP_CSUM;
+#if TG3_DO_TSO != 0
+ if ((mss = skb_shinfo(skb)->tso_size) != 0)
+ base_flags |= (TXD_FLAG_CPU_PRE_DMA |
+ TXD_FLAG_CPU_POST_DMA);
+#else
+ mss = 0;
+#endif
+#if TG3_VLAN_TAG_USED
+ if (tp->vlgrp != NULL && vlan_tx_tag_present(skb))
+ base_flags |= (TXD_FLAG_VLAN |
+ (vlan_tx_tag_get(skb) << 16));
+#endif
+
+ /* Queue skb data, a.k.a. the main skb fragment. */
+ mapping = pci_map_single(tp->pdev, skb->data, len, PCI_DMA_TODEVICE);
+
+ tp->tx_buffers[entry].skb = skb;
+ pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, mapping);
+
+ tg3_set_txd(tp, entry, mapping, len, base_flags,
+ (skb_shinfo(skb)->nr_frags == 0) | (mss << 1));
+
+ entry = NEXT_TX(entry);
+
+ /* Now loop through additional data fragments, and queue them. */
+ if (skb_shinfo(skb)->nr_frags > 0) {
+ unsigned int i, last;
+
+ last = skb_shinfo(skb)->nr_frags - 1;
+ for (i = 0; i <= last; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ len = frag->size;
+ mapping = pci_map_page(tp->pdev,
+ frag->page,
+ frag->page_offset,
+ len, PCI_DMA_TODEVICE);
+
+ tp->tx_buffers[entry].skb = NULL;
+ pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, mapping);
+
+ tg3_set_txd(tp, entry, mapping, len,
+ base_flags, (i == last) | (mss << 1));
+
+ entry = NEXT_TX(entry);
+ }
+ }
+
+	/* Packets are ready, update the Tx producer index locally and on the card.
+ * We know this is not a 5700 (by virtue of not being a chip
+ * requiring the 4GB overflow workaround) so we can safely omit
+ * the double-write bug tests.
+ */
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+ tw32_mailbox((MAILBOX_SNDHOST_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW), entry);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_SNDHOST_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW);
+ } else {
+ /* First, make sure tg3 sees last descriptor fully
+ * in SRAM.
+ */
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_SNDNIC_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW);
+
+ tw32_mailbox((MAILBOX_SNDNIC_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW), entry);
+
+ /* Now post the mailbox write itself. */
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_SNDNIC_PROD_IDX_0 +
+ TG3_64BIT_REG_LOW);
+ }
+
+ tp->tx_prod = entry;
+ if (TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))
+ netif_stop_queue(dev);
+
+ spin_unlock_irqrestore(&tp->tx_lock, flags);
+
+ dev->trans_start = jiffies;
+
+ return 0;
+}
+
+static inline void tg3_set_mtu(struct net_device *dev, struct tg3 *tp,
+ int new_mtu)
+{
+ dev->mtu = new_mtu;
+
+ if (new_mtu > ETH_DATA_LEN)
+ tp->tg3_flags |= TG3_FLAG_JUMBO_ENABLE;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_JUMBO_ENABLE;
+}
+
+static int tg3_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct tg3 *tp = dev->priv;
+
+ if (new_mtu < TG3_MIN_MTU || new_mtu > TG3_MAX_MTU)
+ return -EINVAL;
+
+ if (!netif_running(dev)) {
+		/* We'll just catch it later when the
+		 * device is brought up.
+		 */
+ tg3_set_mtu(dev, tp, new_mtu);
+ return 0;
+ }
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tg3_halt(tp);
+
+ tg3_set_mtu(dev, tp, new_mtu);
+
+ tg3_init_rings(tp);
+ tg3_init_hw(tp);
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+}
+
+/* Free up pending packets in all rx/tx rings.
+ *
+ * The chip has been shut down and the driver detached from
+ * the networking, so no interrupts or new tx packets will
+ * end up in the driver. tp->{tx,}lock is not held and we are not
+ * in an interrupt context and thus may sleep.
+ */
+static void tg3_free_rings(struct tg3 *tp)
+{
+ struct ring_info *rxp;
+ int i;
+
+ for (i = 0; i < TG3_RX_RING_SIZE; i++) {
+ rxp = &tp->rx_std_buffers[i];
+
+ if (rxp->skb == NULL)
+ continue;
+ pci_unmap_single(tp->pdev,
+ pci_unmap_addr(rxp, mapping),
+ RX_PKT_BUF_SZ - tp->rx_offset,
+ PCI_DMA_FROMDEVICE);
+ dev_kfree_skb_any(rxp->skb);
+ rxp->skb = NULL;
+ }
+
+ for (i = 0; i < TG3_RX_JUMBO_RING_SIZE; i++) {
+ rxp = &tp->rx_jumbo_buffers[i];
+
+ if (rxp->skb == NULL)
+ continue;
+ pci_unmap_single(tp->pdev,
+ pci_unmap_addr(rxp, mapping),
+ RX_JUMBO_PKT_BUF_SZ - tp->rx_offset,
+ PCI_DMA_FROMDEVICE);
+ dev_kfree_skb_any(rxp->skb);
+ rxp->skb = NULL;
+ }
+
+ for (i = 0; i < TG3_TX_RING_SIZE; ) {
+ struct tx_ring_info *txp;
+ struct sk_buff *skb;
+ int j;
+
+ txp = &tp->tx_buffers[i];
+ skb = txp->skb;
+
+ if (skb == NULL) {
+ i++;
+ continue;
+ }
+
+ pci_unmap_single(tp->pdev,
+ pci_unmap_addr(txp, mapping),
+ (skb->len - skb->data_len),
+ PCI_DMA_TODEVICE);
+ txp->skb = NULL;
+
+ i++;
+
+ for (j = 0; j < skb_shinfo(skb)->nr_frags; j++) {
+ txp = &tp->tx_buffers[i & (TG3_TX_RING_SIZE - 1)];
+ pci_unmap_page(tp->pdev,
+ pci_unmap_addr(txp, mapping),
+ skb_shinfo(skb)->frags[j].size,
+ PCI_DMA_TODEVICE);
+ i++;
+ }
+
+ dev_kfree_skb_any(skb);
+ }
+}
+
+/* Initialize tx/rx rings for packet processing.
+ *
+ * The chip has been shut down and the driver detached from
+ * the networking, so no interrupts or new tx packets will
+ * end up in the driver. tp->{tx,}lock is not held and we are not
+ * in an interrupt context and thus may sleep.
+ */
+static void tg3_init_rings(struct tg3 *tp)
+{
+ unsigned long start, end;
+ u32 i;
+
+ /* Free up all the SKBs. */
+ tg3_free_rings(tp);
+
+ /* Zero out all descriptors. */
+ memset(tp->rx_std, 0, TG3_RX_RING_BYTES);
+ memset(tp->rx_jumbo, 0, TG3_RX_JUMBO_RING_BYTES);
+ memset(tp->rx_rcb, 0, TG3_RX_RCB_RING_BYTES);
+
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+ memset(tp->tx_ring, 0, TG3_TX_RING_BYTES);
+ } else {
+ start = (tp->regs +
+ NIC_SRAM_WIN_BASE +
+ NIC_SRAM_TX_BUFFER_DESC);
+ end = start + TG3_TX_RING_BYTES;
+ while (start < end) {
+ writel(0, start);
+ start += 4;
+ }
+ for (i = 0; i < TG3_TX_RING_SIZE; i++)
+ tp->tx_buffers[i].prev_vlan_tag = 0;
+ }
+
+	/* Initialize invariants of the rings; we only set this
+ * stuff once. This works because the card does not
+ * write into the rx buffer posting rings.
+ */
+ for (i = 0; i < TG3_RX_RING_SIZE; i++) {
+ struct tg3_rx_buffer_desc *rxd;
+
+ rxd = &tp->rx_std[i];
+ rxd->idx_len = (RX_PKT_BUF_SZ - tp->rx_offset - 64)
+ << RXD_LEN_SHIFT;
+ rxd->type_flags = (RXD_FLAG_END << RXD_FLAGS_SHIFT);
+ rxd->opaque = (RXD_OPAQUE_RING_STD |
+ (i << RXD_OPAQUE_INDEX_SHIFT));
+ }
+
+ if (tp->tg3_flags & TG3_FLAG_JUMBO_ENABLE) {
+ for (i = 0; i < TG3_RX_JUMBO_RING_SIZE; i++) {
+ struct tg3_rx_buffer_desc *rxd;
+
+ rxd = &tp->rx_jumbo[i];
+ rxd->idx_len = (RX_JUMBO_PKT_BUF_SZ - tp->rx_offset - 64)
+ << RXD_LEN_SHIFT;
+ rxd->type_flags = (RXD_FLAG_END << RXD_FLAGS_SHIFT) |
+ RXD_FLAG_JUMBO;
+ rxd->opaque = (RXD_OPAQUE_RING_JUMBO |
+ (i << RXD_OPAQUE_INDEX_SHIFT));
+ }
+ }
+
+ /* Now allocate fresh SKBs for each rx ring. */
+ for (i = 0; i < tp->rx_pending; i++) {
+ if (tg3_alloc_rx_skb(tp, RXD_OPAQUE_RING_STD,
+ -1, i) < 0)
+ break;
+ }
+
+ if (tp->tg3_flags & TG3_FLAG_JUMBO_ENABLE) {
+ for (i = 0; i < tp->rx_jumbo_pending; i++) {
+ if (tg3_alloc_rx_skb(tp, RXD_OPAQUE_RING_JUMBO,
+ -1, i) < 0)
+ break;
+ }
+ }
+}
+
+/*
+ * Must not be invoked with interrupt sources disabled; the
+ * hardware must already be shut down.
+ */
+static void tg3_free_consistent(struct tg3 *tp)
+{
+ if (tp->rx_std_buffers) {
+ kfree(tp->rx_std_buffers);
+ tp->rx_std_buffers = NULL;
+ }
+ if (tp->rx_std) {
+ pci_free_consistent(tp->pdev, TG3_RX_RING_BYTES,
+ tp->rx_std, tp->rx_std_mapping);
+ tp->rx_std = NULL;
+ }
+ if (tp->rx_jumbo) {
+ pci_free_consistent(tp->pdev, TG3_RX_JUMBO_RING_BYTES,
+ tp->rx_jumbo, tp->rx_jumbo_mapping);
+ tp->rx_jumbo = NULL;
+ }
+ if (tp->rx_rcb) {
+ pci_free_consistent(tp->pdev, TG3_RX_RCB_RING_BYTES,
+ tp->rx_rcb, tp->rx_rcb_mapping);
+ tp->rx_rcb = NULL;
+ }
+ if (tp->tx_ring) {
+ pci_free_consistent(tp->pdev, TG3_TX_RING_BYTES,
+ tp->tx_ring, tp->tx_desc_mapping);
+ tp->tx_ring = NULL;
+ }
+ if (tp->hw_status) {
+ pci_free_consistent(tp->pdev, TG3_HW_STATUS_SIZE,
+ tp->hw_status, tp->status_mapping);
+ tp->hw_status = NULL;
+ }
+ if (tp->hw_stats) {
+ pci_free_consistent(tp->pdev, sizeof(struct tg3_hw_stats),
+ tp->hw_stats, tp->stats_mapping);
+ tp->hw_stats = NULL;
+ }
+}
+
+/*
+ * Must not be invoked with interrupt sources disabled; the
+ * hardware must already be shut down.  Can sleep.
+ */
+static int tg3_alloc_consistent(struct tg3 *tp)
+{
+ tp->rx_std_buffers = kmalloc((sizeof(struct ring_info) *
+ (TG3_RX_RING_SIZE +
+ TG3_RX_JUMBO_RING_SIZE)) +
+ (sizeof(struct tx_ring_info) *
+ TG3_TX_RING_SIZE),
+ GFP_KERNEL);
+ if (!tp->rx_std_buffers)
+ return -ENOMEM;
+
+ memset(tp->rx_std_buffers, 0,
+ (sizeof(struct ring_info) *
+ (TG3_RX_RING_SIZE +
+ TG3_RX_JUMBO_RING_SIZE)) +
+ (sizeof(struct tx_ring_info) *
+ TG3_TX_RING_SIZE));
+
+ tp->rx_jumbo_buffers = &tp->rx_std_buffers[TG3_RX_RING_SIZE];
+ tp->tx_buffers = (struct tx_ring_info *)
+ &tp->rx_jumbo_buffers[TG3_RX_JUMBO_RING_SIZE];
+
+ tp->rx_std = pci_alloc_consistent(tp->pdev, TG3_RX_RING_BYTES,
+ &tp->rx_std_mapping);
+ if (!tp->rx_std)
+ goto err_out;
+
+ tp->rx_jumbo = pci_alloc_consistent(tp->pdev, TG3_RX_JUMBO_RING_BYTES,
+ &tp->rx_jumbo_mapping);
+
+ if (!tp->rx_jumbo)
+ goto err_out;
+
+ tp->rx_rcb = pci_alloc_consistent(tp->pdev, TG3_RX_RCB_RING_BYTES,
+ &tp->rx_rcb_mapping);
+ if (!tp->rx_rcb)
+ goto err_out;
+
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+ tp->tx_ring = pci_alloc_consistent(tp->pdev, TG3_TX_RING_BYTES,
+ &tp->tx_desc_mapping);
+ if (!tp->tx_ring)
+ goto err_out;
+ } else {
+ tp->tx_ring = NULL;
+ tp->tx_desc_mapping = 0;
+ }
+
+ tp->hw_status = pci_alloc_consistent(tp->pdev,
+ TG3_HW_STATUS_SIZE,
+ &tp->status_mapping);
+ if (!tp->hw_status)
+ goto err_out;
+
+ tp->hw_stats = pci_alloc_consistent(tp->pdev,
+ sizeof(struct tg3_hw_stats),
+ &tp->stats_mapping);
+ if (!tp->hw_stats)
+ goto err_out;
+
+ memset(tp->hw_status, 0, TG3_HW_STATUS_SIZE);
+ memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats));
+
+ return 0;
+
+err_out:
+ tg3_free_consistent(tp);
+ return -ENOMEM;
+}
+
+#define MAX_WAIT_CNT 1000
+
+/* To stop a block, clear the enable bit and poll till it
+ * clears. tp->lock is held.
+ */
+static int tg3_stop_block(struct tg3 *tp, unsigned long ofs, u32 enable_bit)
+{
+ unsigned int i;
+ u32 val;
+
+ val = tr32(ofs);
+ val &= ~enable_bit;
+ tw32(ofs, val);
+ tr32(ofs);
+
+ for (i = 0; i < MAX_WAIT_CNT; i++) {
+ udelay(100);
+ val = tr32(ofs);
+ if ((val & enable_bit) == 0)
+ break;
+ }
+
+ if (i == MAX_WAIT_CNT) {
+ printk(KERN_ERR PFX "tg3_stop_block timed out, "
+ "ofs=%lx enable_bit=%x\n",
+ ofs, enable_bit);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+/* tp->lock is held. */
+static int tg3_abort_hw(struct tg3 *tp)
+{
+ int i, err;
+
+ tg3_disable_ints(tp);
+
+ tp->rx_mode &= ~RX_MODE_ENABLE;
+ tw32(MAC_RX_MODE, tp->rx_mode);
+ tr32(MAC_RX_MODE);
+ udelay(10);
+
+ err = tg3_stop_block(tp, RCVBDI_MODE, RCVBDI_MODE_ENABLE);
+ err |= tg3_stop_block(tp, RCVLPC_MODE, RCVLPC_MODE_ENABLE);
+ err |= tg3_stop_block(tp, RCVLSC_MODE, RCVLSC_MODE_ENABLE);
+ err |= tg3_stop_block(tp, RCVDBDI_MODE, RCVDBDI_MODE_ENABLE);
+ err |= tg3_stop_block(tp, RCVDCC_MODE, RCVDCC_MODE_ENABLE);
+ err |= tg3_stop_block(tp, RCVCC_MODE, RCVCC_MODE_ENABLE);
+
+ err |= tg3_stop_block(tp, SNDBDS_MODE, SNDBDS_MODE_ENABLE);
+ err |= tg3_stop_block(tp, SNDBDI_MODE, SNDBDI_MODE_ENABLE);
+ err |= tg3_stop_block(tp, SNDDATAI_MODE, SNDDATAI_MODE_ENABLE);
+ err |= tg3_stop_block(tp, RDMAC_MODE, RDMAC_MODE_ENABLE);
+ err |= tg3_stop_block(tp, SNDDATAC_MODE, SNDDATAC_MODE_ENABLE);
+ err |= tg3_stop_block(tp, SNDBDC_MODE, SNDBDC_MODE_ENABLE);
+ if (err)
+ goto out;
+
+ tp->mac_mode &= ~MAC_MODE_TDE_ENABLE;
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ tp->tx_mode &= ~TX_MODE_ENABLE;
+ tw32(MAC_TX_MODE, tp->tx_mode);
+ tr32(MAC_TX_MODE);
+
+ for (i = 0; i < MAX_WAIT_CNT; i++) {
+ udelay(100);
+ if (!(tr32(MAC_TX_MODE) & TX_MODE_ENABLE))
+ break;
+ }
+ if (i >= MAX_WAIT_CNT) {
+ printk(KERN_ERR PFX "tg3_abort_hw timed out for %s, "
+ "TX_MODE_ENABLE will not clear MAC_TX_MODE=%08x\n",
+ tp->dev->name, tr32(MAC_TX_MODE));
+ return -ENODEV;
+ }
+
+ err = tg3_stop_block(tp, HOSTCC_MODE, HOSTCC_MODE_ENABLE);
+ err |= tg3_stop_block(tp, WDMAC_MODE, WDMAC_MODE_ENABLE);
+ err |= tg3_stop_block(tp, MBFREE_MODE, MBFREE_MODE_ENABLE);
+
+ tw32(FTQ_RESET, 0xffffffff);
+ tw32(FTQ_RESET, 0x00000000);
+
+ err |= tg3_stop_block(tp, BUFMGR_MODE, BUFMGR_MODE_ENABLE);
+ err |= tg3_stop_block(tp, MEMARB_MODE, MEMARB_MODE_ENABLE);
+ if (err)
+ goto out;
+
+ memset(tp->hw_status, 0, TG3_HW_STATUS_SIZE);
+
+out:
+ return err;
+}
+
+/* tp->lock is held. */
+static void tg3_chip_reset(struct tg3 *tp)
+{
+ u32 val;
+
+ /* Force NVRAM to settle.
+ * This deals with a chip bug which can result in EEPROM
+ * corruption.
+ */
+ if (tp->tg3_flags & TG3_FLAG_NVRAM) {
+ int i;
+
+ tw32(NVRAM_SWARB, SWARB_REQ_SET1);
+ for (i = 0; i < 100000; i++) {
+ if (tr32(NVRAM_SWARB) & SWARB_GNT1)
+ break;
+ udelay(10);
+ }
+ }
+
+ tw32(GRC_MISC_CFG, GRC_MISC_CFG_CORECLK_RESET);
+
+	/* Flush PCI posted writes.  The normal MMIO registers
+	 * are inaccessible at this time, so this is the only
+	 * reliable way to do it.  I tried to use indirect
+	 * register read/write but this upset some 5701 variants.
+ */
+ pci_read_config_dword(tp->pdev, PCI_COMMAND, &val);
+
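+	/* Three back-to-back delays; presumably paranoia to give the
+	 * core-clock reset time to settle before touching config space.
+	 */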
+ udelay(40);
+ udelay(40);
+ udelay(40);
+
+ /* Re-enable indirect register accesses. */
+ pci_write_config_dword(tp->pdev, TG3PCI_MISC_HOST_CTRL,
+ tp->misc_host_ctrl);
+
+ /* Set MAX PCI retry to zero. */
+ val = (PCISTATE_ROM_ENABLE | PCISTATE_ROM_RETRY_ENABLE);
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5704_A0 &&
+ (tp->tg3_flags & TG3_FLAG_PCIX_MODE))
+ val |= PCISTATE_RETRY_SAME_DMA;
+ pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, val);
+
+ pci_restore_state(tp->pdev, tp->pci_cfg_state);
+
+ /* Make sure PCI-X relaxed ordering bit is clear. */
+ pci_read_config_dword(tp->pdev, TG3PCI_X_CAPS, &val);
+ val &= ~PCIX_CAPS_RELAXED_ORDERING;
+ pci_write_config_dword(tp->pdev, TG3PCI_X_CAPS, val);
+
+ tw32(MEMARB_MODE, MEMARB_MODE_ENABLE);
+
+ tw32(TG3PCI_MISC_HOST_CTRL, tp->misc_host_ctrl);
+}
+
+/* tp->lock is held. */
+static void tg3_stop_fw(struct tg3 *tp)
+{
+ if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) {
+ u32 val;
+ int i;
+
+ tg3_write_mem(tp, NIC_SRAM_FW_CMD_MBOX, FWCMD_NICDRV_PAUSE_FW);
+ val = tr32(GRC_RX_CPU_EVENT);
+ val |= (1 << 14);
+ tw32(GRC_RX_CPU_EVENT, val);
+
+ /* Wait for RX cpu to ACK the event. */
+ for (i = 0; i < 100; i++) {
+ if (!(tr32(GRC_RX_CPU_EVENT) & (1 << 14)))
+ break;
+ udelay(1);
+ }
+ }
+}
+
+/* tp->lock is held. */
+static int tg3_halt(struct tg3 *tp)
+{
+ u32 val;
+ int i;
+
+ tg3_stop_fw(tp);
+ tg3_abort_hw(tp);
+ tg3_chip_reset(tp);
+ tg3_write_mem(tp,
+ NIC_SRAM_FIRMWARE_MBOX,
+ NIC_SRAM_FIRMWARE_MBOX_MAGIC1);
+ for (i = 0; i < 100000; i++) {
+ tg3_read_mem(tp, NIC_SRAM_FIRMWARE_MBOX, &val);
+ if (val == ~NIC_SRAM_FIRMWARE_MBOX_MAGIC1)
+ break;
+ udelay(10);
+ }
+
+ if (i >= 100000) {
+ printk(KERN_ERR PFX "tg3_halt timed out for %s, "
+ "firmware will not restart magic=%08x\n",
+ tp->dev->name, val);
+ return -ENODEV;
+ }
+
+ if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) {
+ if (tp->tg3_flags & TG3_FLAG_WOL_ENABLE)
+ tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX,
+ DRV_STATE_WOL);
+ else
+ tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX,
+ DRV_STATE_UNLOAD);
+ } else
+ tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX,
+ DRV_STATE_SUSPEND);
+
+ return 0;
+}
+
+#define TG3_FW_RELEASE_MAJOR 0x0
+#define TG3_FW_RELASE_MINOR 0x0
+#define TG3_FW_RELEASE_FIX 0x0
+#define TG3_FW_START_ADDR 0x08000000
+#define TG3_FW_TEXT_ADDR 0x08000000
+#define TG3_FW_TEXT_LEN 0x9c0
+#define TG3_FW_RODATA_ADDR 0x080009c0
+#define TG3_FW_RODATA_LEN 0x60
+#define TG3_FW_DATA_ADDR 0x08000a40
+#define TG3_FW_DATA_LEN 0x20
+#define TG3_FW_SBSS_ADDR 0x08000a60
+#define TG3_FW_SBSS_LEN 0xc
+#define TG3_FW_BSS_ADDR 0x08000a70
+#define TG3_FW_BSS_LEN 0x10
+
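+/* The tables below are raw machine code for the on-chip RX/TX
+ * CPUs (the opcodes look like MIPS, e.g. 0x27bdffe0 is
+ * "addiu sp,sp,-32"); tg3_load_5701_a0_firmware_fix() copies them
+ * into the CPU scratch SRAM described by the addresses above.
+ */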
+static u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = {
+ 0x00000000, 0x10000003, 0x00000000, 0x0000000d, 0x0000000d, 0x3c1d0800,
+ 0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100000, 0x0e000018, 0x00000000,
+ 0x0000000d, 0x3c1d0800, 0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100034,
+ 0x0e00021c, 0x00000000, 0x0000000d, 0x00000000, 0x00000000, 0x00000000,
+ 0x27bdffe0, 0x3c1cc000, 0xafbf0018, 0xaf80680c, 0x0e00004c, 0x241b2105,
+ 0x97850000, 0x97870002, 0x9782002c, 0x9783002e, 0x3c040800, 0x248409c0,
+ 0xafa00014, 0x00021400, 0x00621825, 0x00052c00, 0xafa30010, 0x8f860010,
+ 0x00e52825, 0x0e000060, 0x24070102, 0x3c02ac00, 0x34420100, 0x3c03ac01,
+ 0x34630100, 0xaf820490, 0x3c02ffff, 0xaf820494, 0xaf830498, 0xaf82049c,
+ 0x24020001, 0xaf825ce0, 0x0e00003f, 0xaf825d00, 0x0e000140, 0x00000000,
+ 0x8fbf0018, 0x03e00008, 0x27bd0020, 0x2402ffff, 0xaf825404, 0x8f835400,
+ 0x34630400, 0xaf835400, 0xaf825404, 0x3c020800, 0x24420034, 0xaf82541c,
+ 0x03e00008, 0xaf805400, 0x00000000, 0x00000000, 0x3c020800, 0x34423000,
+ 0x3c030800, 0x34633000, 0x3c040800, 0x348437ff, 0x3c010800, 0xac220a64,
+ 0x24020040, 0x3c010800, 0xac220a68, 0x3c010800, 0xac200a60, 0xac600000,
+ 0x24630004, 0x0083102b, 0x5040fffd, 0xac600000, 0x03e00008, 0x00000000,
+ 0x00804821, 0x8faa0010, 0x3c020800, 0x8c420a60, 0x3c040800, 0x8c840a68,
+ 0x8fab0014, 0x24430001, 0x0044102b, 0x3c010800, 0xac230a60, 0x14400003,
+ 0x00004021, 0x3c010800, 0xac200a60, 0x3c020800, 0x8c420a60, 0x3c030800,
+ 0x8c630a64, 0x91240000, 0x00021140, 0x00431021, 0x00481021, 0x25080001,
+ 0xa0440000, 0x29020008, 0x1440fff4, 0x25290001, 0x3c020800, 0x8c420a60,
+ 0x3c030800, 0x8c630a64, 0x8f84680c, 0x00021140, 0x00431021, 0xac440008,
+ 0xac45000c, 0xac460010, 0xac470014, 0xac4a0018, 0x03e00008, 0xac4b001c,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0x02000008, 0x00000000, 0x0a0001e3, 0x3c0a0001, 0x0a0001e3, 0x3c0a0002,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
+ 0x0a0001e3, 0x3c0a0007, 0x0a0001e3, 0x3c0a0008, 0x0a0001e3, 0x3c0a0009,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x3c0a000b,
+ 0x0a0001e3, 0x3c0a000c, 0x0a0001e3, 0x3c0a000d, 0x0a0001e3, 0x00000000,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x3c0a000e, 0x0a0001e3, 0x00000000,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x00000000,
+ 0x0a0001e3, 0x00000000, 0x0a0001e3, 0x3c0a0013, 0x0a0001e3, 0x3c0a0014,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0x27bdffe0, 0x00001821, 0x00001021, 0xafbf0018, 0xafb10014, 0xafb00010,
+ 0x3c010800, 0x00220821, 0xac200a70, 0x3c010800, 0x00220821, 0xac200a74,
+ 0x3c010800, 0x00220821, 0xac200a78, 0x24630001, 0x1860fff5, 0x2442000c,
+ 0x24110001, 0x8f906810, 0x32020004, 0x14400005, 0x24040001, 0x3c020800,
+ 0x8c420a78, 0x18400003, 0x00002021, 0x0e000182, 0x00000000, 0x32020001,
+ 0x10400003, 0x00000000, 0x0e000169, 0x00000000, 0x0a000153, 0xaf915028,
+ 0x8fbf0018, 0x8fb10014, 0x8fb00010, 0x03e00008, 0x27bd0020, 0x3c050800,
+ 0x8ca50a70, 0x3c060800, 0x8cc60a80, 0x3c070800, 0x8ce70a78, 0x27bdffe0,
+ 0x3c040800, 0x248409d0, 0xafbf0018, 0xafa00010, 0x0e000060, 0xafa00014,
+ 0x0e00017b, 0x00002021, 0x8fbf0018, 0x03e00008, 0x27bd0020, 0x24020001,
+ 0x8f836810, 0x00821004, 0x00021027, 0x00621824, 0x03e00008, 0xaf836810,
+ 0x27bdffd8, 0xafbf0024, 0x1080002e, 0xafb00020, 0x8f825cec, 0xafa20018,
+ 0x8f825cec, 0x3c100800, 0x26100a78, 0xafa2001c, 0x34028000, 0xaf825cec,
+ 0x8e020000, 0x18400016, 0x00000000, 0x3c020800, 0x94420a74, 0x8fa3001c,
+ 0x000221c0, 0xac830004, 0x8fa2001c, 0x3c010800, 0x0e000201, 0xac220a74,
+ 0x10400005, 0x00000000, 0x8e020000, 0x24420001, 0x0a0001df, 0xae020000,
+ 0x3c020800, 0x8c420a70, 0x00021c02, 0x000321c0, 0x0a0001c5, 0xafa2001c,
+ 0x0e000201, 0x00000000, 0x1040001f, 0x00000000, 0x8e020000, 0x8fa3001c,
+ 0x24420001, 0x3c010800, 0xac230a70, 0x3c010800, 0xac230a74, 0x0a0001df,
+ 0xae020000, 0x3c100800, 0x26100a78, 0x8e020000, 0x18400028, 0x00000000,
+ 0x0e000201, 0x00000000, 0x14400024, 0x00000000, 0x8e020000, 0x3c030800,
+ 0x8c630a70, 0x2442ffff, 0xafa3001c, 0x18400006, 0xae020000, 0x00031402,
+ 0x000221c0, 0x8c820004, 0x3c010800, 0xac220a70, 0x97a2001e, 0x2442ff00,
+ 0x2c420300, 0x1440000b, 0x24024000, 0x3c040800, 0x248409dc, 0xafa00010,
+ 0xafa00014, 0x8fa6001c, 0x24050008, 0x0e000060, 0x00003821, 0x0a0001df,
+ 0x00000000, 0xaf825cf8, 0x3c020800, 0x8c420a40, 0x8fa3001c, 0x24420001,
+ 0xaf835cf8, 0x3c010800, 0xac220a40, 0x8fbf0024, 0x8fb00020, 0x03e00008,
+ 0x27bd0028, 0x27bdffe0, 0x3c040800, 0x248409e8, 0x00002821, 0x00003021,
+ 0x00003821, 0xafbf0018, 0xafa00010, 0x0e000060, 0xafa00014, 0x8fbf0018,
+ 0x03e00008, 0x27bd0020, 0x8f82680c, 0x8f85680c, 0x00021827, 0x0003182b,
+ 0x00031823, 0x00431024, 0x00441021, 0x00a2282b, 0x10a00006, 0x00000000,
+ 0x00401821, 0x8f82680c, 0x0043102b, 0x1440fffd, 0x00000000, 0x03e00008,
+ 0x00000000, 0x3c040800, 0x8c840000, 0x3c030800, 0x8c630a40, 0x0064102b,
+ 0x54400002, 0x00831023, 0x00641023, 0x2c420008, 0x03e00008, 0x38420001,
+ 0x27bdffe0, 0x00802821, 0x3c040800, 0x24840a00, 0x00003021, 0x00003821,
+ 0xafbf0018, 0xafa00010, 0x0e000060, 0xafa00014, 0x0a000216, 0x00000000,
+ 0x8fbf0018, 0x03e00008, 0x27bd0020, 0x00000000, 0x27bdffe0, 0x3c1cc000,
+ 0xafbf0018, 0x0e00004c, 0xaf80680c, 0x3c040800, 0x24840a10, 0x03802821,
+ 0x00003021, 0x00003821, 0xafa00010, 0x0e000060, 0xafa00014, 0x2402ffff,
+ 0xaf825404, 0x3c0200aa, 0x0e000234, 0xaf825434, 0x8fbf0018, 0x03e00008,
+ 0x27bd0020, 0x00000000, 0x00000000, 0x00000000, 0x27bdffe8, 0xafb00010,
+ 0x24100001, 0xafbf0014, 0x3c01c003, 0xac200000, 0x8f826810, 0x30422000,
+ 0x10400003, 0x00000000, 0x0e000246, 0x00000000, 0x0a00023a, 0xaf905428,
+ 0x8fbf0014, 0x8fb00010, 0x03e00008, 0x27bd0018, 0x27bdfff8, 0x8f845d0c,
+ 0x3c0200ff, 0x3c030800, 0x8c630a50, 0x3442fff8, 0x00821024, 0x1043001e,
+ 0x3c0500ff, 0x34a5fff8, 0x3c06c003, 0x3c074000, 0x00851824, 0x8c620010,
+ 0x3c010800, 0xac230a50, 0x30420008, 0x10400005, 0x00871025, 0x8cc20000,
+ 0x24420001, 0xacc20000, 0x00871025, 0xaf825d0c, 0x8fa20000, 0x24420001,
+ 0xafa20000, 0x8fa20000, 0x8fa20000, 0x24420001, 0xafa20000, 0x8fa20000,
+ 0x8f845d0c, 0x3c030800, 0x8c630a50, 0x00851024, 0x1443ffe8, 0x00851824,
+ 0x27bd0008, 0x03e00008, 0x00000000, 0x00000000, 0x00000000
+};
+
+static u32 tg3FwRodata[(TG3_FW_RODATA_LEN / sizeof(u32)) + 1] = {
+ 0x35373031, 0x726c7341, 0x00000000, 0x00000000, 0x53774576, 0x656e7430,
+ 0x00000000, 0x726c7045, 0x76656e74, 0x31000000, 0x556e6b6e, 0x45766e74,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x66617461, 0x6c457272,
+ 0x00000000, 0x00000000, 0x4d61696e, 0x43707542, 0x00000000, 0x00000000,
+ 0x00000000
+};
+
+#if 0 /* All zeros, don't eat up space with it. */
+u32 tg3FwData[(TG3_FW_DATA_LEN / sizeof(u32)) + 1] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000
+};
+#endif
+
+#define RX_CPU_SCRATCH_BASE 0x30000
+#define RX_CPU_SCRATCH_SIZE 0x04000
+#define TX_CPU_SCRATCH_BASE 0x34000
+#define TX_CPU_SCRATCH_SIZE 0x04000
+
+/* tp->lock is held. */
+static int tg3_reset_cpu(struct tg3 *tp, u32 offset)
+{
+ int i;
+
+ tw32(offset + CPU_STATE, 0xffffffff);
+ tw32(offset + CPU_MODE, CPU_MODE_RESET);
+ if (offset == RX_CPU_BASE) {
+ for (i = 0; i < 10000; i++)
+ if (!(tr32(offset + CPU_MODE) & CPU_MODE_RESET))
+ break;
+ tw32(offset + CPU_STATE, 0xffffffff);
+ tw32(offset + CPU_MODE, CPU_MODE_RESET);
+ tr32(offset + CPU_MODE);
+ udelay(10);
+ } else {
+ for (i = 0; i < 10000; i++) {
+ if (!(tr32(offset + CPU_MODE) & CPU_MODE_RESET))
+ break;
+ tw32(offset + CPU_STATE, 0xffffffff);
+ tw32(offset + CPU_MODE, CPU_MODE_RESET);
+ tr32(offset + CPU_MODE);
+ udelay(10);
+ }
+ }
+
+ if (i >= 10000) {
+ printk(KERN_ERR PFX "tg3_reset_cpu timed out for %s, "
+		       "%s CPU\n",
+ tp->dev->name,
+ (offset == RX_CPU_BASE ? "RX" : "TX"));
+ return -ENODEV;
+ }
+ return 0;
+}
+
+struct fw_info {
+ unsigned int text_base;
+ unsigned int text_len;
+ u32 *text_data;
+ unsigned int rodata_base;
+ unsigned int rodata_len;
+ u32 *rodata_data;
+ unsigned int data_base;
+ unsigned int data_len;
+ u32 *data_data;
+};
+
+/* tp->lock is held. */
+static int tg3_load_firmware_cpu(struct tg3 *tp, u32 cpu_base, u32 cpu_scratch_base,
+ int cpu_scratch_size, struct fw_info *info)
+{
+ int err, i;
+ u32 orig_tg3_flags = tp->tg3_flags;
+
+ /* Force use of PCI config space for indirect register
+ * write calls.
+ */
+ tp->tg3_flags |= TG3_FLAG_PCIX_TARGET_HWBUG;
+
+ err = tg3_reset_cpu(tp, cpu_base);
+ if (err)
+ goto out;
+
+ for (i = 0; i < cpu_scratch_size; i += sizeof(u32))
+ tg3_write_indirect_reg32(tp, cpu_scratch_base + i, 0);
+ tw32(cpu_base + CPU_STATE, 0xffffffff);
+ tw32(cpu_base + CPU_MODE, tr32(cpu_base+CPU_MODE)|CPU_MODE_HALT);
+ for (i = 0; i < (info->text_len / sizeof(u32)); i++)
+ tg3_write_indirect_reg32(tp, (cpu_scratch_base +
+ (info->text_base & 0xffff) +
+ (i * sizeof(u32))),
+ (info->text_data ?
+ info->text_data[i] : 0));
+ for (i = 0; i < (info->rodata_len / sizeof(u32)); i++)
+ tg3_write_indirect_reg32(tp, (cpu_scratch_base +
+ (info->rodata_base & 0xffff) +
+ (i * sizeof(u32))),
+ (info->rodata_data ?
+ info->rodata_data[i] : 0));
+ for (i = 0; i < (info->data_len / sizeof(u32)); i++)
+ tg3_write_indirect_reg32(tp, (cpu_scratch_base +
+ (info->data_base & 0xffff) +
+ (i * sizeof(u32))),
+ (info->data_data ?
+ info->data_data[i] : 0));
+
+ err = 0;
+
+out:
+ tp->tg3_flags = orig_tg3_flags;
+ return err;
+}
+
+/* tp->lock is held. */
+static int tg3_load_5701_a0_firmware_fix(struct tg3 *tp)
+{
+ struct fw_info info;
+ int err, i;
+
+ info.text_base = TG3_FW_TEXT_ADDR;
+ info.text_len = TG3_FW_TEXT_LEN;
+ info.text_data = &tg3FwText[0];
+ info.rodata_base = TG3_FW_RODATA_ADDR;
+ info.rodata_len = TG3_FW_RODATA_LEN;
+ info.rodata_data = &tg3FwRodata[0];
+ info.data_base = TG3_FW_DATA_ADDR;
+ info.data_len = TG3_FW_DATA_LEN;
+ info.data_data = NULL;
+
+ err = tg3_load_firmware_cpu(tp, RX_CPU_BASE,
+ RX_CPU_SCRATCH_BASE, RX_CPU_SCRATCH_SIZE,
+ &info);
+ if (err)
+ return err;
+
+ err = tg3_load_firmware_cpu(tp, TX_CPU_BASE,
+ TX_CPU_SCRATCH_BASE, TX_CPU_SCRATCH_SIZE,
+ &info);
+ if (err)
+ return err;
+
+ /* Now startup only the RX cpu. */
+ tw32(RX_CPU_BASE + CPU_STATE, 0xffffffff);
+ tw32(RX_CPU_BASE + CPU_PC, TG3_FW_TEXT_ADDR);
+
+ /* Flush posted writes. */
+ tr32(RX_CPU_BASE + CPU_PC);
+ for (i = 0; i < 5; i++) {
+ if (tr32(RX_CPU_BASE + CPU_PC) == TG3_FW_TEXT_ADDR)
+ break;
+ tw32(RX_CPU_BASE + CPU_STATE, 0xffffffff);
+ tw32(RX_CPU_BASE + CPU_MODE, CPU_MODE_HALT);
+ tw32(RX_CPU_BASE + CPU_PC, TG3_FW_TEXT_ADDR);
+
+ /* Flush posted writes. */
+ tr32(RX_CPU_BASE + CPU_PC);
+
+ udelay(1000);
+ }
+ if (i >= 5) {
+ printk(KERN_ERR PFX "tg3_load_firmware fails for %s "
+		       "to set RX CPU PC: is %08x, should be %08x\n",
+ tp->dev->name, tr32(RX_CPU_BASE + CPU_PC),
+ TG3_FW_TEXT_ADDR);
+ return -ENODEV;
+ }
+ tw32(RX_CPU_BASE + CPU_STATE, 0xffffffff);
+ tw32(RX_CPU_BASE + CPU_MODE, 0x00000000);
+
+ /* Flush posted writes. */
+ tr32(RX_CPU_BASE + CPU_MODE);
+
+ return 0;
+}
+
+#if TG3_DO_TSO != 0
+
+#define TG3_TSO_FW_RELEASE_MAJOR 0x1
+#define TG3_TSO_FW_RELASE_MINOR 0x8
+#define TG3_TSO_FW_RELEASE_FIX 0x0
+#define TG3_TSO_FW_START_ADDR 0x08000000
+#define TG3_TSO_FW_TEXT_ADDR 0x08000000
+#define TG3_TSO_FW_TEXT_LEN 0x1650
+#define TG3_TSO_FW_RODATA_ADDR 0x08001650
+#define TG3_TSO_FW_RODATA_LEN 0x30
+#define TG3_TSO_FW_DATA_ADDR 0x080016a0
+#define TG3_TSO_FW_DATA_LEN 0x20
+#define TG3_TSO_FW_SBSS_ADDR 0x080016c0
+#define TG3_TSO_FW_SBSS_LEN 0x14
+#define TG3_TSO_FW_BSS_ADDR 0x080016e0
+#define TG3_TSO_FW_BSS_LEN 0x8fc
+
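+/* TSO firmware image for the on-chip TX CPU; the arrays below are the
+ * raw instruction and data words loaded by tg3_load_tso_firmware().
+ */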
+static u32 tg3TsoFwText[] = {
+ 0x00000000, 0x10000003, 0x00000000, 0x0000000d, 0x0000000d, 0x3c1d0800,
+ 0x37bd4000, 0x03a0f021, 0x3c100800, 0x26100000, 0x0e000010, 0x00000000,
+ 0x0000000d, 0x00000000, 0x00000000, 0x00000000, 0x27bdffe0, 0x3c1bc000,
+ 0xafbf0018, 0x0e000058, 0xaf60680c, 0x3c040800, 0x24841650, 0x03602821,
+ 0x24060001, 0x24070004, 0xafa00010, 0x0e00006c, 0xafa00014, 0x8f625c50,
+ 0x34420001, 0xaf625c50, 0x8f625c90, 0x34420001, 0xaf625c90, 0x2402ffff,
+ 0x0e000098, 0xaf625404, 0x8fbf0018, 0x03e00008, 0x27bd0020, 0x00000000,
+ 0x00000000, 0x00000000, 0x24030b60, 0x24050fff, 0xac000b50, 0x00002021,
+ 0xac640000, 0x24630004, 0x0065102b, 0x1440fffc, 0x24840001, 0x24030b60,
+ 0x0065102b, 0x10400011, 0x00002021, 0x24090b54, 0x3c06dead, 0x34c6beef,
+ 0x24080b58, 0x24070b5c, 0x8c620000, 0x50440006, 0x24630004, 0xad260000,
+ 0x8c620000, 0xace40000, 0xad020000, 0x24630004, 0x0065102b, 0x1440fff6,
+ 0x24840001, 0x03e00008, 0x00000000, 0x27bdfff8, 0x18800009, 0x00002821,
+ 0x8f63680c, 0x8f62680c, 0x1043fffe, 0x00000000, 0x24a50001, 0x00a4102a,
+ 0x1440fff9, 0x00000000, 0x03e00008, 0x27bd0008, 0x3c020800, 0x34423000,
+ 0x3c030800, 0x34633000, 0x3c040800, 0x348437ff, 0x3c010800, 0xac2216c4,
+ 0x24020040, 0x3c010800, 0xac2216c8, 0x3c010800, 0xac2016c0, 0xac600000,
+ 0x24630004, 0x0083102b, 0x5040fffd, 0xac600000, 0x03e00008, 0x00000000,
+ 0x00804821, 0x8faa0010, 0x3c020800, 0x8c4216c0, 0x3c040800, 0x8c8416c8,
+ 0x8fab0014, 0x24430001, 0x0044102b, 0x3c010800, 0xac2316c0, 0x14400003,
+ 0x00004021, 0x3c010800, 0xac2016c0, 0x3c020800, 0x8c4216c0, 0x3c030800,
+ 0x8c6316c4, 0x91240000, 0x00021140, 0x00431021, 0x00481021, 0x25080001,
+ 0xa0440000, 0x29020008, 0x1440fff4, 0x25290001, 0x3c020800, 0x8c4216c0,
+ 0x3c030800, 0x8c6316c4, 0x8f64680c, 0x00021140, 0x00431021, 0xac440008,
+ 0xac45000c, 0xac460010, 0xac470014, 0xac4a0018, 0x03e00008, 0xac4b001c,
+ 0x00000000, 0x00000000, 0x27bdffe0, 0xafbf0018, 0xafb10014, 0x0e0000b6,
+ 0xafb00010, 0x24110001, 0x8f706820, 0x32020100, 0x10400003, 0x00000000,
+ 0x0e000127, 0x00000000, 0x8f706820, 0x32022000, 0x10400004, 0x32020001,
+ 0x0e00025a, 0x24040001, 0x32020001, 0x10400003, 0x00000000, 0x0e0000e6,
+ 0x00000000, 0x0a00009e, 0xaf715028, 0x8fbf0018, 0x8fb10014, 0x8fb00010,
+ 0x03e00008, 0x27bd0020, 0x27bdffe0, 0x3c040800, 0x24841660, 0x00002821,
+ 0x00003021, 0x00003821, 0xafbf0018, 0xafa00010, 0x0e00006c, 0xafa00014,
+ 0x3c010800, 0xa4201fb8, 0x3c010800, 0xa02016f8, 0x3c010800, 0xac2016fc,
+ 0x3c010800, 0xac201700, 0x3c010800, 0xac201704, 0x3c010800, 0xac20170c,
+ 0x3c010800, 0xac201718, 0x3c010800, 0xac20171c, 0x8f624434, 0x3c010800,
+ 0xac2216e8, 0x8f624438, 0x3c010800, 0xac2216ec, 0x8f624410, 0x3c010800,
+ 0xac2016e0, 0x3c010800, 0xac2016e4, 0x3c010800, 0xac201fc0, 0x3c010800,
+ 0xac201f68, 0x3c010800, 0xac201f6c, 0x3c010800, 0xac2216f0, 0x8fbf0018,
+ 0x03e00008, 0x27bd0020, 0x27bdffe0, 0x3c040800, 0x2484166c, 0x00002821,
+ 0x00003021, 0x00003821, 0xafbf0018, 0xafa00010, 0x0e00006c, 0xafa00014,
+ 0x3c040800, 0x24841660, 0x00002821, 0x00003021, 0x00003821, 0xafa00010,
+ 0x0e00006c, 0xafa00014, 0x3c010800, 0xa4201fb8, 0x3c010800, 0xa02016f8,
+ 0x3c010800, 0xac2016fc, 0x3c010800, 0xac201700, 0x3c010800, 0xac201704,
+ 0x3c010800, 0xac20170c, 0x3c010800, 0xac201718, 0x3c010800, 0xac20171c,
+ 0x8f624434, 0x3c010800, 0xac2216e8, 0x8f624438, 0x3c010800, 0xac2216ec,
+ 0x8f624410, 0x3c010800, 0xac2016e0, 0x3c010800, 0xac2016e4, 0x3c010800,
+ 0xac201fc0, 0x3c010800, 0xac201f68, 0x3c010800, 0xac201f6c, 0x3c010800,
+ 0xac2216f0, 0x0e000120, 0x00002021, 0x8fbf0018, 0x03e00008, 0x27bd0020,
+ 0x24020001, 0x8f636820, 0x00821004, 0x00021027, 0x00621824, 0x03e00008,
+ 0xaf636820, 0x27bdffd0, 0x3c0300ff, 0xafbf002c, 0xafb60028, 0xafb50024,
+ 0xafb40020, 0xafb3001c, 0xafb20018, 0xafb10014, 0xafb00010, 0x8f665c5c,
+ 0x3c040800, 0x2484171c, 0x8c820000, 0x3463fff8, 0x14460005, 0x00c38824,
+ 0x3c020800, 0x904216f8, 0x14400115, 0x00000000, 0x00111902, 0x306300ff,
+ 0x30c20003, 0x000211c0, 0x00623825, 0x00e02821, 0x00061602, 0xac860000,
+ 0x3c030800, 0x906316f8, 0x3044000f, 0x1460002b, 0x00804021, 0x24020001,
+ 0x3c010800, 0xa02216f8, 0x00071100, 0x00821025, 0x3c010800, 0xac2016fc,
+ 0x3c010800, 0xac201700, 0x3c010800, 0xac201704, 0x3c010800, 0xac20170c,
+ 0x3c010800, 0xac201718, 0x3c010800, 0xac201710, 0x3c010800, 0xac201714,
+ 0x3c010800, 0xa4221fb8, 0x9623000c, 0x30628000, 0x10400008, 0x30627fff,
+ 0x2442003e, 0x3c010800, 0xa42216f6, 0x24020001, 0x3c010800, 0x0a00016e,
+ 0xac221fd4, 0x24620036, 0x3c010800, 0xa42216f6, 0x3c010800, 0xac201fd4,
+ 0x3c010800, 0xac201fd0, 0x3c010800, 0x0a000176, 0xac201fd8, 0x9622000c,
+ 0x3c010800, 0xa4221fcc, 0x3c040800, 0x248416fc, 0x8c820000, 0x00021100,
+ 0x3c010800, 0x00220821, 0xac311728, 0x8c820000, 0x00021100, 0x3c010800,
+ 0x00220821, 0xac26172c, 0x8c820000, 0x24a30001, 0x306701ff, 0x00021100,
+ 0x3c010800, 0x00220821, 0xac271730, 0x8c820000, 0x00021100, 0x3c010800,
+ 0x00220821, 0xac281734, 0x96230008, 0x3c020800, 0x8c42170c, 0x00432821,
+ 0x3c010800, 0xac25170c, 0x9622000a, 0x30420004, 0x14400019, 0x00071100,
+ 0x3c02c000, 0x00c21825, 0xaf635c5c, 0x8f625c50, 0x30420002, 0x1440fffc,
+ 0x00000000, 0x8f630c14, 0x3063000f, 0x2c620002, 0x1440001e, 0x00000000,
+ 0x8f630c14, 0x3c020800, 0x8c4216b4, 0x3063000f, 0x24420001, 0x3c010800,
+ 0xac2216b4, 0x2c620002, 0x1040fff7, 0x00000000, 0x0a0001c1, 0x00000000,
+ 0x3c030800, 0x8c6316e0, 0x3c040800, 0x948416f4, 0x01021025, 0x3c010800,
+ 0xa4221fba, 0x24020001, 0x3c010800, 0xac221718, 0x24630001, 0x0085202a,
+ 0x3c010800, 0x10800003, 0xac2316e0, 0x3c010800, 0xa42516f4, 0x3c030800,
+ 0x246316fc, 0x8c620000, 0x24420001, 0xac620000, 0x28420080, 0x14400005,
+ 0x24020001, 0x0e0002df, 0x24040002, 0x0a000250, 0x00000000, 0x3c030800,
+ 0x906316f8, 0x1462007c, 0x24020003, 0x3c160800, 0x96d616f6, 0x3c050800,
+ 0x8ca5170c, 0x32c4ffff, 0x00a4102a, 0x14400078, 0x00000000, 0x3c020800,
+ 0x8c421718, 0x10400005, 0x32c2ffff, 0x14a40003, 0x00000000, 0x3c010800,
+ 0xac231fd0, 0x10400062, 0x00009021, 0x0040a021, 0x3c150800, 0x26b51700,
+ 0x26b30010, 0x8ea20000, 0x00028100, 0x3c110800, 0x02308821, 0x0e0002e1,
+ 0x8e311728, 0x00403021, 0x10c00059, 0x00000000, 0x9628000a, 0x31020040,
+ 0x10400004, 0x2407180c, 0x8e22000c, 0x2407188c, 0xacc20018, 0x31021000,
+ 0x10400004, 0x34e32000, 0x00081040, 0x3042c000, 0x00623825, 0x3c030800,
+ 0x00701821, 0x8c631730, 0x3c020800, 0x00501021, 0x8c421734, 0x00031d00,
+ 0x00021400, 0x00621825, 0xacc30014, 0x8ea30004, 0x96220008, 0x00432023,
+ 0x3242ffff, 0x3083ffff, 0x00431021, 0x0282102a, 0x14400002, 0x02d22823,
+ 0x00802821, 0x8e620000, 0x30a4ffff, 0x00441021, 0xae620000, 0x8e220000,
+ 0xacc20000, 0x8e220004, 0x8e63fff4, 0x00431021, 0xacc20004, 0xa4c5000e,
+ 0x8e62fff4, 0x00441021, 0xae62fff4, 0x96230008, 0x0043102a, 0x14400005,
+ 0x02459021, 0x8e62fff0, 0xae60fff4, 0x24420001, 0xae62fff0, 0xacc00008,
+ 0x3242ffff, 0x14540008, 0x24020305, 0x31020080, 0x54400001, 0x34e70010,
+ 0x24020905, 0xa4c2000c, 0x0a000233, 0x34e70020, 0xa4c2000c, 0x30e2ffff,
+ 0xacc20010, 0x3c020800, 0x8c421fd0, 0x10400003, 0x3c024b65, 0x0a00023d,
+ 0x34427654, 0x3c02b49a, 0x344289ab, 0xacc2001c, 0x0e000560, 0x00c02021,
+ 0x3242ffff, 0x0054102b, 0x1440ffa4, 0x00000000, 0x24020002, 0x3c010800,
+ 0x0a000250, 0xa02216f8, 0x8ea208bc, 0x24420001, 0x0a000250, 0xaea208bc,
+ 0x14620003, 0x00000000, 0x0e000450, 0x00000000, 0x8fbf002c, 0x8fb60028,
+ 0x8fb50024, 0x8fb40020, 0x8fb3001c, 0x8fb20018, 0x8fb10014, 0x8fb00010,
+ 0x03e00008, 0x27bd0030, 0x27bdffd8, 0xafb3001c, 0x00809821, 0xafbf0020,
+ 0xafb20018, 0xafb10014, 0xafb00010, 0x8f725c9c, 0x3c0200ff, 0x3442fff8,
+ 0x3c040800, 0x24841714, 0x02428824, 0x9623000e, 0x8c820000, 0x00431021,
+ 0xac820000, 0x8e220010, 0x30420020, 0x14400011, 0x00000000, 0x0e0002f7,
+ 0x02202021, 0x3c02c000, 0x02421825, 0xaf635c9c, 0x8f625c90, 0x30420002,
+ 0x10400061, 0x00000000, 0xaf635c9c, 0x8f625c90, 0x30420002, 0x1040005c,
+ 0x00000000, 0x0a000278, 0x00000000, 0x8e220008, 0x00021c02, 0x000321c0,
+ 0x3042ffff, 0x3c030800, 0x906316f8, 0x000229c0, 0x24020002, 0x14620003,
+ 0x3c034b65, 0x0a000290, 0x00008021, 0x8e22001c, 0x34637654, 0x10430002,
+ 0x24100002, 0x24100001, 0x0e000300, 0x02003021, 0x24020003, 0x3c010800,
+ 0xa02216f8, 0x24020002, 0x1202000a, 0x24020001, 0x3c030800, 0x8c631fd0,
+ 0x10620006, 0x00000000, 0x3c020800, 0x94421fb8, 0x00021400, 0x0a0002cd,
+ 0xae220014, 0x3c040800, 0x24841fba, 0x94820000, 0x00021400, 0xae220014,
+ 0x3c020800, 0x8c42171c, 0x3c03c000, 0x3c010800, 0xa02016f8, 0x00431025,
+ 0xaf625c5c, 0x8f625c50, 0x30420002, 0x10400009, 0x00000000, 0x2484f762,
+ 0x8c820000, 0x00431025, 0xaf625c5c, 0x8f625c50, 0x30420002, 0x1440fffa,
+ 0x00000000, 0x3c020800, 0x244216e4, 0x8c430000, 0x24630001, 0xac430000,
+ 0x8f630c14, 0x3063000f, 0x2c620002, 0x1440000b, 0x00009821, 0x8f630c14,
+ 0x3c020800, 0x8c4216b4, 0x3063000f, 0x24420001, 0x3c010800, 0xac2216b4,
+ 0x2c620002, 0x1040fff7, 0x00009821, 0x3c024000, 0x02421825, 0xaf635c9c,
+ 0x8f625c90, 0x30420002, 0x1440fffc, 0x00000000, 0x12600003, 0x00000000,
+ 0x0e000450, 0x00000000, 0x8fbf0020, 0x8fb3001c, 0x8fb20018, 0x8fb10014,
+ 0x8fb00010, 0x03e00008, 0x27bd0028, 0x0a0002df, 0x00000000, 0x8f634450,
+ 0x3c040800, 0x248416e8, 0x8c820000, 0x00031c02, 0x0043102b, 0x14400007,
+ 0x3c038000, 0x8c840004, 0x8f624450, 0x00021c02, 0x0083102b, 0x1040fffc,
+ 0x3c038000, 0xaf634444, 0x8f624444, 0x00431024, 0x1440fffd, 0x00000000,
+ 0x8f624448, 0x03e00008, 0x3042ffff, 0x3c024000, 0x00822025, 0xaf645c38,
+ 0x8f625c30, 0x30420002, 0x1440fffc, 0x00000000, 0x03e00008, 0x00000000,
+ 0x27bdffe0, 0x00805021, 0x14c00017, 0x254c0008, 0x3c020800, 0x8c421fd4,
+ 0x1040000a, 0x2402003e, 0x3c010800, 0xa4221fb0, 0x24020016, 0x3c010800,
+ 0xa4221fb2, 0x2402002a, 0x3c010800, 0x0a00031a, 0xa4221fb4, 0x95420014,
+ 0x3c010800, 0xa4221fb0, 0x8d430010, 0x00031402, 0x3c010800, 0xa4221fb2,
+ 0x3c010800, 0xa4231fb4, 0x3c040800, 0x94841fb4, 0x3c030800, 0x94631fb2,
+ 0x958d0006, 0x3c020800, 0x94421fb0, 0x00832023, 0x01a27023, 0x3065ffff,
+ 0x24a20028, 0x01824021, 0x3082ffff, 0x14c0001a, 0x01025821, 0x9562000c,
+ 0x3042003f, 0x3c010800, 0xa4221fb6, 0x95620004, 0x95630006, 0x3c010800,
+ 0xac201fc4, 0x3c010800, 0xac201fc8, 0x00021400, 0x00431025, 0x3c010800,
+ 0xac221720, 0x95020004, 0x3c010800, 0xa4221724, 0x95030002, 0x01a51023,
+ 0x0043102a, 0x10400010, 0x24020001, 0x3c010800, 0x0a00034e, 0xac221fd8,
+ 0x3c030800, 0x8c631fc8, 0x3c020800, 0x94421724, 0x00431021, 0xa5020004,
+ 0x3c020800, 0x94421720, 0xa5620004, 0x3c020800, 0x8c421720, 0xa5620006,
+ 0x3c020800, 0x8c421fd0, 0x3c070800, 0x8ce71fc4, 0x3c050800, 0x144000c7,
+ 0x8ca51fc8, 0x3c020800, 0x94421724, 0x00451821, 0x3063ffff, 0x0062182b,
+ 0x24020002, 0x10c2000d, 0x00a32823, 0x3c020800, 0x94421fb6, 0x30420009,
+ 0x10400008, 0x00000000, 0x9562000c, 0x3042fff6, 0xa562000c, 0x3c020800,
+ 0x94421fb6, 0x30420009, 0x00e23823, 0x3c020800, 0x8c421fd8, 0x1040004b,
+ 0x24020002, 0x01003021, 0x3c020800, 0x94421fb2, 0x00003821, 0xa500000a,
+ 0x01a21023, 0xa5020002, 0x3082ffff, 0x00021042, 0x18400008, 0x00002821,
+ 0x00401821, 0x94c20000, 0x24e70001, 0x00a22821, 0x00e3102a, 0x1440fffb,
+ 0x24c60002, 0x00051c02, 0x30a2ffff, 0x00622821, 0x00051402, 0x00a22821,
+ 0x00a04821, 0x00051027, 0xa502000a, 0x00002821, 0x2506000c, 0x00003821,
+ 0x94c20000, 0x24e70001, 0x00a22821, 0x2ce20004, 0x1440fffb, 0x24c60002,
+ 0x95020002, 0x00003821, 0x91030009, 0x00442023, 0x01603021, 0x3082ffff,
+ 0xa4c00010, 0x00621821, 0x00021042, 0x18400010, 0x00a32821, 0x00404021,
+ 0x94c20000, 0x24c60002, 0x00a22821, 0x30c2007f, 0x14400006, 0x24e70001,
+ 0x8d430000, 0x3c02007f, 0x3442ff80, 0x00625024, 0x25460008, 0x00e8102a,
+ 0x1440fff3, 0x00000000, 0x30820001, 0x10400005, 0x00051c02, 0xa0c00001,
+ 0x94c20000, 0x00a22821, 0x00051c02, 0x30a2ffff, 0x00622821, 0x00051402,
+ 0x00a22821, 0x0a000415, 0x30a5ffff, 0x14c20063, 0x00000000, 0x3c090800,
+ 0x95291fb2, 0x95030002, 0x01a91023, 0x1062005d, 0x01003021, 0x00003821,
+ 0x00002821, 0x01a91023, 0xa5020002, 0x3082ffff, 0x00021042, 0x18400008,
+ 0xa500000a, 0x00401821, 0x94c20000, 0x24e70001, 0x00a22821, 0x00e3102a,
+ 0x1440fffb, 0x24c60002, 0x00051c02, 0x30a2ffff, 0x00622821, 0x00051402,
+ 0x00a22821, 0x00a04821, 0x00051027, 0xa502000a, 0x00002821, 0x2506000c,
+ 0x00003821, 0x94c20000, 0x24e70001, 0x00a22821, 0x2ce20004, 0x1440fffb,
+ 0x24c60002, 0x95020002, 0x00003821, 0x91030009, 0x00442023, 0x01603021,
+ 0x3082ffff, 0xa4c00010, 0x3c040800, 0x94841fb4, 0x00621821, 0x00a32821,
+ 0x00051c02, 0x30a2ffff, 0x00622821, 0x00051c02, 0x3c020800, 0x94421fb0,
+ 0x00a34021, 0x00441023, 0x00021fc2, 0x00431021, 0x00021043, 0x18400010,
+ 0x00002821, 0x00402021, 0x94c20000, 0x24c60002, 0x00a22821, 0x30c2007f,
+ 0x14400006, 0x24e70001, 0x8d430000, 0x3c02007f, 0x3442ff80, 0x00625024,
+ 0x25460008, 0x00e4102a, 0x1440fff3, 0x00000000, 0x3c020800, 0x94421fcc,
+ 0x00a22821, 0x00051c02, 0x30a2ffff, 0x00622821, 0x00051402, 0x00a22821,
+ 0x3102ffff, 0x00a22821, 0x00051c02, 0x30a2ffff, 0x00622821, 0x00051402,
+ 0x00a22821, 0x00a02021, 0x00051027, 0xa5620010, 0xad800014, 0x0a000435,
+ 0xad800000, 0x8d830010, 0x00602021, 0x10a00007, 0x00034c02, 0x01252821,
+ 0x00051402, 0x30a3ffff, 0x00432821, 0x00051402, 0x00a24821, 0x00091027,
+ 0xa502000a, 0x3c030800, 0x94631fb4, 0x3082ffff, 0x01a21021, 0x00432823,
+ 0x00a72821, 0x00051c02, 0x30a2ffff, 0x00622821, 0x00051402, 0x00a22821,
+ 0x00a02021, 0x00051027, 0xa5620010, 0x3082ffff, 0x00091c00, 0x00431025,
+ 0xad820010, 0x3c020800, 0x8c421fd4, 0x10400002, 0x25a2fff2, 0xa5820034,
+ 0x3c020800, 0x8c421fc8, 0x3c030800, 0x8c631720, 0x24420001, 0x3c010800,
+ 0xac221fc8, 0x3c020800, 0x8c421fc4, 0x31c4ffff, 0x00641821, 0x3c010800,
+ 0xac231720, 0x00441021, 0x3c010800, 0xac221fc4, 0x03e00008, 0x27bd0020,
+ 0x27bdffc8, 0x3c040800, 0x248416f8, 0xafbf0034, 0xafbe0030, 0xafb7002c,
+ 0xafb60028, 0xafb50024, 0xafb40020, 0xafb3001c, 0xafb20018, 0xafb10014,
+ 0xafb00010, 0x90830000, 0x24020003, 0x146200f4, 0x00000000, 0x3c020800,
+ 0x8c421710, 0x3c030800, 0x8c63170c, 0x3c1e0800, 0x97de16f6, 0x0043102a,
+ 0x104000eb, 0x3c168000, 0x249708c4, 0x33d5ffff, 0x24920018, 0x3c020800,
+ 0x8c421718, 0x104000e4, 0x00000000, 0x3c140800, 0x96941fb0, 0x3282ffff,
+ 0x104000d6, 0x00008021, 0x00409821, 0x00008821, 0x8f634450, 0x3c020800,
+ 0x8c4216e8, 0x00031c02, 0x0043102b, 0x14400008, 0x00000000, 0x3c040800,
+ 0x8c8416ec, 0x8f624450, 0x00021c02, 0x0083102b, 0x1040fffc, 0x00000000,
+ 0xaf764444, 0x8f624444, 0x00561024, 0x10400006, 0x00000000, 0x3c038000,
+ 0x8f624444, 0x00431024, 0x1440fffd, 0x00000000, 0x8f624448, 0x3046ffff,
+ 0x10c0005f, 0x00000000, 0x3c090800, 0x01314821, 0x8d291728, 0x9528000a,
+ 0x31020040, 0x10400004, 0x2407180c, 0x8d22000c, 0x2407188c, 0xacc20018,
+ 0x31021000, 0x10400004, 0x34e32000, 0x00081040, 0x3042c000, 0x00623825,
+ 0x31020080, 0x54400001, 0x34e70010, 0x3c020800, 0x00511021, 0x8c421730,
+ 0x3c030800, 0x00711821, 0x8c631734, 0x00021500, 0x00031c00, 0x00431025,
+ 0xacc20014, 0x95240008, 0x3202ffff, 0x00821021, 0x0262102a, 0x14400002,
+ 0x02902823, 0x00802821, 0x8d220000, 0x02058021, 0xacc20000, 0x8d220004,
+ 0x00c02021, 0x26310010, 0xac820004, 0x30e2ffff, 0xac800008, 0xa485000e,
+ 0xac820010, 0x24020305, 0x0e000560, 0xa482000c, 0x3202ffff, 0x0053102b,
+ 0x1440ffaf, 0x3202ffff, 0x0a00054c, 0x00000000, 0x8e420000, 0x8e43fffc,
+ 0x0043102a, 0x10400084, 0x00000000, 0x8e45fff0, 0x8f644450, 0x3c030800,
+ 0x8c6316e8, 0x00051100, 0x3c090800, 0x01224821, 0x8d291728, 0x00041402,
+ 0x0062182b, 0x14600008, 0x00000000, 0x3c030800, 0x8c6316ec, 0x8f624450,
+ 0x00021402, 0x0062102b, 0x1040fffc, 0x00000000, 0xaf764444, 0x8f624444,
+ 0x00561024, 0x10400006, 0x00000000, 0x3c038000, 0x8f624444, 0x00431024,
+ 0x1440fffd, 0x00000000, 0x8f624448, 0x3046ffff, 0x14c00005, 0x00000000,
+ 0x8ee20000, 0x24420001, 0x0a000554, 0xaee20000, 0x9528000a, 0x31020040,
+ 0x10400004, 0x2407180c, 0x8d22000c, 0x2407188c, 0xacc20018, 0x31021000,
+ 0x10400004, 0x34e32000, 0x00081040, 0x3042c000, 0x00623825, 0x00051900,
+ 0x3c020800, 0x00431021, 0x8c421730, 0x3c010800, 0x00230821, 0x8c231734,
+ 0x00021500, 0x00031c00, 0x00431025, 0xacc20014, 0x3c030800, 0x8c631704,
+ 0x95220008, 0x00432023, 0x3202ffff, 0x3083ffff, 0x00431021, 0x02a2102a,
+ 0x14400002, 0x03d02823, 0x00802821, 0x8e420000, 0x30a4ffff, 0x00441021,
+ 0xae420000, 0xa4c5000e, 0x8d220000, 0xacc20000, 0x8d220004, 0x8e43fff4,
+ 0x00431021, 0xacc20004, 0x8e43fff4, 0x95220008, 0x00641821, 0x0062102a,
+ 0x14400006, 0x02058021, 0x8e42fff0, 0xae40fff4, 0x24420001, 0x0a000530,
+ 0xae42fff0, 0xae43fff4, 0xacc00008, 0x3202ffff, 0x10550003, 0x31020004,
+ 0x10400006, 0x24020305, 0x31020080, 0x54400001, 0x34e70010, 0x34e70020,
+ 0x24020905, 0xa4c2000c, 0x30e2ffff, 0xacc20010, 0x3c030800, 0x8c63170c,
+ 0x3c020800, 0x8c421710, 0x54620004, 0x3c02b49a, 0x3c024b65, 0x0a000548,
+ 0x34427654, 0x344289ab, 0xacc2001c, 0x0e000560, 0x00c02021, 0x3202ffff,
+ 0x0055102b, 0x1440ff7e, 0x00000000, 0x8e420000, 0x8e43fffc, 0x0043102a,
+ 0x1440ff1a, 0x00000000, 0x8fbf0034, 0x8fbe0030, 0x8fb7002c, 0x8fb60028,
+ 0x8fb50024, 0x8fb40020, 0x8fb3001c, 0x8fb20018, 0x8fb10014, 0x8fb00010,
+ 0x03e00008, 0x27bd0038, 0x27bdffe8, 0xafbf0014, 0xafb00010, 0x8f624450,
+ 0x8f634410, 0x0a00056f, 0x00808021, 0x8f626820, 0x30422000, 0x10400003,
+ 0x00000000, 0x0e00025a, 0x00002021, 0x8f624450, 0x8f634410, 0x3042ffff,
+ 0x0043102b, 0x1440fff5, 0x00000000, 0x8f630c14, 0x3063000f, 0x2c620002,
+ 0x1440000b, 0x00000000, 0x8f630c14, 0x3c020800, 0x8c4216b4, 0x3063000f,
+ 0x24420001, 0x3c010800, 0xac2216b4, 0x2c620002, 0x1040fff7, 0x00000000,
+ 0xaf705c18, 0x8f625c10, 0x30420002, 0x10400009, 0x00000000, 0x8f626820,
+ 0x30422000, 0x1040fff8, 0x00000000, 0x0e00025a, 0x00002021, 0x0a000582,
+ 0x00000000, 0x8fbf0014, 0x8fb00010, 0x03e00008, 0x27bd0018, 0x00000000,
+ 0x00000000
+};
+
+static u32 tg3TsoFwRodata[] = {
+ 0x4d61696e, 0x43707542, 0x00000000, 0x00000000, 0x74637073, 0x6567496e,
+ 0x00000000, 0x53774576, 0x656e7430, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000
+};
+
+#if 0 /* All zeros, don't eat up space with it. */
+u32 tg3TsoFwData[] = {
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000
+};
+#endif
+
+/* tp->lock is held. */
+static int tg3_load_tso_firmware(struct tg3 *tp)
+{
+ struct fw_info info;
+ int err, i;
+
+ info.text_base = TG3_TSO_FW_TEXT_ADDR;
+ info.text_len = TG3_TSO_FW_TEXT_LEN;
+ info.text_data = &tg3TsoFwText[0];
+ info.rodata_base = TG3_TSO_FW_RODATA_ADDR;
+ info.rodata_len = TG3_TSO_FW_RODATA_LEN;
+ info.rodata_data = &tg3TsoFwRodata[0];
+ info.data_base = TG3_TSO_FW_DATA_ADDR;
+ info.data_len = TG3_TSO_FW_DATA_LEN;
+ info.data_data = NULL;
+
+ err = tg3_load_firmware_cpu(tp, TX_CPU_BASE,
+ TX_CPU_SCRATCH_BASE, TX_CPU_SCRATCH_SIZE,
+ &info);
+ if (err)
+ return err;
+
+ /* Now startup only the TX cpu. */
+ tw32(TX_CPU_BASE + CPU_STATE, 0xffffffff);
+ tw32(TX_CPU_BASE + CPU_PC, TG3_TSO_FW_TEXT_ADDR);
+
+ /* Flush posted writes. */
+ tr32(TX_CPU_BASE + CPU_PC);
+ for (i = 0; i < 5; i++) {
+ if (tr32(TX_CPU_BASE + CPU_PC) == TG3_TSO_FW_TEXT_ADDR)
+ break;
+ tw32(TX_CPU_BASE + CPU_STATE, 0xffffffff);
+ tw32(TX_CPU_BASE + CPU_MODE, CPU_MODE_HALT);
+ tw32(TX_CPU_BASE + CPU_PC, TG3_TSO_FW_TEXT_ADDR);
+
+ /* Flush posted writes. */
+ tr32(TX_CPU_BASE + CPU_PC);
+
+ udelay(1000);
+ }
+ if (i >= 5) {
+ printk(KERN_ERR PFX "tg3_load_tso_firmware fails for %s "
+ "to set TX CPU PC, is %08x should be %08x\n",
+ tp->dev->name, tr32(TX_CPU_BASE + CPU_PC),
+ TG3_TSO_FW_TEXT_ADDR);
+ return -ENODEV;
+ }
+ tw32(TX_CPU_BASE + CPU_STATE, 0xffffffff);
+ tw32(TX_CPU_BASE + CPU_MODE, 0x00000000);
+
+ /* Flush posted writes. */
+ tr32(TX_CPU_BASE + CPU_MODE);
+
+ return 0;
+}
+
+#endif /* TG3_DO_TSO != 0 */
+
+/* tp->lock is held. */
+static void __tg3_set_mac_addr(struct tg3 *tp)
+{
+ u32 addr_high, addr_low;
+ int i;
+
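+ /* The MAC keeps four copies of the station address; program them
+ * all, then derive the transmit backoff seed from the byte sum.
+ */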
+ addr_high = ((tp->dev->dev_addr[0] << 8) |
+ tp->dev->dev_addr[1]);
+ addr_low = ((tp->dev->dev_addr[2] << 24) |
+ (tp->dev->dev_addr[3] << 16) |
+ (tp->dev->dev_addr[4] << 8) |
+ (tp->dev->dev_addr[5] << 0));
+ for (i = 0; i < 4; i++) {
+ tw32(MAC_ADDR_0_HIGH + (i * 8), addr_high);
+ tw32(MAC_ADDR_0_LOW + (i * 8), addr_low);
+ }
+
+ addr_high = (tp->dev->dev_addr[0] +
+ tp->dev->dev_addr[1] +
+ tp->dev->dev_addr[2] +
+ tp->dev->dev_addr[3] +
+ tp->dev->dev_addr[4] +
+ tp->dev->dev_addr[5]) &
+ TX_BACKOFF_SEED_MASK;
+ tw32(MAC_TX_BACKOFF_SEED, addr_high);
+}
+
+static int tg3_set_mac_addr(struct net_device *dev, void *p)
+{
+ struct tg3 *tp = dev->priv;
+ struct sockaddr *addr = p;
+
+ if (netif_running(dev))
+ return -EBUSY;
+
+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+
+ spin_lock_irq(&tp->lock);
+ __tg3_set_mac_addr(tp);
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+}
+
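+/* Write one TG3_BDINFO control block into NIC memory: the 64-bit host
+ * DMA address of the ring (high word then low word), the maxlen/flags
+ * word, and the ring's address in NIC SRAM.
+ */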
+/* tp->lock is held. */
+static void tg3_set_bdinfo(struct tg3 *tp, u32 bdinfo_addr,
+ dma_addr_t mapping, u32 maxlen_flags,
+ u32 nic_addr)
+{
+ tg3_write_mem(tp,
+ (bdinfo_addr +
+ TG3_BDINFO_HOST_ADDR +
+ TG3_64BIT_REG_HIGH),
+ ((u64) mapping >> 32));
+ tg3_write_mem(tp,
+ (bdinfo_addr +
+ TG3_BDINFO_HOST_ADDR +
+ TG3_64BIT_REG_LOW),
+ ((u64) mapping & 0xffffffff));
+ tg3_write_mem(tp,
+ (bdinfo_addr +
+ TG3_BDINFO_MAXLEN_FLAGS),
+ maxlen_flags);
+ tg3_write_mem(tp,
+ (bdinfo_addr +
+ TG3_BDINFO_NIC_ADDR),
+ nic_addr);
+}
+
+static void __tg3_set_rx_mode(struct net_device *);
+
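+/* Bring the chip from reset to fully programmed: reset the core, wait
+ * for the boot firmware handshake, program the buffer manager, rings,
+ * MAC address, host coalescing and DMA engines, then redo PHY setup.
+ */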
+/* tp->lock is held. */
+static int tg3_reset_hw(struct tg3 *tp)
+{
+ u32 val;
+ int i, err;
+
+ tg3_disable_ints(tp);
+
+ tg3_stop_fw(tp);
+
+ if (tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) {
+ err = tg3_abort_hw(tp);
+ if (err)
+ return err;
+ }
+
+ tg3_chip_reset(tp);
+
+ tw32(GRC_MODE, tp->grc_mode);
+ tg3_write_mem(tp,
+ NIC_SRAM_FIRMWARE_MBOX,
+ NIC_SRAM_FIRMWARE_MBOX_MAGIC1);
+ if (tp->phy_id == PHY_ID_SERDES) {
+ tp->mac_mode = MAC_MODE_PORT_MODE_TBI;
+ tw32(MAC_MODE, tp->mac_mode);
+ } else
+ tw32(MAC_MODE, 0);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ /* Wait for firmware initialization to complete. */
+ for (i = 0; i < 100000; i++) {
+ tg3_read_mem(tp, NIC_SRAM_FIRMWARE_MBOX, &val);
+ if (val == ~NIC_SRAM_FIRMWARE_MBOX_MAGIC1)
+ break;
+ udelay(10);
+ }
+ if (i >= 100000) {
+ printk(KERN_ERR PFX "tg3_reset_hw timed out for %s, "
+ "firmware will not restart magic=%08x\n",
+ tp->dev->name, val);
+ return -ENODEV;
+ }
+
+ if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF)
+ tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX,
+ DRV_STATE_START);
+ else
+ tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX,
+ DRV_STATE_SUSPEND);
+
+ /* This works around an issue with Athlon chipsets on
+ * B3 tigon3 silicon. This bit has no effect on any
+ * other revision.
+ */
+ val = tr32(TG3PCI_CLOCK_CTRL);
+ val |= CLOCK_CTRL_DELAY_PCI_GRANT;
+ tw32(TG3PCI_CLOCK_CTRL, val);
+ tr32(TG3PCI_CLOCK_CTRL);
+
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5704_A0 &&
+ (tp->tg3_flags & TG3_FLAG_PCIX_MODE)) {
+ val = tr32(TG3PCI_PCISTATE);
+ val |= PCISTATE_RETRY_SAME_DMA;
+ tw32(TG3PCI_PCISTATE, val);
+ }
+
+ /* Clear statistics/status block in chip, and status block in ram. */
+ for (i = NIC_SRAM_STATS_BLK;
+ i < NIC_SRAM_STATUS_BLK + TG3_HW_STATUS_SIZE;
+ i += sizeof(u32)) {
+ tg3_write_mem(tp, i, 0);
+ udelay(40);
+ }
+ memset(tp->hw_status, 0, TG3_HW_STATUS_SIZE);
+
+ /* This value is determined during the probe-time DMA
+ * engine test, tg3_test_dma.
+ */
+ tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl);
+
+ tp->grc_mode &= ~(GRC_MODE_HOST_SENDBDS |
+ GRC_MODE_4X_NIC_SEND_RINGS |
+ GRC_MODE_NO_TX_PHDR_CSUM |
+ GRC_MODE_NO_RX_PHDR_CSUM);
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS)
+ tp->grc_mode |= GRC_MODE_HOST_SENDBDS;
+ else
+ tp->grc_mode |= GRC_MODE_4X_NIC_SEND_RINGS;
+ if (tp->tg3_flags & TG3_FLAG_NO_TX_PSEUDO_CSUM)
+ tp->grc_mode |= GRC_MODE_NO_TX_PHDR_CSUM;
+ if (tp->tg3_flags & TG3_FLAG_NO_RX_PSEUDO_CSUM)
+ tp->grc_mode |= GRC_MODE_NO_RX_PHDR_CSUM;
+
+ tw32(GRC_MODE,
+ tp->grc_mode |
+ (GRC_MODE_IRQ_ON_MAC_ATTN | GRC_MODE_HOST_STACKUP));
+
+ /* Set up the timer prescaler register. The clock is always 66 MHz. */
+ tw32(GRC_MISC_CFG,
+ (65 << GRC_MISC_CFG_PRESCALAR_SHIFT));
+
+ /* Initialize MBUF/DESC pool. */
+ tw32(BUFMGR_MB_POOL_ADDR, NIC_SRAM_MBUF_POOL_BASE);
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704)
+ tw32(BUFMGR_MB_POOL_SIZE, NIC_SRAM_MBUF_POOL_SIZE64);
+ else
+ tw32(BUFMGR_MB_POOL_SIZE, NIC_SRAM_MBUF_POOL_SIZE96);
+ tw32(BUFMGR_DMA_DESC_POOL_ADDR, NIC_SRAM_DMA_DESC_POOL_BASE);
+ tw32(BUFMGR_DMA_DESC_POOL_SIZE, NIC_SRAM_DMA_DESC_POOL_SIZE);
+
+ if (!(tp->tg3_flags & TG3_FLAG_JUMBO_ENABLE)) {
+ tw32(BUFMGR_MB_RDMA_LOW_WATER,
+ tp->bufmgr_config.mbuf_read_dma_low_water);
+ tw32(BUFMGR_MB_MACRX_LOW_WATER,
+ tp->bufmgr_config.mbuf_mac_rx_low_water);
+ tw32(BUFMGR_MB_HIGH_WATER,
+ tp->bufmgr_config.mbuf_high_water);
+ } else {
+ tw32(BUFMGR_MB_RDMA_LOW_WATER,
+ tp->bufmgr_config.mbuf_read_dma_low_water_jumbo);
+ tw32(BUFMGR_MB_MACRX_LOW_WATER,
+ tp->bufmgr_config.mbuf_mac_rx_low_water_jumbo);
+ tw32(BUFMGR_MB_HIGH_WATER,
+ tp->bufmgr_config.mbuf_high_water_jumbo);
+ }
+ tw32(BUFMGR_DMA_LOW_WATER,
+ tp->bufmgr_config.dma_low_water);
+ tw32(BUFMGR_DMA_HIGH_WATER,
+ tp->bufmgr_config.dma_high_water);
+
+ tw32(BUFMGR_MODE, BUFMGR_MODE_ENABLE | BUFMGR_MODE_ATTN_ENABLE);
+ for (i = 0; i < 2000; i++) {
+ if (tr32(BUFMGR_MODE) & BUFMGR_MODE_ENABLE)
+ break;
+ udelay(10);
+ }
+ if (i >= 2000) {
+ printk(KERN_ERR PFX "tg3_reset_hw cannot enable BUFMGR for %s.\n",
+ tp->dev->name);
+ return -ENODEV;
+ }
+
+ tw32(FTQ_RESET, 0xffffffff);
+ tw32(FTQ_RESET, 0x00000000);
+ for (i = 0; i < 2000; i++) {
+ if (tr32(FTQ_RESET) == 0x00000000)
+ break;
+ udelay(10);
+ }
+ if (i >= 2000) {
+ printk(KERN_ERR PFX "tg3_reset_hw cannot reset FTQ for %s.\n",
+ tp->dev->name);
+ return -ENODEV;
+ }
+
+ /* Initialize TG3_BDINFO's at:
+ * RCVDBDI_STD_BD: standard eth size rx ring
+ * RCVDBDI_JUMBO_BD: jumbo frame rx ring
+ * RCVDBDI_MINI_BD: small frame rx ring (??? does not work)
+ *
+ * like so:
+ * TG3_BDINFO_HOST_ADDR: high/low parts of DMA address of ring
+ * TG3_BDINFO_MAXLEN_FLAGS: (rx max buffer size << 16) |
+ * ring attribute flags
+ * TG3_BDINFO_NIC_ADDR: location of descriptors in nic SRAM
+ *
+ * Standard receive ring @ NIC_SRAM_RX_BUFFER_DESC, 512 entries.
+ * Jumbo receive ring @ NIC_SRAM_RX_JUMBO_BUFFER_DESC, 256 entries.
+ *
+ * The size of each ring is fixed in the firmware, but the location is
+ * configurable.
+ */
+ tw32(RCVDBDI_STD_BD + TG3_BDINFO_HOST_ADDR + TG3_64BIT_REG_HIGH,
+ ((u64) tp->rx_std_mapping >> 32));
+ tw32(RCVDBDI_STD_BD + TG3_BDINFO_HOST_ADDR + TG3_64BIT_REG_LOW,
+ ((u64) tp->rx_std_mapping & 0xffffffff));
+ tw32(RCVDBDI_STD_BD + TG3_BDINFO_MAXLEN_FLAGS,
+ RX_STD_MAX_SIZE << BDINFO_FLAGS_MAXLEN_SHIFT);
+ tw32(RCVDBDI_STD_BD + TG3_BDINFO_NIC_ADDR,
+ NIC_SRAM_RX_BUFFER_DESC);
+
+ tw32(RCVDBDI_MINI_BD + TG3_BDINFO_MAXLEN_FLAGS,
+ BDINFO_FLAGS_DISABLED);
+
+ if (tp->tg3_flags & TG3_FLAG_JUMBO_ENABLE) {
+ tw32(RCVDBDI_JUMBO_BD + TG3_BDINFO_HOST_ADDR + TG3_64BIT_REG_HIGH,
+ ((u64) tp->rx_jumbo_mapping >> 32));
+ tw32(RCVDBDI_JUMBO_BD + TG3_BDINFO_HOST_ADDR + TG3_64BIT_REG_LOW,
+ ((u64) tp->rx_jumbo_mapping & 0xffffffff));
+ tw32(RCVDBDI_JUMBO_BD + TG3_BDINFO_MAXLEN_FLAGS,
+ RX_JUMBO_MAX_SIZE << BDINFO_FLAGS_MAXLEN_SHIFT);
+ tw32(RCVDBDI_JUMBO_BD + TG3_BDINFO_NIC_ADDR,
+ NIC_SRAM_RX_JUMBO_BUFFER_DESC);
+ } else {
+ tw32(RCVDBDI_JUMBO_BD + TG3_BDINFO_MAXLEN_FLAGS,
+ BDINFO_FLAGS_DISABLED);
+ }
+
+ /* Setup replenish thresholds. */
+ tw32(RCVBDI_STD_THRESH, tp->rx_pending / 8);
+ tw32(RCVBDI_JUMBO_THRESH, tp->rx_jumbo_pending / 8);
+
+ /* Clear out send RCB ring in SRAM. */
+ for (i = NIC_SRAM_SEND_RCB; i < NIC_SRAM_RCV_RET_RCB; i += TG3_BDINFO_SIZE)
+ tg3_write_mem(tp, i + TG3_BDINFO_MAXLEN_FLAGS, BDINFO_FLAGS_DISABLED);
+
+ tp->tx_prod = 0;
+ tp->tx_cons = 0;
+ tw32_mailbox(MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW, 0);
+ tw32_mailbox(MAILBOX_SNDNIC_PROD_IDX_0 + TG3_64BIT_REG_LOW, 0);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_SNDNIC_PROD_IDX_0 + TG3_64BIT_REG_LOW);
+
+ if (tp->tg3_flags & TG3_FLAG_HOST_TXDS) {
+ tg3_set_bdinfo(tp, NIC_SRAM_SEND_RCB,
+ tp->tx_desc_mapping,
+ (TG3_TX_RING_SIZE <<
+ BDINFO_FLAGS_MAXLEN_SHIFT),
+ NIC_SRAM_TX_BUFFER_DESC);
+ } else {
+ tg3_set_bdinfo(tp, NIC_SRAM_SEND_RCB,
+ 0,
+ BDINFO_FLAGS_DISABLED,
+ NIC_SRAM_TX_BUFFER_DESC);
+ }
+
+ for (i = NIC_SRAM_RCV_RET_RCB; i < NIC_SRAM_STATS_BLK; i += TG3_BDINFO_SIZE) {
+ tg3_write_mem(tp, i + TG3_BDINFO_MAXLEN_FLAGS,
+ BDINFO_FLAGS_DISABLED);
+ }
+
+ tp->rx_rcb_ptr = 0;
+ tw32_mailbox(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW, 0);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW);
+
+ tg3_set_bdinfo(tp, NIC_SRAM_RCV_RET_RCB,
+ tp->rx_rcb_mapping,
+ (TG3_RX_RCB_RING_SIZE <<
+ BDINFO_FLAGS_MAXLEN_SHIFT),
+ 0);
+
+ tp->rx_std_ptr = tp->rx_pending;
+ tw32_mailbox(MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW,
+ tp->rx_std_ptr);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW);
+
+ if (tp->tg3_flags & TG3_FLAG_JUMBO_ENABLE)
+ tp->rx_jumbo_ptr = tp->rx_jumbo_pending;
+ else
+ tp->rx_jumbo_ptr = 0;
+ tw32_mailbox(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW,
+ tp->rx_jumbo_ptr);
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW);
+
+ /* Initialize MAC address and backoff seed. */
+ __tg3_set_mac_addr(tp);
+
+ /* MTU + ethernet header + FCS + optional VLAN tag */
+ tw32(MAC_RX_MTU_SIZE, tp->dev->mtu + ETH_HLEN + 8);
+
+ /* The slot time is changed by tg3_setup_phy if we
+ * run at gigabit with half duplex.
+ */
+ tw32(MAC_TX_LENGTHS,
+ (2 << TX_LENGTHS_IPG_CRS_SHIFT) |
+ (6 << TX_LENGTHS_IPG_SHIFT) |
+ (32 << TX_LENGTHS_SLOT_TIME_SHIFT));
+
+ /* Receive rules. */
+ tw32(MAC_RCV_RULE_CFG, RCV_RULE_CFG_DEFAULT_CLASS);
+ tw32(RCVLPC_CONFIG, 0x0181);
+
+ /* Receive/send statistics. */
+ tw32(RCVLPC_STATS_ENABLE, 0xffffff);
+ tw32(RCVLPC_STATSCTRL, RCVLPC_STATSCTRL_ENABLE);
+ tw32(SNDDATAI_STATSENAB, 0xffffff);
+ tw32(SNDDATAI_STATSCTRL,
+ (SNDDATAI_SCTRL_ENABLE |
+ SNDDATAI_SCTRL_FASTUPD));
+
+ /* Setup host coalescing engine. */
+ tw32(HOSTCC_MODE, 0);
+ for (i = 0; i < 2000; i++) {
+ if (!(tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE))
+ break;
+ udelay(10);
+ }
+
+ /* akw: these are all set back to the default coalescing values. */
+
+ tw32(HOSTCC_RXCOL_TICKS, DEFAULT_RXCOL_TICKS);
+ tw32(HOSTCC_RXMAX_FRAMES, DEFAULT_RXMAX_FRAMES);
+ tw32(HOSTCC_RXCOAL_TICK_INT, DEFAULT_RXCOAL_TICK_INT);
+ tw32(HOSTCC_RXCOAL_MAXF_INT, DEFAULT_RXCOAL_MAXF_INT);
+ tw32(HOSTCC_TXCOL_TICKS, DEFAULT_TXCOL_TICKS);
+ tw32(HOSTCC_TXMAX_FRAMES, DEFAULT_TXMAX_FRAMES);
+ tw32(HOSTCC_TXCOAL_TICK_INT, DEFAULT_TXCOAL_TICK_INT);
+ tw32(HOSTCC_TXCOAL_MAXF_INT, DEFAULT_TXCOAL_MAXF_INT);
+ tw32(HOSTCC_STAT_COAL_TICKS,
+ DEFAULT_STAT_COAL_TICKS);
+
+ /* Status/statistics block address. */
+ tw32(HOSTCC_STATS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH,
+ ((u64) tp->stats_mapping >> 32));
+ tw32(HOSTCC_STATS_BLK_HOST_ADDR + TG3_64BIT_REG_LOW,
+ ((u64) tp->stats_mapping & 0xffffffff));
+ tw32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH,
+ ((u64) tp->status_mapping >> 32));
+ tw32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_LOW,
+ ((u64) tp->status_mapping & 0xffffffff));
+ tw32(HOSTCC_STATS_BLK_NIC_ADDR, NIC_SRAM_STATS_BLK);
+ tw32(HOSTCC_STATUS_BLK_NIC_ADDR, NIC_SRAM_STATUS_BLK);
+
+ tw32(HOSTCC_MODE, HOSTCC_MODE_ENABLE | tp->coalesce_mode);
+
+ tw32(RCVCC_MODE, RCVCC_MODE_ENABLE | RCVCC_MODE_ATTN_ENABLE);
+ tw32(RCVLPC_MODE, RCVLPC_MODE_ENABLE);
+ tw32(RCVLSC_MODE, RCVLSC_MODE_ENABLE | RCVLSC_MODE_ATTN_ENABLE);
+
+ tp->mac_mode = MAC_MODE_TXSTAT_ENABLE | MAC_MODE_RXSTAT_ENABLE |
+ MAC_MODE_TDE_ENABLE | MAC_MODE_RDE_ENABLE | MAC_MODE_FHDE_ENABLE;
+ tw32(MAC_MODE, tp->mac_mode | MAC_MODE_RXSTAT_CLEAR | MAC_MODE_TXSTAT_CLEAR);
+ tr32(MAC_MODE);
+ udelay(40);
+
+ tp->grc_local_ctrl = GRC_LCLCTRL_INT_ON_ATTN | GRC_LCLCTRL_AUTO_SEEPROM;
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700)
+ tp->grc_local_ctrl |= (GRC_LCLCTRL_GPIO_OE1 |
+ GRC_LCLCTRL_GPIO_OUTPUT1);
+ tw32(GRC_LOCAL_CTRL, tp->grc_local_ctrl);
+ tr32(GRC_LOCAL_CTRL);
+ udelay(100);
+
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0);
+ tr32(MAILBOX_INTERRUPT_0);
+
+ tw32(DMAC_MODE, DMAC_MODE_ENABLE);
+ tr32(DMAC_MODE);
+ udelay(40);
+
+ tw32(WDMAC_MODE, (WDMAC_MODE_ENABLE | WDMAC_MODE_TGTABORT_ENAB |
+ WDMAC_MODE_MSTABORT_ENAB | WDMAC_MODE_PARITYERR_ENAB |
+ WDMAC_MODE_ADDROFLOW_ENAB | WDMAC_MODE_FIFOOFLOW_ENAB |
+ WDMAC_MODE_FIFOURUN_ENAB | WDMAC_MODE_FIFOOREAD_ENAB |
+ WDMAC_MODE_LNGREAD_ENAB));
+ tr32(WDMAC_MODE);
+ udelay(40);
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 &&
+ (tp->tg3_flags & TG3_FLAG_PCIX_MODE)) {
+ val = tr32(TG3PCI_X_CAPS);
+ val &= ~(PCIX_CAPS_SPLIT_MASK | PCIX_CAPS_BURST_MASK);
+ val |= (PCIX_CAPS_MAX_BURST_5704 << PCIX_CAPS_BURST_SHIFT);
+ if (tp->tg3_flags & TG3_FLAG_SPLIT_MODE)
+ val |= (tp->split_mode_max_reqs <<
+ PCIX_CAPS_SPLIT_SHIFT);
+ tw32(TG3PCI_X_CAPS, val);
+ }
+
+ val = (RDMAC_MODE_ENABLE | RDMAC_MODE_TGTABORT_ENAB |
+ RDMAC_MODE_MSTABORT_ENAB | RDMAC_MODE_PARITYERR_ENAB |
+ RDMAC_MODE_ADDROFLOW_ENAB | RDMAC_MODE_FIFOOFLOW_ENAB |
+ RDMAC_MODE_FIFOURUN_ENAB | RDMAC_MODE_FIFOOREAD_ENAB |
+ RDMAC_MODE_LNGREAD_ENAB);
+ if (tp->tg3_flags & TG3_FLAG_SPLIT_MODE)
+ val |= RDMAC_MODE_SPLIT_ENABLE;
+ tw32(RDMAC_MODE, val);
+ tr32(RDMAC_MODE);
+ udelay(40);
+
+ tw32(RCVDCC_MODE, RCVDCC_MODE_ENABLE | RCVDCC_MODE_ATTN_ENABLE);
+ tw32(MBFREE_MODE, MBFREE_MODE_ENABLE);
+ tw32(SNDDATAC_MODE, SNDDATAC_MODE_ENABLE);
+ tw32(SNDBDC_MODE, SNDBDC_MODE_ENABLE | SNDBDC_MODE_ATTN_ENABLE);
+ tw32(RCVBDI_MODE, RCVBDI_MODE_ENABLE | RCVBDI_MODE_RCB_ATTN_ENAB);
+ tw32(RCVDBDI_MODE, RCVDBDI_MODE_ENABLE | RCVDBDI_MODE_INV_RING_SZ);
+ tw32(SNDDATAI_MODE, SNDDATAI_MODE_ENABLE);
+ tw32(SNDBDI_MODE, SNDBDI_MODE_ENABLE | SNDBDI_MODE_ATTN_ENABLE);
+ tw32(SNDBDS_MODE, SNDBDS_MODE_ENABLE | SNDBDS_MODE_ATTN_ENABLE);
+
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0) {
+ err = tg3_load_5701_a0_firmware_fix(tp);
+ if (err)
+ return err;
+ }
+
+#if TG3_DO_TSO != 0
+ err = tg3_load_tso_firmware(tp);
+ if (err)
+ return err;
+#endif
+
+ tp->tx_mode = TX_MODE_ENABLE;
+ tw32(MAC_TX_MODE, tp->tx_mode);
+ tr32(MAC_TX_MODE);
+ udelay(100);
+
+ tp->rx_mode = RX_MODE_ENABLE;
+ tw32(MAC_RX_MODE, tp->rx_mode);
+ tr32(MAC_RX_MODE);
+ udelay(10);
+
+ if (tp->link_config.phy_is_low_power) {
+ tp->link_config.phy_is_low_power = 0;
+ tp->link_config.speed = tp->link_config.orig_speed;
+ tp->link_config.duplex = tp->link_config.orig_duplex;
+ tp->link_config.autoneg = tp->link_config.orig_autoneg;
+ }
+
+ tp->mi_mode = MAC_MI_MODE_BASE;
+ tw32(MAC_MI_MODE, tp->mi_mode);
+ tr32(MAC_MI_MODE);
+ udelay(40);
+
+ tw32(MAC_LED_CTRL, 0);
+ tw32(MAC_MI_STAT, MAC_MI_STAT_LNKSTAT_ATTN_ENAB);
+ tw32(MAC_RX_MODE, RX_MODE_RESET);
+ tr32(MAC_RX_MODE);
+ udelay(10);
+ tw32(MAC_RX_MODE, tp->rx_mode);
+ tr32(MAC_RX_MODE);
+ udelay(10);
+
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5703_A1)
+ tw32(MAC_SERDES_CFG, 0x616000);
+
+ err = tg3_setup_phy(tp);
+ if (err)
+ return err;
+
+ if (tp->phy_id != PHY_ID_SERDES) {
+ u32 tmp;
+
+ /* Clear CRC stats. */
+ tg3_readphy(tp, 0x1e, &tmp);
+ tg3_writephy(tp, 0x1e, tmp | 0x8000);
+ tg3_readphy(tp, 0x14, &tmp);
+ }
+
+ __tg3_set_rx_mode(tp->dev);
+
+ /* Initialize receive rules. */
+ tw32(MAC_RCV_RULE_0, 0xc2000000 & RCV_RULE_DISABLE_MASK);
+ tw32(MAC_RCV_VALUE_0, 0xffffffff & RCV_RULE_DISABLE_MASK);
+ tw32(MAC_RCV_RULE_1, 0x86000004 & RCV_RULE_DISABLE_MASK);
+ tw32(MAC_RCV_VALUE_1, 0xffffffff & RCV_RULE_DISABLE_MASK);
+#if 0
+ tw32(MAC_RCV_RULE_2, 0); tw32(MAC_RCV_VALUE_2, 0);
+ tw32(MAC_RCV_RULE_3, 0); tw32(MAC_RCV_VALUE_3, 0);
+#endif
+ tw32(MAC_RCV_RULE_4, 0); tw32(MAC_RCV_VALUE_4, 0);
+ tw32(MAC_RCV_RULE_5, 0); tw32(MAC_RCV_VALUE_5, 0);
+ tw32(MAC_RCV_RULE_6, 0); tw32(MAC_RCV_VALUE_6, 0);
+ tw32(MAC_RCV_RULE_7, 0); tw32(MAC_RCV_VALUE_7, 0);
+ tw32(MAC_RCV_RULE_8, 0); tw32(MAC_RCV_VALUE_8, 0);
+ tw32(MAC_RCV_RULE_9, 0); tw32(MAC_RCV_VALUE_9, 0);
+ tw32(MAC_RCV_RULE_10, 0); tw32(MAC_RCV_VALUE_10, 0);
+ tw32(MAC_RCV_RULE_11, 0); tw32(MAC_RCV_VALUE_11, 0);
+ tw32(MAC_RCV_RULE_12, 0); tw32(MAC_RCV_VALUE_12, 0);
+ tw32(MAC_RCV_RULE_13, 0); tw32(MAC_RCV_VALUE_13, 0);
+ tw32(MAC_RCV_RULE_14, 0); tw32(MAC_RCV_VALUE_14, 0);
+ tw32(MAC_RCV_RULE_15, 0); tw32(MAC_RCV_VALUE_15, 0);
+
+ if (tp->tg3_flags & TG3_FLAG_INIT_COMPLETE)
+ tg3_enable_ints(tp);
+
+ return 0;
+}
+
+/* Called at device open time to get the chip ready for
+ * packet processing. Invoked with tp->lock held.
+ */
+static int tg3_init_hw(struct tg3 *tp)
+{
+ int err;
+
+ /* Force the chip into D0. */
+ err = tg3_set_power_state(tp, 0);
+ if (err)
+ goto out;
+
+ tg3_switch_clocks(tp);
+
+ tw32(TG3PCI_MEM_WIN_BASE_ADDR, 0);
+
+ err = tg3_reset_hw(tp);
+
+out:
+ return err;
+}
+
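+/* Periodic driver timer, re-armed every tp->timer_offset jiffies
+ * (HZ/10 at open time).  It kicks the IRQ/status-block handshake,
+ * checks link state roughly once per second, and sends the ASF
+ * heartbeat every 120 seconds.
+ */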
+static void tg3_timer(unsigned long __opaque)
+{
+ struct tg3 *tp = (struct tg3 *) __opaque;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tp->lock, flags);
+ spin_lock(&tp->tx_lock);
+
+ /* All of this garbage is necessary because, when using
+ * non-tagged IRQ status, the mailbox/status_block protocol
+ * the chip uses with the cpu is race prone.
+ */
+ if (tp->hw_status->status & SD_STATUS_UPDATED) {
+ tw32(GRC_LOCAL_CTRL,
+ tp->grc_local_ctrl | GRC_LCLCTRL_SETINT);
+ } else {
+ tw32(HOSTCC_MODE, tp->coalesce_mode |
+ (HOSTCC_MODE_ENABLE | HOSTCC_MODE_NOW));
+ }
+
+ if (!(tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
+ tg3_halt(tp);
+ tg3_init_rings(tp);
+ tg3_init_hw(tp);
+ }
+
+ /* This part only runs once per second. */
+ if (!--tp->timer_counter) {
+ if (tp->tg3_flags & TG3_FLAG_USE_LINKCHG_REG) {
+ u32 mac_stat;
+ int phy_event;
+
+ mac_stat = tr32(MAC_STATUS);
+
+ phy_event = 0;
+ if (tp->tg3_flags & TG3_FLAG_USE_MI_INTERRUPT) {
+ if (mac_stat & MAC_STATUS_MI_INTERRUPT)
+ phy_event = 1;
+ } else if (mac_stat & MAC_STATUS_LNKSTATE_CHANGED)
+ phy_event = 1;
+
+ if (phy_event)
+ tg3_setup_phy(tp);
+ } else if (tp->tg3_flags & TG3_FLAG_POLL_SERDES) {
+ u32 mac_stat = tr32(MAC_STATUS);
+ int need_setup = 0;
+
+ if (netif_carrier_ok(tp->dev) &&
+ (mac_stat & MAC_STATUS_LNKSTATE_CHANGED)) {
+ need_setup = 1;
+ }
+ if (!netif_carrier_ok(tp->dev) &&
+ (mac_stat & MAC_STATUS_PCS_SYNCED)) {
+ need_setup = 1;
+ }
+ if (need_setup) {
+ tw32(MAC_MODE,
+ (tp->mac_mode &
+ ~MAC_MODE_PORT_MODE_MASK));
+ tr32(MAC_MODE);
+ udelay(40);
+ tw32(MAC_MODE, tp->mac_mode);
+ tr32(MAC_MODE);
+ udelay(40);
+ tg3_setup_phy(tp);
+ }
+ }
+
+ tp->timer_counter = tp->timer_multiplier;
+ }
+
+ /* Heartbeat is only sent once every 120 seconds. */
+ if (!--tp->asf_counter) {
+ if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) {
+ u32 val;
+
+ tg3_write_mem(tp, NIC_SRAM_FW_CMD_MBOX, FWCMD_NICDRV_ALIVE);
+ tg3_write_mem(tp, NIC_SRAM_FW_CMD_LEN_MBOX, 4);
+ tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX, 3);
+ val = tr32(GRC_RX_CPU_EVENT);
+ val |= (1 << 14);
+ tw32(GRC_RX_CPU_EVENT, val);
+ }
+ tp->asf_counter = tp->asf_multiplier;
+ }
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irqrestore(&tp->lock, flags);
+
+ tp->timer.expires = jiffies + tp->timer_offset;
+ add_timer(&tp->timer);
+}
+
+static int tg3_open(struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+ int err;
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tg3_disable_ints(tp);
+ tp->tg3_flags &= ~TG3_FLAG_INIT_COMPLETE;
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ /* If you move this call, make sure TG3_FLAG_HOST_TXDS in
+ * tp->tg3_flags is still accurate at the new location.
+ */
+ err = tg3_alloc_consistent(tp);
+ if (err)
+ return err;
+
+ err = request_irq(dev->irq, tg3_interrupt,
+ SA_SHIRQ, dev->name, dev);
+
+ if (err) {
+ tg3_free_consistent(tp);
+ return err;
+ }
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tg3_init_rings(tp);
+
+ err = tg3_init_hw(tp);
+ if (err) {
+ tg3_halt(tp);
+ tg3_free_rings(tp);
+ } else {
+ tp->timer_offset = HZ / 10;
+ tp->timer_counter = tp->timer_multiplier = 10;
+ tp->asf_counter = tp->asf_multiplier = (10 * 120);
+
+ init_timer(&tp->timer);
+ tp->timer.expires = jiffies + tp->timer_offset;
+ tp->timer.data = (unsigned long) tp;
+ tp->timer.function = tg3_timer;
+ add_timer(&tp->timer);
+
+ tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE;
+ }
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ if (err) {
+ free_irq(dev->irq, dev);
+ tg3_free_consistent(tp);
+ return err;
+ }
+
+ netif_start_queue(dev);
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tg3_enable_ints(tp);
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+}
+
+#if 0
+/*static*/ void tg3_dump_state(struct tg3 *tp)
+{
+ u32 val32, val32_2, val32_3, val32_4, val32_5;
+ u16 val16;
+ int i;
+
+ pci_read_config_word(tp->pdev, PCI_STATUS, &val16);
+ pci_read_config_dword(tp->pdev, TG3PCI_PCISTATE, &val32);
+ printk("DEBUG: PCI status [%04x] TG3PCI state[%08x]\n",
+ val16, val32);
+
+ /* MAC block */
+ printk("DEBUG: MAC_MODE[%08x] MAC_STATUS[%08x]\n",
+ tr32(MAC_MODE), tr32(MAC_STATUS));
+ printk(" MAC_EVENT[%08x] MAC_LED_CTRL[%08x]\n",
+ tr32(MAC_EVENT), tr32(MAC_LED_CTRL));
+ printk("DEBUG: MAC_TX_MODE[%08x] MAC_TX_STATUS[%08x]\n",
+ tr32(MAC_TX_MODE), tr32(MAC_TX_STATUS));
+ printk(" MAC_RX_MODE[%08x] MAC_RX_STATUS[%08x]\n",
+ tr32(MAC_RX_MODE), tr32(MAC_RX_STATUS));
+
+ /* Send data initiator control block */
+ printk("DEBUG: SNDDATAI_MODE[%08x] SNDDATAI_STATUS[%08x]\n",
+ tr32(SNDDATAI_MODE), tr32(SNDDATAI_STATUS));
+ printk(" SNDDATAI_STATSCTRL[%08x]\n",
+ tr32(SNDDATAI_STATSCTRL));
+
+ /* Send data completion control block */
+ printk("DEBUG: SNDDATAC_MODE[%08x]\n", tr32(SNDDATAC_MODE));
+
+ /* Send BD ring selector block */
+ printk("DEBUG: SNDBDS_MODE[%08x] SNDBDS_STATUS[%08x]\n",
+ tr32(SNDBDS_MODE), tr32(SNDBDS_STATUS));
+
+ /* Send BD initiator control block */
+ printk("DEBUG: SNDBDI_MODE[%08x] SNDBDI_STATUS[%08x]\n",
+ tr32(SNDBDI_MODE), tr32(SNDBDI_STATUS));
+
+ /* Send BD completion control block */
+ printk("DEBUG: SNDBDC_MODE[%08x]\n", tr32(SNDBDC_MODE));
+
+ /* Receive list placement control block */
+ printk("DEBUG: RCVLPC_MODE[%08x] RCVLPC_STATUS[%08x]\n",
+ tr32(RCVLPC_MODE), tr32(RCVLPC_STATUS));
+ printk(" RCVLPC_STATSCTRL[%08x]\n",
+ tr32(RCVLPC_STATSCTRL));
+
+ /* Receive data and receive BD initiator control block */
+ printk("DEBUG: RCVDBDI_MODE[%08x] RCVDBDI_STATUS[%08x]\n",
+ tr32(RCVDBDI_MODE), tr32(RCVDBDI_STATUS));
+
+ /* Receive data completion control block */
+ printk("DEBUG: RCVDCC_MODE[%08x]\n",
+ tr32(RCVDCC_MODE));
+
+ /* Receive BD initiator control block */
+ printk("DEBUG: RCVBDI_MODE[%08x] RCVBDI_STATUS[%08x]\n",
+ tr32(RCVBDI_MODE), tr32(RCVBDI_STATUS));
+
+ /* Receive BD completion control block */
+ printk("DEBUG: RCVCC_MODE[%08x] RCVCC_STATUS[%08x]\n",
+ tr32(RCVCC_MODE), tr32(RCVCC_STATUS));
+
+ /* Receive list selector control block */
+ printk("DEBUG: RCVLSC_MODE[%08x] RCVLSC_STATUS[%08x]\n",
+ tr32(RCVLSC_MODE), tr32(RCVLSC_STATUS));
+
+ /* Mbuf cluster free block */
+ printk("DEBUG: MBFREE_MODE[%08x] MBFREE_STATUS[%08x]\n",
+ tr32(MBFREE_MODE), tr32(MBFREE_STATUS));
+
+ /* Host coalescing control block */
+ printk("DEBUG: HOSTCC_MODE[%08x] HOSTCC_STATUS[%08x]\n",
+ tr32(HOSTCC_MODE), tr32(HOSTCC_STATUS));
+ printk("DEBUG: HOSTCC_STATS_BLK_HOST_ADDR[%08x%08x]\n",
+ tr32(HOSTCC_STATS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH),
+ tr32(HOSTCC_STATS_BLK_HOST_ADDR + TG3_64BIT_REG_LOW));
+ printk("DEBUG: HOSTCC_STATUS_BLK_HOST_ADDR[%08x%08x]\n",
+ tr32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH),
+ tr32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_LOW));
+ printk("DEBUG: HOSTCC_STATS_BLK_NIC_ADDR[%08x]\n",
+ tr32(HOSTCC_STATS_BLK_NIC_ADDR));
+ printk("DEBUG: HOSTCC_STATUS_BLK_NIC_ADDR[%08x]\n",
+ tr32(HOSTCC_STATUS_BLK_NIC_ADDR));
+
+ /* Memory arbiter control block */
+ printk("DEBUG: MEMARB_MODE[%08x] MEMARB_STATUS[%08x]\n",
+ tr32(MEMARB_MODE), tr32(MEMARB_STATUS));
+
+ /* Buffer manager control block */
+ printk("DEBUG: BUFMGR_MODE[%08x] BUFMGR_STATUS[%08x]\n",
+ tr32(BUFMGR_MODE), tr32(BUFMGR_STATUS));
+ printk("DEBUG: BUFMGR_MB_POOL_ADDR[%08x] BUFMGR_MB_POOL_SIZE[%08x]\n",
+ tr32(BUFMGR_MB_POOL_ADDR), tr32(BUFMGR_MB_POOL_SIZE));
+ printk("DEBUG: BUFMGR_DMA_DESC_POOL_ADDR[%08x] "
+ "BUFMGR_DMA_DESC_POOL_SIZE[%08x]\n",
+ tr32(BUFMGR_DMA_DESC_POOL_ADDR),
+ tr32(BUFMGR_DMA_DESC_POOL_SIZE));
+
+ /* Read DMA control block */
+ printk("DEBUG: RDMAC_MODE[%08x] RDMAC_STATUS[%08x]\n",
+ tr32(RDMAC_MODE), tr32(RDMAC_STATUS));
+
+ /* Write DMA control block */
+ printk("DEBUG: WDMAC_MODE[%08x] WDMAC_STATUS[%08x]\n",
+ tr32(WDMAC_MODE), tr32(WDMAC_STATUS));
+
+ /* DMA completion block */
+ printk("DEBUG: DMAC_MODE[%08x]\n",
+ tr32(DMAC_MODE));
+
+ /* GRC block */
+ printk("DEBUG: GRC_MODE[%08x] GRC_MISC_CFG[%08x]\n",
+ tr32(GRC_MODE), tr32(GRC_MISC_CFG));
+ printk("DEBUG: GRC_LOCAL_CTRL[%08x]\n",
+ tr32(GRC_LOCAL_CTRL));
+
+ /* TG3_BDINFOs */
+ printk("DEBUG: RCVDBDI_JUMBO_BD[%08x%08x:%08x:%08x]\n",
+ tr32(RCVDBDI_JUMBO_BD + 0x0),
+ tr32(RCVDBDI_JUMBO_BD + 0x4),
+ tr32(RCVDBDI_JUMBO_BD + 0x8),
+ tr32(RCVDBDI_JUMBO_BD + 0xc));
+ printk("DEBUG: RCVDBDI_STD_BD[%08x%08x:%08x:%08x]\n",
+ tr32(RCVDBDI_STD_BD + 0x0),
+ tr32(RCVDBDI_STD_BD + 0x4),
+ tr32(RCVDBDI_STD_BD + 0x8),
+ tr32(RCVDBDI_STD_BD + 0xc));
+ printk("DEBUG: RCVDBDI_MINI_BD[%08x%08x:%08x:%08x]\n",
+ tr32(RCVDBDI_MINI_BD + 0x0),
+ tr32(RCVDBDI_MINI_BD + 0x4),
+ tr32(RCVDBDI_MINI_BD + 0x8),
+ tr32(RCVDBDI_MINI_BD + 0xc));
+
+ tg3_read_mem(tp, NIC_SRAM_SEND_RCB + 0x0, &val32);
+ tg3_read_mem(tp, NIC_SRAM_SEND_RCB + 0x4, &val32_2);
+ tg3_read_mem(tp, NIC_SRAM_SEND_RCB + 0x8, &val32_3);
+ tg3_read_mem(tp, NIC_SRAM_SEND_RCB + 0xc, &val32_4);
+ printk("DEBUG: SRAM_SEND_RCB_0[%08x%08x:%08x:%08x]\n",
+ val32, val32_2, val32_3, val32_4);
+
+ tg3_read_mem(tp, NIC_SRAM_RCV_RET_RCB + 0x0, &val32);
+ tg3_read_mem(tp, NIC_SRAM_RCV_RET_RCB + 0x4, &val32_2);
+ tg3_read_mem(tp, NIC_SRAM_RCV_RET_RCB + 0x8, &val32_3);
+ tg3_read_mem(tp, NIC_SRAM_RCV_RET_RCB + 0xc, &val32_4);
+ printk("DEBUG: SRAM_RCV_RET_RCB_0[%08x%08x:%08x:%08x]\n",
+ val32, val32_2, val32_3, val32_4);
+
+ tg3_read_mem(tp, NIC_SRAM_STATUS_BLK + 0x0, &val32);
+ tg3_read_mem(tp, NIC_SRAM_STATUS_BLK + 0x4, &val32_2);
+ tg3_read_mem(tp, NIC_SRAM_STATUS_BLK + 0x8, &val32_3);
+ tg3_read_mem(tp, NIC_SRAM_STATUS_BLK + 0xc, &val32_4);
+ tg3_read_mem(tp, NIC_SRAM_STATUS_BLK + 0x10, &val32_5);
+ printk("DEBUG: SRAM_STATUS_BLK[%08x:%08x:%08x:%08x:%08x]\n",
+ val32, val32_2, val32_3, val32_4, val32_5);
+
+ /* SW status block */
+ printk("DEBUG: Host status block [%08x:%08x:(%04x:%04x:%04x):(%04x:%04x)]\n",
+ tp->hw_status->status,
+ tp->hw_status->status_tag,
+ tp->hw_status->rx_jumbo_consumer,
+ tp->hw_status->rx_consumer,
+ tp->hw_status->rx_mini_consumer,
+ tp->hw_status->idx[0].rx_producer,
+ tp->hw_status->idx[0].tx_consumer);
+
+ /* SW statistics block */
+ printk("DEBUG: Host statistics block [%08x:%08x:%08x:%08x]\n",
+ ((u32 *)tp->hw_stats)[0],
+ ((u32 *)tp->hw_stats)[1],
+ ((u32 *)tp->hw_stats)[2],
+ ((u32 *)tp->hw_stats)[3]);
+
+ /* Mailboxes */
+ printk("DEBUG: SNDHOST_PROD[%08x%08x] SNDNIC_PROD[%08x%08x]\n",
+ tr32(MAILBOX_SNDHOST_PROD_IDX_0 + 0x0),
+ tr32(MAILBOX_SNDHOST_PROD_IDX_0 + 0x4),
+ tr32(MAILBOX_SNDNIC_PROD_IDX_0 + 0x0),
+ tr32(MAILBOX_SNDNIC_PROD_IDX_0 + 0x4));
+
+ /* NIC side send descriptors. */
+ for (i = 0; i < 6; i++) {
+ unsigned long txd;
+
+ txd = tp->regs + NIC_SRAM_WIN_BASE + NIC_SRAM_TX_BUFFER_DESC
+ + (i * sizeof(struct tg3_tx_buffer_desc));
+ printk("DEBUG: NIC TXD(%d)[%08x:%08x:%08x:%08x]\n",
+ i,
+ readl(txd + 0x0), readl(txd + 0x4),
+ readl(txd + 0x8), readl(txd + 0xc));
+ }
+
+ /* NIC side RX descriptors. */
+ for (i = 0; i < 6; i++) {
+ unsigned long rxd;
+
+ rxd = tp->regs + NIC_SRAM_WIN_BASE + NIC_SRAM_RX_BUFFER_DESC
+ + (i * sizeof(struct tg3_rx_buffer_desc));
+ printk("DEBUG: NIC RXD_STD(%d)[0][%08x:%08x:%08x:%08x]\n",
+ i,
+ readl(rxd + 0x0), readl(rxd + 0x4),
+ readl(rxd + 0x8), readl(rxd + 0xc));
+ rxd += (4 * sizeof(u32));
+ printk("DEBUG: NIC RXD_STD(%d)[1][%08x:%08x:%08x:%08x]\n",
+ i,
+ readl(rxd + 0x0), readl(rxd + 0x4),
+ readl(rxd + 0x8), readl(rxd + 0xc));
+ }
+
+ for (i = 0; i < 6; i++) {
+ unsigned long rxd;
+
+ rxd = tp->regs + NIC_SRAM_WIN_BASE + NIC_SRAM_RX_JUMBO_BUFFER_DESC
+ + (i * sizeof(struct tg3_rx_buffer_desc));
+ printk("DEBUG: NIC RXD_JUMBO(%d)[0][%08x:%08x:%08x:%08x]\n",
+ i,
+ readl(rxd + 0x0), readl(rxd + 0x4),
+ readl(rxd + 0x8), readl(rxd + 0xc));
+ rxd += (4 * sizeof(u32));
+ printk("DEBUG: NIC RXD_JUMBO(%d)[1][%08x:%08x:%08x:%08x]\n",
+ i,
+ readl(rxd + 0x0), readl(rxd + 0x4),
+ readl(rxd + 0x8), readl(rxd + 0xc));
+ }
+}
+#endif
+
+static struct net_device_stats *tg3_get_stats(struct net_device *);
+
+static int tg3_close(struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+
+ netif_stop_queue(dev);
+
+ del_timer_sync(&tp->timer);
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+#if 0
+ tg3_dump_state(tp);
+#endif
+
+ tg3_disable_ints(tp);
+
+ tg3_halt(tp);
+ tg3_free_rings(tp);
+ tp->tg3_flags &=
+ ~(TG3_FLAG_INIT_COMPLETE |
+ TG3_FLAG_GOT_SERDES_FLOWCTL);
+ netif_carrier_off(tp->dev);
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ free_irq(dev->irq, dev);
+
+ memcpy(&tp->net_stats_prev, tg3_get_stats(tp->dev),
+ sizeof(tp->net_stats_prev));
+
+ tg3_free_consistent(tp);
+
+ return 0;
+}
+
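+/* The chip's statistics counters are 64-bit values; on a 32-bit host
+ * only the low word fits in an unsigned long.
+ */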
+static inline unsigned long get_stat64(tg3_stat64_t *val)
+{
+ unsigned long ret;
+
+#if (BITS_PER_LONG == 32)
+ ret = val->low;
+#else
+ ret = ((u64)val->high << 32) | ((u64)val->low);
+#endif
+ return ret;
+}
+
+static unsigned long calc_crc_errors(struct tg3 *tp)
+{
+ struct tg3_hw_stats *hw_stats = tp->hw_stats;
+
+ if (tp->phy_id != PHY_ID_SERDES &&
+ (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701)) {
+ unsigned long flags;
+ u32 val;
+
+ spin_lock_irqsave(&tp->lock, flags);
+ tg3_readphy(tp, 0x1e, &val);
+ tg3_writephy(tp, 0x1e, val | 0x8000);
+ tg3_readphy(tp, 0x14, &val);
+ spin_unlock_irqrestore(&tp->lock, flags);
+
+ tp->phy_crc_errors += val;
+
+ return tp->phy_crc_errors;
+ }
+
+ return get_stat64(&hw_stats->rx_fcs_errors);
+}
+
+static struct net_device_stats *tg3_get_stats(struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+ struct net_device_stats *stats = &tp->net_stats;
+ struct net_device_stats *old_stats = &tp->net_stats_prev;
+ struct tg3_hw_stats *hw_stats = tp->hw_stats;
+
+ if (!hw_stats)
+ return old_stats;
+
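+ /* Fold the live hardware counters into the totals captured at
+ * the last close (net_stats_prev), so statistics survive an
+ * interface down/up cycle.
+ */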
+ stats->rx_packets = old_stats->rx_packets +
+ get_stat64(&hw_stats->rx_ucast_packets) +
+ get_stat64(&hw_stats->rx_mcast_packets) +
+ get_stat64(&hw_stats->rx_bcast_packets);
+
+ stats->tx_packets = old_stats->tx_packets +
+ get_stat64(&hw_stats->COS_out_packets[0]);
+
+ stats->rx_bytes = old_stats->rx_bytes +
+ get_stat64(&hw_stats->rx_octets);
+ stats->tx_bytes = old_stats->tx_bytes +
+ get_stat64(&hw_stats->tx_octets);
+
+ stats->rx_errors = old_stats->rx_errors +
+ get_stat64(&hw_stats->rx_errors);
+ stats->tx_errors = old_stats->tx_errors +
+ get_stat64(&hw_stats->tx_errors) +
+ get_stat64(&hw_stats->tx_mac_errors) +
+ get_stat64(&hw_stats->tx_carrier_sense_errors) +
+ get_stat64(&hw_stats->tx_discards);
+
+ stats->multicast = old_stats->multicast +
+ get_stat64(&hw_stats->rx_mcast_packets);
+ stats->collisions = old_stats->collisions +
+ get_stat64(&hw_stats->tx_collisions);
+
+ stats->rx_length_errors = old_stats->rx_length_errors +
+ get_stat64(&hw_stats->rx_frame_too_long_errors) +
+ get_stat64(&hw_stats->rx_undersize_packets);
+
+ stats->rx_over_errors = old_stats->rx_over_errors +
+ get_stat64(&hw_stats->rxbds_empty);
+ stats->rx_frame_errors = old_stats->rx_frame_errors +
+ get_stat64(&hw_stats->rx_align_errors);
+ stats->tx_aborted_errors = old_stats->tx_aborted_errors +
+ get_stat64(&hw_stats->tx_discards);
+ stats->tx_carrier_errors = old_stats->tx_carrier_errors +
+ get_stat64(&hw_stats->tx_carrier_sense_errors);
+
+ stats->rx_crc_errors = old_stats->rx_crc_errors +
+ calc_crc_errors(tp);
+
+ return stats;
+}
+
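+/* Bitwise CRC-32 (the reflected IEEE 802.3 polynomial, 0xedb88320)
+ * over len bytes, returned complemented.  The multicast filter code
+ * below re-inverts it and uses the low 7 bits as a hash.
+ */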
+static inline u32 calc_crc(unsigned char *buf, int len)
+{
+ u32 reg;
+ u32 tmp;
+ int j, k;
+
+ reg = 0xffffffff;
+
+ for (j = 0; j < len; j++) {
+ reg ^= buf[j];
+
+ for (k = 0; k < 8; k++) {
+ tmp = reg & 0x01;
+
+ reg >>= 1;
+
+ if (tmp) {
+ reg ^= 0xedb88320;
+ }
+ }
+ }
+
+ return ~reg;
+}
+
+static void tg3_set_multi(struct tg3 *tp, unsigned int accept_all)
+{
+ /* accept or reject all multicast frames */
+ tw32(MAC_HASH_REG_0, accept_all ? 0xffffffff : 0);
+ tw32(MAC_HASH_REG_1, accept_all ? 0xffffffff : 0);
+ tw32(MAC_HASH_REG_2, accept_all ? 0xffffffff : 0);
+ tw32(MAC_HASH_REG_3, accept_all ? 0xffffffff : 0);
+}
+
+static void __tg3_set_rx_mode(struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+ u32 rx_mode;
+
+ rx_mode = tp->rx_mode & ~(RX_MODE_PROMISC |
+ RX_MODE_KEEP_VLAN_TAG);
+#if TG3_VLAN_TAG_USED
+ if (!tp->vlgrp)
+ rx_mode |= RX_MODE_KEEP_VLAN_TAG;
+#else
+ /* By definition, VLAN is always disabled in this
+ * case.
+ */
+ rx_mode |= RX_MODE_KEEP_VLAN_TAG;
+#endif
+
+ if (dev->flags & IFF_PROMISC) {
+ /* Promiscuous mode. */
+ rx_mode |= RX_MODE_PROMISC;
+ } else if (dev->flags & IFF_ALLMULTI) {
+ /* Accept all multicast. */
+ tg3_set_multi (tp, 1);
+ } else if (dev->mc_count < 1) {
+ /* Reject all multicast. */
+ tg3_set_multi (tp, 0);
+ } else {
+ /* Accept one or more multicast(s). */
+ struct dev_mc_list *mclist;
+ unsigned int i;
+ u32 mc_filter[4] = { 0, };
+ u32 regidx;
+ u32 bit;
+ u32 crc;
+
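+ /* Hash each address into one of 128 filter bits: bits 6:5
+ * of the inverted CRC pick one of the four 32-bit hash
+ * registers, bits 4:0 pick the bit within it.
+ */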
+ for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count;
+ i++, mclist = mclist->next) {
+
+ crc = calc_crc (mclist->dmi_addr, ETH_ALEN);
+ bit = ~crc & 0x7f;
+ regidx = (bit & 0x60) >> 5;
+ bit &= 0x1f;
+ mc_filter[regidx] |= (1 << bit);
+ }
+
+ tw32(MAC_HASH_REG_0, mc_filter[0]);
+ tw32(MAC_HASH_REG_1, mc_filter[1]);
+ tw32(MAC_HASH_REG_2, mc_filter[2]);
+ tw32(MAC_HASH_REG_3, mc_filter[3]);
+ }
+
+ if (rx_mode != tp->rx_mode) {
+ tp->rx_mode = rx_mode;
+ tw32(MAC_RX_MODE, rx_mode);
+ tr32(MAC_RX_MODE);
+ udelay(10);
+ }
+}
+
+static void tg3_set_rx_mode(struct net_device *dev)
+{
+ struct tg3 *tp = dev->priv;
+
+ spin_lock_irq(&tp->lock);
+ __tg3_set_rx_mode(dev);
+ spin_unlock_irq(&tp->lock);
+}
+
+#define TG3_REGDUMP_LEN (32 * 1024)
+
+static u8 *tg3_get_regs(struct tg3 *tp)
+{
+ u8 *orig_p = kmalloc(TG3_REGDUMP_LEN, GFP_KERNEL);
+ u8 *p;
+ int i;
+
+ if (orig_p == NULL)
+ return NULL;
+
+ memset(orig_p, 0, TG3_REGDUMP_LEN);
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
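+/* Snapshot helpers: copy registers into the dump buffer at the same
+ * offsets they occupy in register space, advancing p as they go.
+ */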
+#define __GET_REG32(reg) (*((u32 *)(p))++ = tr32(reg))
+#define GET_REG32_LOOP(base,len) \
+do { p = orig_p + (base); \
+ for (i = 0; i < len; i += 4) \
+ __GET_REG32((base) + i); \
+} while (0)
+#define GET_REG32_1(reg) \
+do { p = orig_p + (reg); \
+ __GET_REG32((reg)); \
+} while (0)
+
+ GET_REG32_LOOP(TG3PCI_VENDOR, 0xb0);
+ GET_REG32_LOOP(MAILBOX_INTERRUPT_0, 0x200);
+ GET_REG32_LOOP(MAC_MODE, 0x4f0);
+ GET_REG32_LOOP(SNDDATAI_MODE, 0xe0);
+ GET_REG32_1(SNDDATAC_MODE);
+ GET_REG32_LOOP(SNDBDS_MODE, 0x80);
+ GET_REG32_LOOP(SNDBDI_MODE, 0x48);
+ GET_REG32_1(SNDBDC_MODE);
+ GET_REG32_LOOP(RCVLPC_MODE, 0x20);
+ GET_REG32_LOOP(RCVLPC_SELLST_BASE, 0x15c);
+ GET_REG32_LOOP(RCVDBDI_MODE, 0x0c);
+ GET_REG32_LOOP(RCVDBDI_JUMBO_BD, 0x3c);
+ GET_REG32_LOOP(RCVDBDI_BD_PROD_IDX_0, 0x44);
+ GET_REG32_1(RCVDCC_MODE);
+ GET_REG32_LOOP(RCVBDI_MODE, 0x20);
+ GET_REG32_LOOP(RCVCC_MODE, 0x14);
+ GET_REG32_LOOP(RCVLSC_MODE, 0x08);
+ GET_REG32_1(MBFREE_MODE);
+ GET_REG32_LOOP(HOSTCC_MODE, 0x100);
+ GET_REG32_LOOP(MEMARB_MODE, 0x10);
+ GET_REG32_LOOP(BUFMGR_MODE, 0x58);
+ GET_REG32_LOOP(RDMAC_MODE, 0x08);
+ GET_REG32_LOOP(WDMAC_MODE, 0x08);
+ GET_REG32_LOOP(RX_CPU_BASE, 0x280);
+ GET_REG32_LOOP(TX_CPU_BASE, 0x280);
+ GET_REG32_LOOP(GRCMBOX_INTERRUPT_0, 0x110);
+ GET_REG32_LOOP(FTQ_RESET, 0x120);
+ GET_REG32_LOOP(MSGINT_MODE, 0x0c);
+ GET_REG32_1(DMAC_MODE);
+ GET_REG32_LOOP(GRC_MODE, 0x4c);
+ GET_REG32_LOOP(NVRAM_CMD, 0x24);
+
+#undef __GET_REG32
+#undef GET_REG32_LOOP
+#undef GET_REG32_1
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ return orig_p;
+}
+
+static int tg3_ethtool_ioctl (struct net_device *dev, void *useraddr)
+{
+ struct tg3 *tp = dev->priv;
+ struct pci_dev *pci_dev = tp->pdev;
+ u32 ethcmd;
+
+ if (copy_from_user (&ethcmd, useraddr, sizeof (ethcmd)))
+ return -EFAULT;
+
+ switch (ethcmd) {
+ case ETHTOOL_GDRVINFO:{
+ struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO };
+ strcpy (info.driver, DRV_MODULE_NAME);
+ strcpy (info.version, DRV_MODULE_VERSION);
+ memset(&info.fw_version, 0, sizeof(info.fw_version));
+ strcpy (info.bus_info, pci_dev->slot_name);
+ info.eedump_len = 0;
+ info.regdump_len = TG3_REGDUMP_LEN;
+ if (copy_to_user (useraddr, &info, sizeof (info)))
+ return -EFAULT;
+ return 0;
+ }
+
+ case ETHTOOL_GSET: {
+ struct ethtool_cmd cmd = { ETHTOOL_GSET };
+
+ if (!(tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) ||
+ tp->link_config.phy_is_low_power)
+ return -EAGAIN;
+ cmd.supported = (SUPPORTED_Autoneg);
+
+ if (!(tp->tg3_flags & TG3_FLAG_10_100_ONLY))
+ cmd.supported |= (SUPPORTED_1000baseT_Half |
+ SUPPORTED_1000baseT_Full);
+
+ if (tp->phy_id != PHY_ID_SERDES)
+ cmd.supported |= (SUPPORTED_100baseT_Half |
+ SUPPORTED_100baseT_Full |
+ SUPPORTED_10baseT_Half |
+ SUPPORTED_10baseT_Full |
+ SUPPORTED_MII);
+ else
+ cmd.supported |= SUPPORTED_FIBRE;
+
+ cmd.advertising = tp->link_config.advertising;
+ cmd.speed = tp->link_config.active_speed;
+ cmd.duplex = tp->link_config.active_duplex;
+ cmd.port = 0;
+ cmd.phy_address = PHY_ADDR;
+ cmd.transceiver = 0;
+ cmd.autoneg = tp->link_config.autoneg;
+ cmd.maxtxpkt = 0;
+ cmd.maxrxpkt = 0;
+ if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SSET: {
+ struct ethtool_cmd cmd;
+
+ if (!(tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) ||
+ tp->link_config.phy_is_low_power)
+ return -EAGAIN;
+
+ if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+ return -EFAULT;
+
+ /* Fiber PHY only supports 1000 full/half */
+ if (cmd.autoneg == AUTONEG_ENABLE) {
+ if (tp->phy_id == PHY_ID_SERDES &&
+ (cmd.advertising &
+ (ADVERTISED_10baseT_Half |
+ ADVERTISED_10baseT_Full |
+ ADVERTISED_100baseT_Half |
+ ADVERTISED_100baseT_Full)))
+ return -EINVAL;
+ if ((tp->tg3_flags & TG3_FLAG_10_100_ONLY) &&
+ (cmd.advertising &
+ (ADVERTISED_1000baseT_Half |
+ ADVERTISED_1000baseT_Full)))
+ return -EINVAL;
+ } else {
+ if (tp->phy_id == PHY_ID_SERDES &&
+ (cmd.speed == SPEED_10 ||
+ cmd.speed == SPEED_100))
+ return -EINVAL;
+ if ((tp->tg3_flags & TG3_FLAG_10_100_ONLY) &&
+ (cmd.speed == SPEED_10 ||
+ cmd.speed == SPEED_100))
+ return -EINVAL;
+ }
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tp->link_config.autoneg = cmd.autoneg;
+ if (cmd.autoneg == AUTONEG_ENABLE) {
+ tp->link_config.advertising = cmd.advertising;
+ tp->link_config.speed = SPEED_INVALID;
+ tp->link_config.duplex = DUPLEX_INVALID;
+ } else {
+ tp->link_config.speed = cmd.speed;
+ tp->link_config.duplex = cmd.duplex;
+ }
+
+ tg3_setup_phy(tp);
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+ }
+
+ case ETHTOOL_GREGS: {
+ struct ethtool_regs regs;
+ u8 *regbuf;
+ int ret;
+
+ if (copy_from_user(&regs, useraddr, sizeof(regs)))
+ return -EFAULT;
+ if (regs.len > TG3_REGDUMP_LEN)
+ regs.len = TG3_REGDUMP_LEN;
+ regs.version = 0;
+ if (copy_to_user(useraddr, &regs, sizeof(regs)))
+ return -EFAULT;
+
+ regbuf = tg3_get_regs(tp);
+ if (!regbuf)
+ return -ENOMEM;
+
+ useraddr += offsetof(struct ethtool_regs, data);
+ ret = 0;
+ if (copy_to_user(useraddr, regbuf, regs.len))
+ ret = -EFAULT;
+ kfree(regbuf);
+ return ret;
+ }
+ case ETHTOOL_GWOL: {
+ struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
+
+ wol.supported = WAKE_MAGIC;
+ wol.wolopts = 0;
+ if (tp->tg3_flags & TG3_FLAG_WOL_ENABLE)
+ wol.wolopts = WAKE_MAGIC;
+ memset(&wol.sopass, 0, sizeof(wol.sopass));
+ if (copy_to_user(useraddr, &wol, sizeof(wol)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SWOL: {
+ struct ethtool_wolinfo wol;
+
+ if (copy_from_user(&wol, useraddr, sizeof(wol)))
+ return -EFAULT;
+ if (wol.wolopts & ~WAKE_MAGIC)
+ return -EINVAL;
+ if ((wol.wolopts & WAKE_MAGIC) &&
+ tp->phy_id == PHY_ID_SERDES &&
+ !(tp->tg3_flags & TG3_FLAG_SERDES_WOL_CAP))
+ return -EINVAL;
+
+ spin_lock_irq(&tp->lock);
+ if (wol.wolopts & WAKE_MAGIC)
+ tp->tg3_flags |= TG3_FLAG_WOL_ENABLE;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_WOL_ENABLE;
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+ }
+ case ETHTOOL_GMSGLVL: {
+ struct ethtool_value edata = { ETHTOOL_GMSGLVL };
+ edata.data = tp->msg_enable;
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SMSGLVL: {
+ struct ethtool_value edata;
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+ tp->msg_enable = edata.data;
+ return 0;
+ }
+ case ETHTOOL_NWAY_RST: {
+ u32 bmcr;
+ int r;
+
+ spin_lock_irq(&tp->lock);
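+		/* BMCR is read twice; presumably the first read is
+		 * discarded to flush a stale value from the PHY.
+		 */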
+ tg3_readphy(tp, MII_BMCR, &bmcr);
+ tg3_readphy(tp, MII_BMCR, &bmcr);
+ r = -EINVAL;
+ if (bmcr & BMCR_ANENABLE) {
+ tg3_writephy(tp, MII_BMCR,
+ bmcr | BMCR_ANRESTART);
+ r = 0;
+ }
+ spin_unlock_irq(&tp->lock);
+
+ return r;
+ }
+ case ETHTOOL_GLINK: {
+ struct ethtool_value edata = { ETHTOOL_GLINK };
+ edata.data = netif_carrier_ok(tp->dev) ? 1 : 0;
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_GRINGPARAM: {
+ struct ethtool_ringparam ering = { ETHTOOL_GRINGPARAM };
+
+ ering.rx_max_pending = TG3_RX_RING_SIZE - 1;
+ ering.rx_mini_max_pending = 0;
+ ering.rx_jumbo_max_pending = TG3_RX_JUMBO_RING_SIZE - 1;
+
+ ering.rx_pending = tp->rx_pending;
+ ering.rx_mini_pending = 0;
+ ering.rx_jumbo_pending = tp->rx_jumbo_pending;
+ ering.tx_pending = tp->tx_pending;
+
+ if (copy_to_user(useraddr, &ering, sizeof(ering)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SRINGPARAM: {
+ struct ethtool_ringparam ering;
+
+ if (copy_from_user(&ering, useraddr, sizeof(ering)))
+ return -EFAULT;
+
+ if ((ering.rx_pending > TG3_RX_RING_SIZE - 1) ||
+ (ering.rx_jumbo_pending > TG3_RX_JUMBO_RING_SIZE - 1) ||
+ (ering.tx_pending > TG3_TX_RING_SIZE - 1))
+ return -EINVAL;
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tp->rx_pending = ering.rx_pending;
+ tp->rx_jumbo_pending = ering.rx_jumbo_pending;
+ tp->tx_pending = ering.tx_pending;
+
+ tg3_halt(tp);
+ tg3_init_rings(tp);
+ tg3_init_hw(tp);
+ netif_wake_queue(tp->dev);
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+ }
+ case ETHTOOL_GPAUSEPARAM: {
+ struct ethtool_pauseparam epause = { ETHTOOL_GPAUSEPARAM };
+
+ epause.autoneg =
+ (tp->tg3_flags & TG3_FLAG_PAUSE_AUTONEG) != 0;
+ epause.rx_pause =
+ (tp->tg3_flags & TG3_FLAG_PAUSE_RX) != 0;
+ epause.tx_pause =
+ (tp->tg3_flags & TG3_FLAG_PAUSE_TX) != 0;
+ if (copy_to_user(useraddr, &epause, sizeof(epause)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SPAUSEPARAM: {
+ struct ethtool_pauseparam epause;
+
+ if (copy_from_user(&epause, useraddr, sizeof(epause)))
+ return -EFAULT;
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+ if (epause.autoneg)
+ tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_PAUSE_AUTONEG;
+ if (epause.rx_pause)
+ tp->tg3_flags |= TG3_FLAG_PAUSE_RX;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_PAUSE_RX;
+ if (epause.tx_pause)
+ tp->tg3_flags |= TG3_FLAG_PAUSE_TX;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_PAUSE_TX;
+ tg3_halt(tp);
+ tg3_init_rings(tp);
+ tg3_init_hw(tp);
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+ }
+ case ETHTOOL_GRXCSUM: {
+ struct ethtool_value edata = { ETHTOOL_GRXCSUM };
+
+ edata.data =
+ (tp->tg3_flags & TG3_FLAG_RX_CHECKSUMS) != 0;
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SRXCSUM: {
+ struct ethtool_value edata;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ if (tp->tg3_flags & TG3_FLAG_BROKEN_CHECKSUMS) {
+ if (edata.data != 0)
+ return -EINVAL;
+ return 0;
+ }
+
+ spin_lock_irq(&tp->lock);
+ if (edata.data)
+ tp->tg3_flags |= TG3_FLAG_RX_CHECKSUMS;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_RX_CHECKSUMS;
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+ }
+ case ETHTOOL_GTXCSUM: {
+ struct ethtool_value edata = { ETHTOOL_GTXCSUM };
+
+ edata.data =
+ (tp->dev->features & NETIF_F_IP_CSUM) != 0;
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_STXCSUM: {
+ struct ethtool_value edata;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ if (tp->tg3_flags & TG3_FLAG_BROKEN_CHECKSUMS) {
+ if (edata.data != 0)
+ return -EINVAL;
+ return 0;
+ }
+
+ if (edata.data)
+ tp->dev->features |= NETIF_F_IP_CSUM;
+ else
+ tp->dev->features &= ~NETIF_F_IP_CSUM;
+
+ return 0;
+ }
+ case ETHTOOL_GSG: {
+ struct ethtool_value edata = { ETHTOOL_GSG };
+
+ edata.data =
+ (tp->dev->features & NETIF_F_SG) != 0;
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+ }
+ case ETHTOOL_SSG: {
+ struct ethtool_value edata;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ if (edata.data)
+ tp->dev->features |= NETIF_F_SG;
+ else
+ tp->dev->features &= ~NETIF_F_SG;
+
+ return 0;
+ }
+	}
+
+ return -EOPNOTSUPP;
+}
+
+static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct mii_ioctl_data *data = (struct mii_ioctl_data *)&ifr->ifr_data;
+ struct tg3 *tp = dev->priv;
+ int err;
+
+ switch(cmd) {
+ case SIOCETHTOOL:
+ return tg3_ethtool_ioctl(dev, (void *) ifr->ifr_data);
+ case SIOCGMIIPHY:
+ data->phy_id = PHY_ADDR;
+
+ /* fallthru */
+ case SIOCGMIIREG: {
+ u32 mii_regval;
+
+ spin_lock_irq(&tp->lock);
+ err = tg3_readphy(tp, data->reg_num & 0x1f, &mii_regval);
+ spin_unlock_irq(&tp->lock);
+
+ data->val_out = mii_regval;
+
+ return err;
+ }
+
+ case SIOCSMIIREG:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ spin_lock_irq(&tp->lock);
+ err = tg3_writephy(tp, data->reg_num & 0x1f, data->val_in);
+ spin_unlock_irq(&tp->lock);
+
+ return err;
+
+ default:
+ /* do nothing */
+ break;
+ }
+ return -EOPNOTSUPP;
+}
+
+#if TG3_VLAN_TAG_USED
+static void tg3_vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
+{
+ struct tg3 *tp = dev->priv;
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tp->vlgrp = grp;
+
+ /* Update RX_MODE_KEEP_VLAN_TAG bit in RX_MODE register. */
+ __tg3_set_rx_mode(dev);
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+}
+
+static void tg3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+{
+ struct tg3 *tp = dev->priv;
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+ if (tp->vlgrp)
+ tp->vlgrp->vlan_devices[vid] = NULL;
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+}
+#endif
+
+/* Chips other than 5700/5701 use the NVRAM for fetching info. */
+static void __devinit tg3_nvram_init(struct tg3 *tp)
+{
+ int j;
+
+ tw32(GRC_EEPROM_ADDR,
+ (EEPROM_ADDR_FSM_RESET |
+ (EEPROM_DEFAULT_CLOCK_PERIOD <<
+ EEPROM_ADDR_CLKPERD_SHIFT)));
+
+ /* XXX schedule_timeout() ... */
+ for (j = 0; j < 100; j++)
+ udelay(10);
+
+ /* Enable seeprom accesses. */
+ tw32(GRC_LOCAL_CTRL,
+ tr32(GRC_LOCAL_CTRL) | GRC_LCLCTRL_AUTO_SEEPROM);
+ tr32(GRC_LOCAL_CTRL);
+ udelay(100);
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700 &&
+ GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701) {
+ u32 nvcfg1 = tr32(NVRAM_CFG1);
+
+ tp->tg3_flags |= TG3_FLAG_NVRAM;
+ if (nvcfg1 & NVRAM_CFG1_FLASHIF_ENAB) {
+ if (nvcfg1 & NVRAM_CFG1_BUFFERED_MODE)
+ tp->tg3_flags |= TG3_FLAG_NVRAM_BUFFERED;
+ } else {
+ nvcfg1 &= ~NVRAM_CFG1_COMPAT_BYPASS;
+ tw32(NVRAM_CFG1, nvcfg1);
+ }
+
+ } else {
+ tp->tg3_flags &= ~(TG3_FLAG_NVRAM | TG3_FLAG_NVRAM_BUFFERED);
+ }
+}
+
+static int __devinit tg3_nvram_read_using_eeprom(struct tg3 *tp,
+ u32 offset, u32 *val)
+{
+ u32 tmp;
+ int i;
+
+ if (offset > EEPROM_ADDR_ADDR_MASK ||
+ (offset % 4) != 0)
+ return -EINVAL;
+
+ tmp = tr32(GRC_EEPROM_ADDR) & ~(EEPROM_ADDR_ADDR_MASK |
+ EEPROM_ADDR_DEVID_MASK |
+ EEPROM_ADDR_READ);
+ tw32(GRC_EEPROM_ADDR,
+ tmp |
+ (0 << EEPROM_ADDR_DEVID_SHIFT) |
+ ((offset << EEPROM_ADDR_ADDR_SHIFT) &
+ EEPROM_ADDR_ADDR_MASK) |
+ EEPROM_ADDR_READ | EEPROM_ADDR_START);
+
+ for (i = 0; i < 10000; i++) {
+ tmp = tr32(GRC_EEPROM_ADDR);
+
+ if (tmp & EEPROM_ADDR_COMPLETE)
+ break;
+ udelay(100);
+ }
+ if (!(tmp & EEPROM_ADDR_COMPLETE))
+ return -EBUSY;
+
+ *val = tr32(GRC_EEPROM_DATA);
+ return 0;
+}
+
+static int __devinit tg3_nvram_read(struct tg3 *tp,
+ u32 offset, u32 *val)
+{
+ int i, saw_done_clear;
+
+ if (!(tp->tg3_flags & TG3_FLAG_NVRAM))
+ return tg3_nvram_read_using_eeprom(tp, offset, val);
+
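+	/* Buffered flash parts address data as (page index << page-bit
+	 * position) | offset-within-page, so remap the linear offset.
+	 */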
+ if (tp->tg3_flags & TG3_FLAG_NVRAM_BUFFERED)
+ offset = ((offset / NVRAM_BUFFERED_PAGE_SIZE) <<
+ NVRAM_BUFFERED_PAGE_POS) +
+ (offset % NVRAM_BUFFERED_PAGE_SIZE);
+
+ if (offset > NVRAM_ADDR_MSK)
+ return -EINVAL;
+
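+	/* Grab the NVRAM software arbitration grant before issuing the
+	 * read; it is released again below once the data is latched.
+	 */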
+ tw32(NVRAM_SWARB, SWARB_REQ_SET1);
+ for (i = 0; i < 1000; i++) {
+ if (tr32(NVRAM_SWARB) & SWARB_GNT1)
+ break;
+ udelay(20);
+ }
+
+ tw32(NVRAM_ADDR, offset);
+ tw32(NVRAM_CMD,
+ NVRAM_CMD_RD | NVRAM_CMD_GO |
+ NVRAM_CMD_FIRST | NVRAM_CMD_LAST | NVRAM_CMD_DONE);
+
+ /* Wait for done bit to clear then set again. */
+ saw_done_clear = 0;
+ for (i = 0; i < 1000; i++) {
+ udelay(10);
+ if (!saw_done_clear &&
+ !(tr32(NVRAM_CMD) & NVRAM_CMD_DONE))
+ saw_done_clear = 1;
+ else if (saw_done_clear &&
+ (tr32(NVRAM_CMD) & NVRAM_CMD_DONE))
+ break;
+ }
+ if (i >= 1000) {
+ tw32(NVRAM_SWARB, SWARB_REQ_CLR1);
+ return -EBUSY;
+ }
+
+ *val = swab32(tr32(NVRAM_RDDATA));
+	tw32(NVRAM_SWARB, SWARB_REQ_CLR1);
+
+ return 0;
+}
+
+struct subsys_tbl_ent {
+ u16 subsys_vendor, subsys_devid;
+ u32 phy_id;
+};
+
+static struct subsys_tbl_ent subsys_id_to_phy_id[] = {
+ /* Broadcom boards. */
+ { 0x14e4, 0x1644, PHY_ID_BCM5401 }, /* BCM95700A6 */
+ { 0x14e4, 0x0001, PHY_ID_BCM5701 }, /* BCM95701A5 */
+ { 0x14e4, 0x0002, PHY_ID_BCM8002 }, /* BCM95700T6 */
+ { 0x14e4, 0x0003, PHY_ID_SERDES }, /* BCM95700A9 */
+ { 0x14e4, 0x0005, PHY_ID_BCM5701 }, /* BCM95701T1 */
+ { 0x14e4, 0x0006, PHY_ID_BCM5701 }, /* BCM95701T8 */
+ { 0x14e4, 0x0007, PHY_ID_SERDES }, /* BCM95701A7 */
+ { 0x14e4, 0x0008, PHY_ID_BCM5701 }, /* BCM95701A10 */
+ { 0x14e4, 0x8008, PHY_ID_BCM5701 }, /* BCM95701A12 */
+ { 0x14e4, 0x0009, PHY_ID_BCM5701 }, /* BCM95703Ax1 */
+ { 0x14e4, 0x8009, PHY_ID_BCM5701 }, /* BCM95703Ax2 */
+
+ /* 3com boards. */
+ { PCI_VENDOR_ID_3COM, 0x1000, PHY_ID_BCM5401 }, /* 3C996T */
+ { PCI_VENDOR_ID_3COM, 0x1006, PHY_ID_BCM5701 }, /* 3C996BT */
+ /* { PCI_VENDOR_ID_3COM, 0x1002, PHY_ID_XXX }, 3C996CT */
+ /* { PCI_VENDOR_ID_3COM, 0x1003, PHY_ID_XXX }, 3C997T */
+ { PCI_VENDOR_ID_3COM, 0x1004, PHY_ID_SERDES }, /* 3C996SX */
+ /* { PCI_VENDOR_ID_3COM, 0x1005, PHY_ID_XXX }, 3C997SZ */
+ { PCI_VENDOR_ID_3COM, 0x1007, PHY_ID_BCM5701 }, /* 3C1000T */
+ { PCI_VENDOR_ID_3COM, 0x1008, PHY_ID_BCM5701 }, /* 3C940BR01 */
+
+ /* DELL boards. */
+ { PCI_VENDOR_ID_DELL, 0x00d1, PHY_ID_BCM5401 }, /* VIPER */
+ { PCI_VENDOR_ID_DELL, 0x0106, PHY_ID_BCM5401 }, /* JAGUAR */
+ { PCI_VENDOR_ID_DELL, 0x0109, PHY_ID_BCM5411 }, /* MERLOT */
+ { PCI_VENDOR_ID_DELL, 0x010a, PHY_ID_BCM5411 }, /* SLIM_MERLOT */
+
+ /* Compaq boards. */
+ { PCI_VENDOR_ID_COMPAQ, 0x007c, PHY_ID_BCM5701 }, /* BANSHEE */
+ { PCI_VENDOR_ID_COMPAQ, 0x009a, PHY_ID_BCM5701 }, /* BANSHEE_2 */
+ { PCI_VENDOR_ID_COMPAQ, 0x007d, PHY_ID_SERDES }, /* CHANGELING */
+ { PCI_VENDOR_ID_COMPAQ, 0x0085, PHY_ID_BCM5701 }, /* NC7780 */
+ { PCI_VENDOR_ID_COMPAQ, 0x0099, PHY_ID_BCM5701 } /* NC7780_2 */
+};
+
+static int __devinit tg3_phy_probe(struct tg3 *tp)
+{
+ u32 eeprom_phy_id, hw_phy_id_1, hw_phy_id_2;
+ u32 hw_phy_id, hw_phy_id_masked;
+ enum phy_led_mode eeprom_led_mode;
+ u32 val;
+ int i, eeprom_signature_found, err;
+
+ tp->phy_id = PHY_ID_INVALID;
+ for (i = 0; i < ARRAY_SIZE(subsys_id_to_phy_id); i++) {
+ if ((subsys_id_to_phy_id[i].subsys_vendor ==
+ tp->pdev->subsystem_vendor) &&
+ (subsys_id_to_phy_id[i].subsys_devid ==
+ tp->pdev->subsystem_device)) {
+ tp->phy_id = subsys_id_to_phy_id[i].phy_id;
+ break;
+ }
+ }
+
+ eeprom_phy_id = PHY_ID_INVALID;
+ eeprom_led_mode = led_mode_auto;
+ eeprom_signature_found = 0;
+ tg3_read_mem(tp, NIC_SRAM_DATA_SIG, &val);
+ if (val == NIC_SRAM_DATA_SIG_MAGIC) {
+ u32 nic_cfg;
+
+ tg3_read_mem(tp, NIC_SRAM_DATA_CFG, &nic_cfg);
+
+ eeprom_signature_found = 1;
+
+ if ((nic_cfg & NIC_SRAM_DATA_CFG_PHY_TYPE_MASK) ==
+ NIC_SRAM_DATA_CFG_PHY_TYPE_FIBER) {
+ eeprom_phy_id = PHY_ID_SERDES;
+ } else {
+ u32 nic_phy_id;
+
+ tg3_read_mem(tp, NIC_SRAM_DATA_PHY_ID, &nic_phy_id);
+ if (nic_phy_id != 0) {
+ u32 id1 = nic_phy_id & NIC_SRAM_DATA_PHY_ID1_MASK;
+ u32 id2 = nic_phy_id & NIC_SRAM_DATA_PHY_ID2_MASK;
+
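+				/* Recompose the driver's PHY id layout
+				 * from the two halves stored in NIC
+				 * SRAM; same packing as the MII_PHYSID1/
+				 * MII_PHYSID2 composition below.
+				 */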
+ eeprom_phy_id = (id1 >> 16) << 10;
+ eeprom_phy_id |= (id2 & 0xfc00) << 16;
+ eeprom_phy_id |= (id2 & 0x03ff) << 0;
+ }
+ }
+
+ switch (nic_cfg & NIC_SRAM_DATA_CFG_LED_MODE_MASK) {
+ case NIC_SRAM_DATA_CFG_LED_TRIPLE_SPD:
+ eeprom_led_mode = led_mode_three_link;
+ break;
+
+ case NIC_SRAM_DATA_CFG_LED_LINK_SPD:
+ eeprom_led_mode = led_mode_link10;
+ break;
+
+ default:
+ eeprom_led_mode = led_mode_auto;
+ break;
+		}
+ if ((tp->pci_chip_rev_id == CHIPREV_ID_5703_A1 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5703_A2) &&
+ (nic_cfg & NIC_SRAM_DATA_CFG_EEPROM_WP))
+ tp->tg3_flags |= TG3_FLAG_EEPROM_WRITE_PROT;
+
+ if (nic_cfg & NIC_SRAM_DATA_CFG_ASF_ENABLE)
+ tp->tg3_flags |= TG3_FLAG_ENABLE_ASF;
+ if (nic_cfg & NIC_SRAM_DATA_CFG_FIBER_WOL)
+ tp->tg3_flags |= TG3_FLAG_SERDES_WOL_CAP;
+ }
+
+ /* Now read the physical PHY_ID from the chip and verify
+	 * that it is sane. If it doesn't look good, we fall back
+	 * to the hard-coded table based PHY_ID or, failing that,
+	 * the value found in the eeprom area.
+ */
+ err = tg3_readphy(tp, MII_PHYSID1, &hw_phy_id_1);
+ err |= tg3_readphy(tp, MII_PHYSID2, &hw_phy_id_2);
+
+ hw_phy_id = (hw_phy_id_1 & 0xffff) << 10;
+ hw_phy_id |= (hw_phy_id_2 & 0xfc00) << 16;
+ hw_phy_id |= (hw_phy_id_2 & 0x03ff) << 0;
+
+ hw_phy_id_masked = hw_phy_id & PHY_ID_MASK;
+
+ if (!err && KNOWN_PHY_ID(hw_phy_id_masked)) {
+ tp->phy_id = hw_phy_id;
+ } else {
+ /* phy_id currently holds the value found in the
+ * subsys_id_to_phy_id[] table or PHY_ID_INVALID
+ * if a match was not found there.
+ */
+ if (tp->phy_id == PHY_ID_INVALID) {
+ if (!eeprom_signature_found ||
+ !KNOWN_PHY_ID(eeprom_phy_id & PHY_ID_MASK))
+ return -ENODEV;
+ tp->phy_id = eeprom_phy_id;
+ }
+ }
+
+ err = tg3_phy_reset(tp, 1);
+ if (err)
+ return err;
+
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5701_B0) {
+ u32 mii_tg3_ctrl;
+
+ /* These chips, when reset, only advertise 10Mb
+ * capabilities. Fix that.
+ */
+ err = tg3_writephy(tp, MII_ADVERTISE,
+ (ADVERTISE_CSMA |
+ ADVERTISE_PAUSE_CAP |
+ ADVERTISE_10HALF |
+ ADVERTISE_10FULL |
+ ADVERTISE_100HALF |
+ ADVERTISE_100FULL));
+ mii_tg3_ctrl = (MII_TG3_CTRL_ADV_1000_HALF |
+ MII_TG3_CTRL_ADV_1000_FULL |
+ MII_TG3_CTRL_AS_MASTER |
+ MII_TG3_CTRL_ENABLE_AS_MASTER);
+ if (tp->tg3_flags & TG3_FLAG_10_100_ONLY)
+ mii_tg3_ctrl = 0;
+
+ err |= tg3_writephy(tp, MII_TG3_CTRL, mii_tg3_ctrl);
+ err |= tg3_writephy(tp, MII_BMCR,
+ (BMCR_ANRESTART | BMCR_ANENABLE));
+ }
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703) {
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0c00);
+ tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x201f);
+ tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x2aaa);
+ }
+
+ if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) &&
+ (tp->pci_chip_rev_id == CHIPREV_ID_5704_A0)) {
+ tg3_writephy(tp, 0x1c, 0x8d68);
+ tg3_writephy(tp, 0x1c, 0x8d68);
+ }
+
+ /* Enable Ethernet@WireSpeed */
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x7007);
+ tg3_readphy(tp, MII_TG3_AUX_CTRL, &val);
+ tg3_writephy(tp, MII_TG3_AUX_CTRL, (val | (1 << 15) | (1 << 4)));
+
+ if (!err && ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5401)) {
+ err = tg3_init_5401phy_dsp(tp);
+ }
+
+ /* Determine the PHY led mode. */
+ if (tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL) {
+ tp->led_mode = led_mode_link10;
+ } else {
+ tp->led_mode = led_mode_three_link;
+ if (eeprom_signature_found &&
+ eeprom_led_mode != led_mode_auto)
+ tp->led_mode = eeprom_led_mode;
+ }
+
+ if (tp->phy_id == PHY_ID_SERDES)
+ tp->link_config.advertising =
+ (ADVERTISED_1000baseT_Half |
+ ADVERTISED_1000baseT_Full |
+ ADVERTISED_Autoneg |
+ ADVERTISED_FIBRE);
+ if (tp->tg3_flags & TG3_FLAG_10_100_ONLY)
+ tp->link_config.advertising &=
+ ~(ADVERTISED_1000baseT_Half |
+ ADVERTISED_1000baseT_Full);
+
+ return err;
+}
+
+static void __devinit tg3_read_partno(struct tg3 *tp)
+{
+ unsigned char vpd_data[256];
+ int i;
+
+ for (i = 0; i < 256; i += 4) {
+ u32 tmp;
+
+ if (tg3_nvram_read(tp, 0x100 + i, &tmp))
+ goto out_not_found;
+
+ vpd_data[i + 0] = ((tmp >> 0) & 0xff);
+ vpd_data[i + 1] = ((tmp >> 8) & 0xff);
+ vpd_data[i + 2] = ((tmp >> 16) & 0xff);
+ vpd_data[i + 3] = ((tmp >> 24) & 0xff);
+ }
+
+ /* Now parse and find the part number. */
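+	/* VPD is a list of tagged resources: 0x82 (identifier string)
+	 * and 0x91 (read/write area) are skipped; 0x90 is the read-only
+	 * area whose "PN" keyword carries the board part number.
+	 */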
+ for (i = 0; i < 256; ) {
+ unsigned char val = vpd_data[i];
+ int block_end;
+
+ if (val == 0x82 || val == 0x91) {
+ i = (i + 3 +
+ (vpd_data[i + 1] +
+ (vpd_data[i + 2] << 8)));
+ continue;
+ }
+
+ if (val != 0x90)
+ goto out_not_found;
+
+ block_end = (i + 3 +
+ (vpd_data[i + 1] +
+ (vpd_data[i + 2] << 8)));
+ i += 3;
+ while (i < block_end) {
+ if (vpd_data[i + 0] == 'P' &&
+ vpd_data[i + 1] == 'N') {
+ int partno_len = vpd_data[i + 2];
+
+ if (partno_len > 24)
+ goto out_not_found;
+
+ memcpy(tp->board_part_number,
+ &vpd_data[i + 3],
+ partno_len);
+
+ /* Success. */
+ return;
+			}
+
+			/* Advance past this keyword (2 name bytes, 1
+			 * length byte, then the data); without this the
+			 * scan never terminates when "PN" is absent.
+			 */
+			i += 3 + vpd_data[i + 2];
+		}
+
+ /* Part number not found. */
+ goto out_not_found;
+ }
+
+out_not_found:
+ strcpy(tp->board_part_number, "none");
+}
+
+static int __devinit tg3_get_invariants(struct tg3 *tp)
+{
+ u32 misc_ctrl_reg;
+ u32 cacheline_sz_reg;
+ u32 pci_state_reg, grc_misc_cfg;
+ u16 pci_cmd;
+ int err;
+
+ /* If we have an AMD 762 or Intel ICH/ICH0 chipset, write
+ * reordering to the mailbox registers done by the host
+ * controller can cause major troubles. We read back from
+ * every mailbox register write to force the writes to be
+ * posted to the chip in order.
+ */
+ if (pci_find_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_82801AA_8, NULL) ||
+ pci_find_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_82801AB_8, NULL) ||
+ pci_find_device(PCI_VENDOR_ID_AMD,
+ PCI_DEVICE_ID_AMD_FE_GATE_700C, NULL))
+ tp->tg3_flags |= TG3_FLAG_MBOX_WRITE_REORDER;
+
+ /* Force memory write invalidate off. If we leave it on,
+ * then on 5700_BX chips we have to enable a workaround.
+	 * The workaround is to set the TG3PCI_DMA_RW_CTRL boundary
+	 * to match the cacheline size. The Broadcom driver has this
+	 * workaround but turns MWI off at all times so it is never
+	 * used, which suggests the workaround is insufficient.
+ */
+ pci_read_config_word(tp->pdev, PCI_COMMAND, &pci_cmd);
+ pci_cmd &= ~PCI_COMMAND_INVALIDATE;
+ pci_write_config_word(tp->pdev, PCI_COMMAND, pci_cmd);
+
+ /* It is absolutely critical that TG3PCI_MISC_HOST_CTRL
+ * has the register indirect write enable bit set before
+ * we try to access any of the MMIO registers. It is also
+ * critical that the PCI-X hw workaround situation is decided
+ * before that as well.
+ */
+ pci_read_config_dword(tp->pdev, TG3PCI_MISC_HOST_CTRL,
+ &misc_ctrl_reg);
+
+ tp->pci_chip_rev_id = (misc_ctrl_reg >>
+ MISC_HOST_CTRL_CHIPREV_SHIFT);
+
+ /* Initialize misc host control in PCI block. */
+ tp->misc_host_ctrl |= (misc_ctrl_reg &
+ MISC_HOST_CTRL_CHIPREV);
+ pci_write_config_dword(tp->pdev, TG3PCI_MISC_HOST_CTRL,
+ tp->misc_host_ctrl);
+
+ pci_read_config_dword(tp->pdev, TG3PCI_CACHELINESZ,
+ &cacheline_sz_reg);
+
+ tp->pci_cacheline_sz = (cacheline_sz_reg >> 0) & 0xff;
+ tp->pci_lat_timer = (cacheline_sz_reg >> 8) & 0xff;
+ tp->pci_hdr_type = (cacheline_sz_reg >> 16) & 0xff;
+ tp->pci_bist = (cacheline_sz_reg >> 24) & 0xff;
+
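+	/* On 5703, enforce a minimum PCI latency timer of 64 cycles by
+	 * rewriting the packed cacheline/latency/header/BIST config word.
+	 */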
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 &&
+ tp->pci_lat_timer < 64) {
+ tp->pci_lat_timer = 64;
+
+ cacheline_sz_reg = ((tp->pci_cacheline_sz & 0xff) << 0);
+ cacheline_sz_reg |= ((tp->pci_lat_timer & 0xff) << 8);
+ cacheline_sz_reg |= ((tp->pci_hdr_type & 0xff) << 16);
+ cacheline_sz_reg |= ((tp->pci_bist & 0xff) << 24);
+
+ pci_write_config_dword(tp->pdev, TG3PCI_CACHELINESZ,
+ cacheline_sz_reg);
+ }
+
+ pci_read_config_dword(tp->pdev, TG3PCI_PCISTATE,
+ &pci_state_reg);
+
+ if ((pci_state_reg & PCISTATE_CONV_PCI_MODE) == 0) {
+ tp->tg3_flags |= TG3_FLAG_PCIX_MODE;
+
+ /* If this is a 5700 BX chipset, and we are in PCI-X
+ * mode, enable register write workaround.
+ *
+ * The workaround is to use indirect register accesses
+ * for all chip writes not to mailbox registers.
+ */
+ if (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5700_BX) {
+ u32 pm_reg;
+ u16 pci_cmd;
+
+ tp->tg3_flags |= TG3_FLAG_PCIX_TARGET_HWBUG;
+
+			/* The chip can have its power management PCI config
+ * space registers clobbered due to this bug.
+ * So explicitly force the chip into D0 here.
+ */
+ pci_read_config_dword(tp->pdev, TG3PCI_PM_CTRL_STAT,
+ &pm_reg);
+ pm_reg &= ~PCI_PM_CTRL_STATE_MASK;
+ pm_reg |= PCI_PM_CTRL_PME_ENABLE | 0 /* D0 */;
+ pci_write_config_dword(tp->pdev, TG3PCI_PM_CTRL_STAT,
+ pm_reg);
+
+ /* Also, force SERR#/PERR# in PCI command. */
+ pci_read_config_word(tp->pdev, PCI_COMMAND, &pci_cmd);
+ pci_cmd |= PCI_COMMAND_PARITY | PCI_COMMAND_SERR;
+ pci_write_config_word(tp->pdev, PCI_COMMAND, pci_cmd);
+ }
+ }
+ if ((pci_state_reg & PCISTATE_BUS_SPEED_HIGH) != 0)
+ tp->tg3_flags |= TG3_FLAG_PCI_HIGH_SPEED;
+ if ((pci_state_reg & PCISTATE_BUS_32BIT) != 0)
+ tp->tg3_flags |= TG3_FLAG_PCI_32BIT;
+
+ /* Chip-specific fixup from Broadcom driver */
+ if ((tp->pci_chip_rev_id == CHIPREV_ID_5704_A0) &&
+ (!(pci_state_reg & PCISTATE_RETRY_SAME_DMA))) {
+ pci_state_reg |= PCISTATE_RETRY_SAME_DMA;
+ pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, pci_state_reg);
+ }
+
+ /* Force the chip into D0. */
+ err = tg3_set_power_state(tp, 0);
+ if (err) {
+ printk(KERN_ERR PFX "(%s) transition to D0 failed\n",
+ tp->pdev->slot_name);
+ return err;
+ }
+
+ /* 5700 B0 chips do not support checksumming correctly due
+ * to hardware bugs.
+ */
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5700_B0)
+ tp->tg3_flags |= TG3_FLAG_BROKEN_CHECKSUMS;
+
+ /* Regardless of whether checksums work or not, we configure
+ * the StrongARM chips to not compute the pseudo header checksums
+ * in either direction. Because of the way Linux checksum support
+ * works we do not need the chips to do this, and taking the load
+ * off of the TX/RX onboard StrongARM cpus means that they will not be
+ * the bottleneck. Whoever wrote Broadcom's driver did not
+ * understand the situation at all. He could have bothered
+ * to read Jes's Acenic driver because the logic (and this part of
+ * the Tigon2 hardware/firmware) is pretty much identical.
+ */
+ tp->tg3_flags |= TG3_FLAG_NO_TX_PSEUDO_CSUM;
+ tp->tg3_flags |= TG3_FLAG_NO_RX_PSEUDO_CSUM;
+
+ /* Derive initial jumbo mode from MTU assigned in
+ * ether_setup() via the alloc_etherdev() call
+ */
+ if (tp->dev->mtu > ETH_DATA_LEN)
+ tp->tg3_flags |= TG3_FLAG_JUMBO_ENABLE;
+
+ /* Determine WakeOnLan speed to use. */
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5701_B0 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5701_B2) {
+ tp->tg3_flags &= ~(TG3_FLAG_WOL_SPEED_100MB);
+ } else {
+ tp->tg3_flags |= TG3_FLAG_WOL_SPEED_100MB;
+ }
+
+ /* Only 5701 and later support tagged irq status mode.
+ *
+	 * However, since we are using NAPI, avoid tagged irq status
+ * because the interrupt condition is more difficult to
+ * fully clear in that mode.
+ */
+ tp->coalesce_mode = 0;
+
+ if (GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5700_AX &&
+ GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5700_BX)
+ tp->coalesce_mode |= HOSTCC_MODE_32BYTE;
+
+ /* Initialize MAC MI mode, polling disabled. */
+ tw32(MAC_MI_MODE, tp->mi_mode);
+ tr32(MAC_MI_MODE);
+ udelay(40);
+
+ /* Initialize data/descriptor byte/word swapping. */
+ tw32(GRC_MODE, tp->grc_mode);
+
+ tg3_switch_clocks(tp);
+
+ /* Clear this out for sanity. */
+ tw32(TG3PCI_MEM_WIN_BASE_ADDR, 0);
+
+ pci_read_config_dword(tp->pdev, TG3PCI_PCISTATE,
+ &pci_state_reg);
+ if ((pci_state_reg & PCISTATE_CONV_PCI_MODE) == 0 &&
+ (tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) == 0) {
+ u32 chiprevid = GET_CHIP_REV_ID(tp->misc_host_ctrl);
+
+ if (chiprevid == CHIPREV_ID_5701_A0 ||
+ chiprevid == CHIPREV_ID_5701_B0 ||
+ chiprevid == CHIPREV_ID_5701_B2 ||
+ chiprevid == CHIPREV_ID_5701_B5) {
+ unsigned long sram_base;
+
+ /* Write some dummy words into the SRAM status block
+ * area, see if it reads back correctly. If the return
+ * value is bad, force enable the PCIX workaround.
+ */
+ sram_base = tp->regs + NIC_SRAM_WIN_BASE + NIC_SRAM_STATS_BLK;
+
+ writel(0x00000000, sram_base);
+ writel(0x00000000, sram_base + 4);
+ writel(0xffffffff, sram_base + 4);
+ if (readl(sram_base) != 0x00000000)
+ tp->tg3_flags |= TG3_FLAG_PCIX_TARGET_HWBUG;
+ }
+ }
+
+ udelay(50);
+ tg3_nvram_init(tp);
+
+ /* Determine if TX descriptors will reside in
+ * main memory or in the chip SRAM.
+ */
+ if (tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG)
+ tp->tg3_flags |= TG3_FLAG_HOST_TXDS;
+
+ /* Quick sanity check. Make sure we see an expected
+ * value here.
+ */
+ grc_misc_cfg = tr32(GRC_MISC_CFG);
+ grc_misc_cfg &= GRC_MISC_CFG_BOARD_ID_MASK;
+ if (grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5700 &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5701 &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5702FE &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5703 &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5703S &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5704 &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5704_A2 &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_5704_X &&
+ grc_misc_cfg != GRC_MISC_CFG_BOARD_ID_AC91002A1) {
+ printk(KERN_ERR PFX "(%s) unknown board id 0x%08X\n",
+ tp->pdev->slot_name, grc_misc_cfg);
+ return -ENODEV;
+ }
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 &&
+ grc_misc_cfg == GRC_MISC_CFG_BOARD_ID_5704CIOBE) {
+ tp->tg3_flags |= TG3_FLAG_SPLIT_MODE;
+ tp->split_mode_max_reqs = SPLIT_MODE_5704_MAX_REQ;
+ }
+
+ /* ROFL, you should see Broadcom's driver code implementing
+ * this, stuff like "if (a || b)" where a and b are always
+ * mutually exclusive. DaveM finds like 6 bugs today, hello!
+ */
+ if (grc_misc_cfg == GRC_MISC_CFG_BOARD_ID_5702FE)
+ tp->tg3_flags |= TG3_FLAG_10_100_ONLY;
+
+ err = tg3_phy_probe(tp);
+ if (err) {
+ printk(KERN_ERR PFX "(%s) phy probe failed, err %d\n",
+ tp->pdev->slot_name, err);
+ /* ... but do not return immediately ... */
+ }
+
+ tg3_read_partno(tp);
+
+ if (tp->phy_id == PHY_ID_SERDES) {
+ tp->tg3_flags &= ~TG3_FLAG_USE_MI_INTERRUPT;
+
+ /* And override led_mode in case Dell ever makes
+ * a fibre board.
+ */
+ tp->led_mode = led_mode_three_link;
+ } else {
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700)
+ tp->tg3_flags |= TG3_FLAG_USE_MI_INTERRUPT;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_USE_MI_INTERRUPT;
+ }
+
+ /* 5700 {AX,BX} chips have a broken status block link
+ * change bit implementation, so we must use the
+ * status register in those cases.
+ */
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700)
+ tp->tg3_flags |= TG3_FLAG_USE_LINKCHG_REG;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_USE_LINKCHG_REG;
+
+	/* The led_mode is set during tg3_phy_probe; here we might
+ * have to force the link status polling mechanism based
+ * upon subsystem IDs.
+ */
+ if (tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL &&
+ tp->phy_id != PHY_ID_SERDES) {
+ tp->tg3_flags |= (TG3_FLAG_USE_MI_INTERRUPT |
+ TG3_FLAG_USE_LINKCHG_REG);
+ }
+
+ /* For all SERDES we poll the MAC status register. */
+ if (tp->phy_id == PHY_ID_SERDES)
+ tp->tg3_flags |= TG3_FLAG_POLL_SERDES;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_POLL_SERDES;
+
+ /* 5700 BX chips need to have their TX producer index mailboxes
+ * written twice to workaround a bug.
+ */
+ if (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5700_BX)
+ tp->tg3_flags |= TG3_FLAG_TXD_MBOX_HWBUG;
+ else
+ tp->tg3_flags &= ~TG3_FLAG_TXD_MBOX_HWBUG;
+
+ /* 5700 chips can get confused if TX buffers straddle the
+ * 4GB address boundary in some cases.
+ */
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700) {
+ /* ROFL! Latest Broadcom driver disables NETIF_F_HIGHDMA
+ * in this case instead of fixing their workaround code.
+ *
+ * Like, hey, there is this skb_copy() thing guys,
+ * use it. Oh I can't stop laughing...
+ */
+ tp->dev->hard_start_xmit = tg3_start_xmit_4gbug;
+ } else {
+ tp->dev->hard_start_xmit = tg3_start_xmit;
+ }
+
+ tp->rx_offset = 2;
+
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701 &&
+	    (tp->tg3_flags & TG3_FLAG_PCIX_MODE) != 0)
+		printk(KERN_WARNING PFX "This card may not support unaligned receive pointers.\n");
+	/* tp->rx_offset = 0; */
+
+ /* By default, disable wake-on-lan. User can change this
+ * using ETHTOOL_SWOL.
+ */
+ tp->tg3_flags &= ~TG3_FLAG_WOL_ENABLE;
+
+ return err;
+}
+
+static int __devinit tg3_get_device_address(struct tg3 *tp)
+{
+ struct net_device *dev = tp->dev;
+ u32 hi, lo, mac_offset;
+
+ if (PCI_FUNC(tp->pdev->devfn) == 0)
+ mac_offset = 0x7c;
+ else
+ mac_offset = 0xcc;
+
+ /* First try to get it from MAC address mailbox. */
+ tg3_read_mem(tp, NIC_SRAM_MAC_ADDR_HIGH_MBOX, &hi);
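+	/* 0x484b is ASCII "HK"; presumably the firmware's signature
+	 * marking a valid MAC address in the mailbox.
+	 */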
+ if ((hi >> 16) == 0x484b) {
+ dev->dev_addr[0] = (hi >> 8) & 0xff;
+ dev->dev_addr[1] = (hi >> 0) & 0xff;
+
+ tg3_read_mem(tp, NIC_SRAM_MAC_ADDR_LOW_MBOX, &lo);
+ dev->dev_addr[2] = (lo >> 24) & 0xff;
+ dev->dev_addr[3] = (lo >> 16) & 0xff;
+ dev->dev_addr[4] = (lo >> 8) & 0xff;
+ dev->dev_addr[5] = (lo >> 0) & 0xff;
+ }
+ /* Next, try NVRAM. */
+ else if (!tg3_nvram_read(tp, mac_offset + 0, &hi) &&
+ !tg3_nvram_read(tp, mac_offset + 4, &lo)) {
+ dev->dev_addr[0] = ((hi >> 16) & 0xff);
+ dev->dev_addr[1] = ((hi >> 24) & 0xff);
+ dev->dev_addr[2] = ((lo >> 0) & 0xff);
+ dev->dev_addr[3] = ((lo >> 8) & 0xff);
+ dev->dev_addr[4] = ((lo >> 16) & 0xff);
+ dev->dev_addr[5] = ((lo >> 24) & 0xff);
+ }
+ /* Finally just fetch it out of the MAC control regs. */
+ else {
+ hi = tr32(MAC_ADDR_0_HIGH);
+ lo = tr32(MAC_ADDR_0_LOW);
+
+ dev->dev_addr[5] = lo & 0xff;
+ dev->dev_addr[4] = (lo >> 8) & 0xff;
+ dev->dev_addr[3] = (lo >> 16) & 0xff;
+ dev->dev_addr[2] = (lo >> 24) & 0xff;
+ dev->dev_addr[1] = hi & 0xff;
+ dev->dev_addr[0] = (hi >> 8) & 0xff;
+ }
+
+ if (!is_valid_ether_addr(&dev->dev_addr[0]))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int __devinit tg3_do_test_dma(struct tg3 *tp, u32 *buf, dma_addr_t buf_dma, int size, int to_device)
+{
+ struct tg3_internal_buffer_desc test_desc;
+ u32 sram_dma_descs;
+ int i, ret;
+
+ sram_dma_descs = NIC_SRAM_DMA_DESC_POOL_BASE;
+
+ tw32(FTQ_RCVBD_COMP_FIFO_ENQDEQ, 0);
+ tw32(FTQ_RCVDATA_COMP_FIFO_ENQDEQ, 0);
+ tw32(RDMAC_STATUS, 0);
+ tw32(WDMAC_STATUS, 0);
+
+ tw32(BUFMGR_MODE, 0);
+ tw32(FTQ_RESET, 0);
+
+ /* pci_alloc_consistent gives only non-DAC addresses */
+ test_desc.addr_hi = 0;
+ test_desc.addr_lo = buf_dma & 0xffffffff;
+ test_desc.nic_mbuf = 0x00002100;
+ test_desc.len = size;
+ if (to_device) {
+ test_desc.cqid_sqid = (13 << 8) | 2;
+ tw32(RDMAC_MODE, RDMAC_MODE_RESET);
+ tr32(RDMAC_MODE);
+ udelay(40);
+
+ tw32(RDMAC_MODE, RDMAC_MODE_ENABLE);
+ tr32(RDMAC_MODE);
+ udelay(40);
+ } else {
+ test_desc.cqid_sqid = (16 << 8) | 7;
+ tw32(WDMAC_MODE, WDMAC_MODE_RESET);
+ tr32(WDMAC_MODE);
+ udelay(40);
+
+ tw32(WDMAC_MODE, WDMAC_MODE_ENABLE);
+ tr32(WDMAC_MODE);
+ udelay(40);
+ }
+ test_desc.flags = 0x00000004;
+
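+	/* Copy the descriptor into NIC SRAM a word at a time through the
+	 * PCI memory window, then hand its SRAM address to the DMA
+	 * engine's FTQ to kick off the transfer.
+	 */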
+ for (i = 0; i < (sizeof(test_desc) / sizeof(u32)); i++) {
+ u32 val;
+
+ val = *(((u32 *)&test_desc) + i);
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR,
+ sram_dma_descs + (i * sizeof(u32)));
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val);
+ }
+ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0);
+
+ if (to_device) {
+ tw32(FTQ_DMA_HIGH_READ_FIFO_ENQDEQ, sram_dma_descs);
+ } else {
+ tw32(FTQ_DMA_HIGH_WRITE_FIFO_ENQDEQ, sram_dma_descs);
+ }
+
+ ret = -ENODEV;
+ for (i = 0; i < 40; i++) {
+ u32 val;
+
+ if (to_device)
+ val = tr32(FTQ_RCVBD_COMP_FIFO_ENQDEQ);
+ else
+ val = tr32(FTQ_RCVDATA_COMP_FIFO_ENQDEQ);
+ if ((val & 0xffff) == sram_dma_descs) {
+ ret = 0;
+ break;
+ }
+
+ udelay(100);
+ }
+
+ return ret;
+}
+
+#define TEST_BUFFER_SIZE 0x400
+
+static int __devinit tg3_test_dma(struct tg3 *tp)
+{
+ dma_addr_t buf_dma;
+ u32 *buf;
+ int ret;
+
+ buf = pci_alloc_consistent(tp->pdev, TEST_BUFFER_SIZE, &buf_dma);
+ if (!buf) {
+ ret = -ENOMEM;
+ goto out_nofree;
+ }
+
+ tw32(TG3PCI_CLOCK_CTRL, 0);
+
+ if ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) == 0) {
+ tp->dma_rwctrl =
+ (0x7 << DMA_RWCTRL_PCI_WRITE_CMD_SHIFT) |
+ (0x6 << DMA_RWCTRL_PCI_READ_CMD_SHIFT) |
+ (0x7 << DMA_RWCTRL_WRITE_WATER_SHIFT) |
+ (0x7 << DMA_RWCTRL_READ_WATER_SHIFT) |
+ (0x0f << DMA_RWCTRL_MIN_DMA_SHIFT);
+ } else {
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704)
+ tp->dma_rwctrl =
+ (0x7 << DMA_RWCTRL_PCI_WRITE_CMD_SHIFT) |
+ (0x6 << DMA_RWCTRL_PCI_READ_CMD_SHIFT) |
+ (0x3 << DMA_RWCTRL_WRITE_WATER_SHIFT) |
+ (0x7 << DMA_RWCTRL_READ_WATER_SHIFT) |
+ (0x00 << DMA_RWCTRL_MIN_DMA_SHIFT);
+ else
+ tp->dma_rwctrl =
+ (0x7 << DMA_RWCTRL_PCI_WRITE_CMD_SHIFT) |
+ (0x6 << DMA_RWCTRL_PCI_READ_CMD_SHIFT) |
+ (0x3 << DMA_RWCTRL_WRITE_WATER_SHIFT) |
+ (0x3 << DMA_RWCTRL_READ_WATER_SHIFT) |
+ (0x0f << DMA_RWCTRL_MIN_DMA_SHIFT);
+
+ /* Wheee, some more chip bugs... */
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5703_A1 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5703_A2 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5703_A3 ||
+ tp->pci_chip_rev_id == CHIPREV_ID_5704_A0)
+ tp->dma_rwctrl |= DMA_RWCTRL_ONE_DMA;
+ }
+
+	/* We don't do this on x86 because it seems to hurt performance.
+ * It does help things on other platforms though.
+ */
+#ifndef CONFIG_X86
+ {
+ u8 byte;
+ int cacheline_size;
+ pci_read_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE, &byte);
+
+ if (byte == 0)
+ cacheline_size = 1024;
+ else
+ cacheline_size = (int) byte * 4;
+
+ tp->dma_rwctrl &= ~(DMA_RWCTRL_READ_BNDRY_MASK |
+ DMA_RWCTRL_WRITE_BNDRY_MASK);
+
+ switch (cacheline_size) {
+ case 16:
+ tp->dma_rwctrl |=
+ (DMA_RWCTRL_READ_BNDRY_16 |
+ DMA_RWCTRL_WRITE_BNDRY_16);
+ break;
+
+ case 32:
+ tp->dma_rwctrl |=
+ (DMA_RWCTRL_READ_BNDRY_32 |
+ DMA_RWCTRL_WRITE_BNDRY_32);
+ break;
+
+ case 64:
+ tp->dma_rwctrl |=
+ (DMA_RWCTRL_READ_BNDRY_64 |
+ DMA_RWCTRL_WRITE_BNDRY_64);
+ break;
+
+ case 128:
+ tp->dma_rwctrl |=
+ (DMA_RWCTRL_READ_BNDRY_128 |
+ DMA_RWCTRL_WRITE_BNDRY_128);
+ break;
+
+ case 256:
+ tp->dma_rwctrl |=
+ (DMA_RWCTRL_READ_BNDRY_256 |
+ DMA_RWCTRL_WRITE_BNDRY_256);
+ break;
+
+ case 512:
+ tp->dma_rwctrl |=
+ (DMA_RWCTRL_READ_BNDRY_512 |
+ DMA_RWCTRL_WRITE_BNDRY_512);
+ break;
+
+ case 1024:
+ tp->dma_rwctrl |=
+ (DMA_RWCTRL_READ_BNDRY_1024 |
+ DMA_RWCTRL_WRITE_BNDRY_1024);
+ break;
+		}
+ }
+#endif
+
+ /* Remove this if it causes problems for some boards. */
+ tp->dma_rwctrl |= DMA_RWCTRL_USE_MEM_READ_MULT;
+
+ tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl);
+
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700 &&
+ GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701)
+ return 0;
+
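+	/* DMA a known pattern to the chip and back, verifying each word.
+	 * On a mismatch with write boundaries disabled, enable a 16-byte
+	 * write boundary and retry; any other failure is fatal.
+	 */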
+ ret = 0;
+ while (1) {
+ u32 *p, i;
+
+ p = buf;
+ for (i = 0; i < TEST_BUFFER_SIZE / sizeof(u32); i++)
+ p[i] = i;
+
+ /* Send the buffer to the chip. */
+ ret = tg3_do_test_dma(tp, buf, buf_dma, TEST_BUFFER_SIZE, 1);
+ if (ret)
+ break;
+
+ p = buf;
+ for (i = 0; i < TEST_BUFFER_SIZE / sizeof(u32); i++)
+ p[i] = 0;
+
+ /* Now read it back. */
+ ret = tg3_do_test_dma(tp, buf, buf_dma, TEST_BUFFER_SIZE, 0);
+ if (ret)
+ break;
+
+ /* Verify it. */
+ p = buf;
+ for (i = 0; i < TEST_BUFFER_SIZE / sizeof(u32); i++) {
+ if (p[i] == i)
+ continue;
+
+ if ((tp->dma_rwctrl & DMA_RWCTRL_WRITE_BNDRY_MASK) ==
+ DMA_RWCTRL_WRITE_BNDRY_DISAB) {
+ tp->dma_rwctrl |= DMA_RWCTRL_WRITE_BNDRY_16;
+ tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl);
+ break;
+ } else {
+ ret = -ENODEV;
+ goto out;
+ }
+ }
+
+ if (i == (TEST_BUFFER_SIZE / sizeof(u32))) {
+ /* Success. */
+ ret = 0;
+ break;
+ }
+ }
+
+out:
+ pci_free_consistent(tp->pdev, TEST_BUFFER_SIZE, buf, buf_dma);
+out_nofree:
+ return ret;
+}
+
+static void __devinit tg3_init_link_config(struct tg3 *tp)
+{
+ tp->link_config.advertising =
+ (ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full |
+ ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full |
+ ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full |
+ ADVERTISED_Autoneg | ADVERTISED_MII);
+ tp->link_config.speed = SPEED_INVALID;
+ tp->link_config.duplex = DUPLEX_INVALID;
+ tp->link_config.autoneg = AUTONEG_ENABLE;
+ netif_carrier_off(tp->dev);
+ tp->link_config.active_speed = SPEED_INVALID;
+ tp->link_config.active_duplex = DUPLEX_INVALID;
+ tp->link_config.phy_is_low_power = 0;
+ tp->link_config.orig_speed = SPEED_INVALID;
+ tp->link_config.orig_duplex = DUPLEX_INVALID;
+ tp->link_config.orig_autoneg = AUTONEG_INVALID;
+}
+
+static void __devinit tg3_init_bufmgr_config(struct tg3 *tp)
+{
+ tp->bufmgr_config.mbuf_read_dma_low_water =
+ DEFAULT_MB_RDMA_LOW_WATER;
+ tp->bufmgr_config.mbuf_mac_rx_low_water =
+ DEFAULT_MB_MACRX_LOW_WATER;
+ tp->bufmgr_config.mbuf_high_water =
+ DEFAULT_MB_HIGH_WATER;
+
+ tp->bufmgr_config.mbuf_read_dma_low_water_jumbo =
+ DEFAULT_MB_RDMA_LOW_WATER_JUMBO;
+ tp->bufmgr_config.mbuf_mac_rx_low_water_jumbo =
+ DEFAULT_MB_MACRX_LOW_WATER_JUMBO;
+ tp->bufmgr_config.mbuf_high_water_jumbo =
+ DEFAULT_MB_HIGH_WATER_JUMBO;
+
+ tp->bufmgr_config.dma_low_water = DEFAULT_DMA_LOW_WATER;
+ tp->bufmgr_config.dma_high_water = DEFAULT_DMA_HIGH_WATER;
+}
+
+static char * __devinit tg3_phy_string(struct tg3 *tp)
+{
+ switch (tp->phy_id & PHY_ID_MASK) {
+ case PHY_ID_BCM5400: return "5400";
+ case PHY_ID_BCM5401: return "5401";
+ case PHY_ID_BCM5411: return "5411";
+ case PHY_ID_BCM5701: return "5701";
+ case PHY_ID_BCM5703: return "5703";
+ case PHY_ID_BCM5704: return "5704";
+ case PHY_ID_BCM8002: return "8002";
+ case PHY_ID_SERDES: return "serdes";
+ default: return "unknown";
+	}
+}
+
+static int __devinit tg3_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ static int tg3_version_printed = 0;
+ unsigned long tg3reg_base, tg3reg_len;
+ struct net_device *dev;
+ struct tg3 *tp;
+ int i, err, pci_using_dac, pm_cap;
+
+ if (tg3_version_printed++ == 0)
+ printk(KERN_INFO "%s", version);
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ printk(KERN_ERR PFX "Cannot enable PCI device, "
+ "aborting.\n");
+ return err;
+ }
+
+ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+ printk(KERN_ERR PFX "Cannot find proper PCI device "
+ "base address, aborting.\n");
+ err = -ENODEV;
+ goto err_out_disable_pdev;
+ }
+
+ err = pci_request_regions(pdev, DRV_MODULE_NAME);
+ if (err) {
+ printk(KERN_ERR PFX "Cannot obtain PCI resources, "
+ "aborting.\n");
+ goto err_out_disable_pdev;
+ }
+
+ pci_set_master(pdev);
+
+ /* Find power-management capability. */
+ pm_cap = pci_find_capability(pdev, PCI_CAP_ID_PM);
+ if (pm_cap == 0) {
+ printk(KERN_ERR PFX "Cannot find PowerManagement capability, "
+ "aborting.\n");
+ goto err_out_free_res;
+ }
+
+ /* Configure DMA attributes. */
+	if (!pci_set_dma_mask(pdev, (u64) 0xffffffffffffffffULL)) {
+ pci_using_dac = 1;
+ } else {
+ err = pci_set_dma_mask(pdev, (u64) 0xffffffff);
+ if (err) {
+ printk(KERN_ERR PFX "No usable DMA configuration, "
+ "aborting.\n");
+ goto err_out_free_res;
+ }
+ pci_using_dac = 0;
+ }
+
+ tg3reg_base = pci_resource_start(pdev, 0);
+ tg3reg_len = pci_resource_len(pdev, 0);
+
+ dev = alloc_etherdev(sizeof(*tp));
+ if (!dev) {
+ printk(KERN_ERR PFX "Etherdev alloc failed, aborting.\n");
+ err = -ENOMEM;
+ goto err_out_free_res;
+ }
+
+ SET_MODULE_OWNER(dev);
+
+ if (pci_using_dac)
+ dev->features |= NETIF_F_HIGHDMA;
+#if TG3_VLAN_TAG_USED
+ dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
+ dev->vlan_rx_register = tg3_vlan_rx_register;
+ dev->vlan_rx_kill_vid = tg3_vlan_rx_kill_vid;
+#endif
+#if TG3_DO_TSO != 0
+ dev->features |= NETIF_F_TSO;
+#endif
+
+ tp = dev->priv;
+ tp->pdev = pdev;
+ tp->dev = dev;
+ tp->pm_cap = pm_cap;
+ tp->mac_mode = TG3_DEF_MAC_MODE;
+ tp->rx_mode = TG3_DEF_RX_MODE;
+ tp->tx_mode = TG3_DEF_TX_MODE;
+ tp->mi_mode = MAC_MI_MODE_BASE;
+ if (tg3_debug > 0)
+ tp->msg_enable = tg3_debug;
+ else
+ tp->msg_enable = TG3_DEF_MSG_ENABLE;
+
+ /* The word/byte swap controls here control register access byte
+ * swapping. DMA data byte swapping is controlled in the GRC_MODE
+ * setting below.
+ */
+ tp->misc_host_ctrl =
+ MISC_HOST_CTRL_MASK_PCI_INT |
+ MISC_HOST_CTRL_WORD_SWAP |
+ MISC_HOST_CTRL_INDIR_ACCESS |
+ MISC_HOST_CTRL_PCISTATE_RW;
+
+ /* The NONFRM (non-frame) byte/word swap controls take effect
+ * on descriptor entries, anything which isn't packet data.
+ *
+ * The StrongARM chips on the board (one for tx, one for rx)
+ * are running in big-endian mode.
+ */
+ tp->grc_mode = (GRC_MODE_WSWAP_DATA | GRC_MODE_BSWAP_DATA |
+ GRC_MODE_WSWAP_NONFRM_DATA);
+#ifdef __BIG_ENDIAN
+ tp->grc_mode |= GRC_MODE_BSWAP_NONFRM_DATA;
+#endif
+ spin_lock_init(&tp->lock);
+ spin_lock_init(&tp->tx_lock);
+ spin_lock_init(&tp->indirect_lock);
+
+ tp->regs = (unsigned long) ioremap(tg3reg_base, tg3reg_len);
+ if (tp->regs == 0UL) {
+ printk(KERN_ERR PFX "Cannot map device registers, "
+ "aborting.\n");
+ err = -ENOMEM;
+ goto err_out_free_dev;
+ }
+
+ tg3_init_link_config(tp);
+
+ tg3_init_bufmgr_config(tp);
+
+ tp->rx_pending = TG3_DEF_RX_RING_PENDING;
+ tp->rx_jumbo_pending = TG3_DEF_RX_JUMBO_RING_PENDING;
+ tp->tx_pending = TG3_DEF_TX_RING_PENDING;
+
+ dev->open = tg3_open;
+ dev->stop = tg3_close;
+ dev->get_stats = tg3_get_stats;
+ dev->set_multicast_list = tg3_set_rx_mode;
+ dev->set_mac_address = tg3_set_mac_addr;
+ dev->do_ioctl = tg3_ioctl;
+ dev->tx_timeout = tg3_tx_timeout;
+#ifdef NAPI
+ dev->poll = tg3_poll;
+ dev->weight = 64;
+#endif
+ dev->watchdog_timeo = TG3_TX_TIMEOUT;
+ dev->change_mtu = tg3_change_mtu;
+ dev->irq = pdev->irq;
+
+ err = tg3_get_invariants(tp);
+ if (err) {
+ printk(KERN_ERR PFX "Problem fetching invariants of chip, "
+ "aborting.\n");
+ goto err_out_iounmap;
+ }
+
+ err = tg3_get_device_address(tp);
+ if (err) {
+ printk(KERN_ERR PFX "Could not obtain valid ethernet address, "
+ "aborting.\n");
+ goto err_out_iounmap;
+ }
+
+ err = tg3_test_dma(tp);
+ if (err) {
+ printk(KERN_ERR PFX "DMA engine test failed, aborting.\n");
+ goto err_out_iounmap;
+ }
+
+ /* Tigon3 can do ipv4 only... and some chips have buggy
+ * checksumming.
+ */
+ if ((tp->tg3_flags & TG3_FLAG_BROKEN_CHECKSUMS) == 0) {
+ dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+ tp->tg3_flags |= TG3_FLAG_RX_CHECKSUMS;
+ } else
+ tp->tg3_flags &= ~TG3_FLAG_RX_CHECKSUMS;
+
+ err = register_netdev(dev);
+ if (err) {
+ printk(KERN_ERR PFX "Cannot register net device, "
+ "aborting.\n");
+ goto err_out_iounmap;
+ }
+
+ pci_set_drvdata(pdev, dev);
+
+	/* Now that we have fully set up the chip, save away a snapshot
+ * of the PCI config space. We need to restore this after
+ * GRC_MISC_CFG core clock resets and some resume events.
+ */
+ pci_save_state(tp->pdev, tp->pci_cfg_state);
+
+ printk(KERN_INFO "%s: Tigon3 [partno(%s) rev %04x PHY(%s)] (PCI%s:%s:%s) %sBaseT Ethernet ",
+ dev->name,
+ tp->board_part_number,
+ tp->pci_chip_rev_id,
+ tg3_phy_string(tp),
+ ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) ? "X" : ""),
+ ((tp->tg3_flags & TG3_FLAG_PCI_HIGH_SPEED) ?
+ ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) ? "133MHz" : "66MHz") :
+ ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) ? "100MHz" : "33MHz")),
+ ((tp->tg3_flags & TG3_FLAG_PCI_32BIT) ? "32-bit" : "64-bit"),
+ (tp->tg3_flags & TG3_FLAG_10_100_ONLY) ? "10/100" : "10/100/1000");
+
+ for (i = 0; i < 6; i++)
+ printk("%2.2x%c", dev->dev_addr[i],
+ i == 5 ? '\n' : ':');
+
+ return 0;
+
+err_out_iounmap:
+ iounmap((void *) tp->regs);
+
+err_out_free_dev:
+ kfree(dev);
+
+err_out_free_res:
+ pci_release_regions(pdev);
+
+err_out_disable_pdev:
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ return err;
+}
+
+static void __devexit tg3_remove_one(struct pci_dev *pdev)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+
+ if (dev) {
+ unregister_netdev(dev);
+ iounmap((void *) ((struct tg3 *)(dev->priv))->regs);
+ kfree(dev);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ }
+}
+
+static int tg3_suspend(struct pci_dev *pdev, u32 state)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct tg3 *tp = dev->priv;
+ int err;
+
+ if (!netif_running(dev))
+ return 0;
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+ tg3_disable_ints(tp);
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ netif_device_detach(dev);
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+ tg3_halt(tp);
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ err = tg3_set_power_state(tp, state);
+ if (err) {
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tg3_init_rings(tp);
+ tg3_init_hw(tp);
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ netif_device_attach(dev);
+ }
+
+ return err;
+}
+
+static int tg3_resume(struct pci_dev *pdev)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct tg3 *tp = dev->priv;
+ int err;
+
+ if (!netif_running(dev))
+ return 0;
+
+ err = tg3_set_power_state(tp, 0);
+ if (err)
+ return err;
+
+ netif_device_attach(dev);
+
+ spin_lock_irq(&tp->lock);
+ spin_lock(&tp->tx_lock);
+
+ tg3_init_rings(tp);
+ tg3_init_hw(tp);
+ tg3_enable_ints(tp);
+
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irq(&tp->lock);
+
+ return 0;
+}
+
+static struct pci_driver tg3_driver = {
+ .name = DRV_MODULE_NAME,
+ .id_table = tg3_pci_tbl,
+ .probe = tg3_init_one,
+ .remove = __devexit_p(tg3_remove_one),
+ .suspend = tg3_suspend,
+ .resume = tg3_resume
+};
+
+static int __init tg3_init(void)
+{
+ return pci_module_init(&tg3_driver);
+}
+
+static void __exit tg3_cleanup(void)
+{
+ pci_unregister_driver(&tg3_driver);
+}
+
+module_init(tg3_init);
+module_exit(tg3_cleanup);
diff --git a/xen/drivers/net/tg3.h b/xen/drivers/net/tg3.h
new file mode 100644
index 0000000000..d816322d98
--- /dev/null
+++ b/xen/drivers/net/tg3.h
@@ -0,0 +1,1893 @@
+/* $Id: tg3.h,v 1.37.2.32 2002/03/11 12:18:18 davem Exp $
+ * tg3.h: Definitions for Broadcom Tigon3 ethernet driver.
+ *
+ * Copyright (C) 2001, 2002 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2001 Jeff Garzik (jgarzik@pobox.com)
+ */
+
+#ifndef _T3_H
+#define _T3_H
+
+#define TG3_64BIT_REG_HIGH 0x00UL
+#define TG3_64BIT_REG_LOW 0x04UL
+
+/* Descriptor block info. */
+#define TG3_BDINFO_HOST_ADDR 0x0UL /* 64-bit */
+#define TG3_BDINFO_MAXLEN_FLAGS 0x8UL /* 32-bit */
+#define BDINFO_FLAGS_USE_EXT_RECV 0x00000001 /* ext rx_buffer_desc */
+#define BDINFO_FLAGS_DISABLED 0x00000002
+#define BDINFO_FLAGS_MAXLEN_MASK 0xffff0000
+#define BDINFO_FLAGS_MAXLEN_SHIFT 16
+#define TG3_BDINFO_NIC_ADDR 0xcUL /* 32-bit */
+#define TG3_BDINFO_SIZE 0x10UL
+
+#define RX_COPY_THRESHOLD		0	/* 256 */
+
+#define RX_STD_MAX_SIZE 1536
+#define RX_JUMBO_MAX_SIZE 0xdeadbeef /* XXX */
+
+/* First 256 bytes are a mirror of PCI config space. */
+#define TG3PCI_VENDOR 0x00000000
+#define TG3PCI_VENDOR_BROADCOM 0x14e4
+#define TG3PCI_DEVICE 0x00000002
+#define TG3PCI_DEVICE_TIGON3_1 0x1644 /* BCM5700 */
+#define TG3PCI_DEVICE_TIGON3_2 0x1645 /* BCM5701 */
+#define TG3PCI_DEVICE_TIGON3_3 0x1646 /* BCM5702 */
+#define TG3PCI_DEVICE_TIGON3_4 0x1647 /* BCM5703 */
+#define TG3PCI_COMMAND 0x00000004
+#define TG3PCI_STATUS 0x00000006
+#define TG3PCI_CCREVID 0x00000008
+#define TG3PCI_CACHELINESZ 0x0000000c
+#define TG3PCI_LATTIMER 0x0000000d
+#define TG3PCI_HEADERTYPE 0x0000000e
+#define TG3PCI_BIST 0x0000000f
+#define TG3PCI_BASE0_LOW 0x00000010
+#define TG3PCI_BASE0_HIGH 0x00000014
+/* 0x18 --> 0x2c unused */
+#define TG3PCI_SUBSYSVENID 0x0000002c
+#define TG3PCI_SUBSYSID 0x0000002e
+#define TG3PCI_ROMADDR 0x00000030
+#define TG3PCI_CAPLIST 0x00000034
+/* 0x35 --> 0x3c unused */
+#define TG3PCI_IRQ_LINE 0x0000003c
+#define TG3PCI_IRQ_PIN 0x0000003d
+#define TG3PCI_MIN_GNT 0x0000003e
+#define TG3PCI_MAX_LAT 0x0000003f
+#define TG3PCI_X_CAPS 0x00000040
+#define PCIX_CAPS_RELAXED_ORDERING 0x00020000
+#define PCIX_CAPS_SPLIT_MASK 0x00700000
+#define PCIX_CAPS_SPLIT_SHIFT 20
+#define PCIX_CAPS_BURST_MASK 0x000c0000
+#define PCIX_CAPS_BURST_SHIFT 18
+#define PCIX_CAPS_MAX_BURST_5704 2
+#define TG3PCI_PM_CAP_PTR 0x00000041
+#define TG3PCI_X_COMMAND 0x00000042
+#define TG3PCI_X_STATUS 0x00000044
+#define TG3PCI_PM_CAP_ID 0x00000048
+#define TG3PCI_VPD_CAP_PTR 0x00000049
+#define TG3PCI_PM_CAPS 0x0000004a
+#define TG3PCI_PM_CTRL_STAT 0x0000004c
+#define TG3PCI_BR_SUPP_EXT 0x0000004e
+#define TG3PCI_PM_DATA 0x0000004f
+#define TG3PCI_VPD_CAP_ID 0x00000050
+#define TG3PCI_MSI_CAP_PTR 0x00000051
+#define TG3PCI_VPD_ADDR_FLAG 0x00000052
+#define VPD_ADDR_FLAG_WRITE 0x00008000
+#define TG3PCI_VPD_DATA 0x00000054
+#define TG3PCI_MSI_CAP_ID 0x00000058
+#define TG3PCI_NXT_CAP_PTR 0x00000059
+#define TG3PCI_MSI_CTRL 0x0000005a
+#define TG3PCI_MSI_ADDR_LOW 0x0000005c
+#define TG3PCI_MSI_ADDR_HIGH 0x00000060
+#define TG3PCI_MSI_DATA 0x00000064
+/* 0x66 --> 0x68 unused */
+#define TG3PCI_MISC_HOST_CTRL 0x00000068
+#define MISC_HOST_CTRL_CLEAR_INT 0x00000001
+#define MISC_HOST_CTRL_MASK_PCI_INT 0x00000002
+#define MISC_HOST_CTRL_BYTE_SWAP 0x00000004
+#define MISC_HOST_CTRL_WORD_SWAP 0x00000008
+#define MISC_HOST_CTRL_PCISTATE_RW 0x00000010
+#define MISC_HOST_CTRL_CLKREG_RW 0x00000020
+#define MISC_HOST_CTRL_REGWORD_SWAP 0x00000040
+#define MISC_HOST_CTRL_INDIR_ACCESS 0x00000080
+#define MISC_HOST_CTRL_IRQ_MASK_MODE 0x00000100
+#define MISC_HOST_CTRL_TAGGED_STATUS 0x00000200
+#define MISC_HOST_CTRL_CHIPREV 0xffff0000
+#define MISC_HOST_CTRL_CHIPREV_SHIFT 16
+#define GET_CHIP_REV_ID(MISC_HOST_CTRL) \
+ (((MISC_HOST_CTRL) & MISC_HOST_CTRL_CHIPREV) >> \
+ MISC_HOST_CTRL_CHIPREV_SHIFT)
+#define CHIPREV_ID_5700_A0 0x7000
+#define CHIPREV_ID_5700_A1 0x7001
+#define CHIPREV_ID_5700_B0 0x7100
+#define CHIPREV_ID_5700_B1 0x7101
+#define CHIPREV_ID_5700_B3 0x7102
+#define CHIPREV_ID_5700_ALTIMA 0x7104
+#define CHIPREV_ID_5700_C0 0x7200
+#define CHIPREV_ID_5701_A0 0x0000
+#define CHIPREV_ID_5701_B0 0x0100
+#define CHIPREV_ID_5701_B2 0x0102
+#define CHIPREV_ID_5701_B5 0x0105
+#define CHIPREV_ID_5703_A0 0x1000
+#define CHIPREV_ID_5703_A1 0x1001
+#define CHIPREV_ID_5703_A2 0x1002
+#define CHIPREV_ID_5703_A3 0x1003
+#define CHIPREV_ID_5704_A0 0x2000
+#define CHIPREV_ID_5704_A1 0x2001
+#define CHIPREV_ID_5704_A2 0x2002
+#define GET_ASIC_REV(CHIP_REV_ID) ((CHIP_REV_ID) >> 12)
+#define ASIC_REV_5700 0x07
+#define ASIC_REV_5701 0x00
+#define ASIC_REV_5703 0x01
+#define ASIC_REV_5704 0x02
+#define GET_CHIP_REV(CHIP_REV_ID) ((CHIP_REV_ID) >> 8)
+#define CHIPREV_5700_AX 0x70
+#define CHIPREV_5700_BX 0x71
+#define CHIPREV_5700_CX 0x72
+#define CHIPREV_5701_AX 0x00
+#define GET_METAL_REV(CHIP_REV_ID) ((CHIP_REV_ID) & 0xff)
+#define METAL_REV_A0 0x00
+#define METAL_REV_A1 0x01
+#define METAL_REV_B0 0x00
+#define METAL_REV_B1 0x01
+#define METAL_REV_B2 0x02
+#define TG3PCI_DMA_RW_CTRL 0x0000006c
+#define DMA_RWCTRL_MIN_DMA 0x000000ff
+#define DMA_RWCTRL_MIN_DMA_SHIFT 0
+#define DMA_RWCTRL_READ_BNDRY_MASK 0x00000700
+#define DMA_RWCTRL_READ_BNDRY_DISAB 0x00000000
+#define DMA_RWCTRL_READ_BNDRY_16 0x00000100
+#define DMA_RWCTRL_READ_BNDRY_32 0x00000200
+#define DMA_RWCTRL_READ_BNDRY_64 0x00000300
+#define DMA_RWCTRL_READ_BNDRY_128 0x00000400
+#define DMA_RWCTRL_READ_BNDRY_256 0x00000500
+#define DMA_RWCTRL_READ_BNDRY_512 0x00000600
+#define DMA_RWCTRL_READ_BNDRY_1024 0x00000700
+#define DMA_RWCTRL_WRITE_BNDRY_MASK 0x00003800
+#define DMA_RWCTRL_WRITE_BNDRY_DISAB 0x00000000
+#define DMA_RWCTRL_WRITE_BNDRY_16 0x00000800
+#define DMA_RWCTRL_WRITE_BNDRY_32 0x00001000
+#define DMA_RWCTRL_WRITE_BNDRY_64 0x00001800
+#define DMA_RWCTRL_WRITE_BNDRY_128 0x00002000
+#define DMA_RWCTRL_WRITE_BNDRY_256 0x00002800
+#define DMA_RWCTRL_WRITE_BNDRY_512 0x00003000
+#define DMA_RWCTRL_WRITE_BNDRY_1024 0x00003800
+#define DMA_RWCTRL_ONE_DMA 0x00004000
+#define DMA_RWCTRL_READ_WATER 0x00070000
+#define DMA_RWCTRL_READ_WATER_SHIFT 16
+#define DMA_RWCTRL_WRITE_WATER 0x00380000
+#define DMA_RWCTRL_WRITE_WATER_SHIFT 19
+#define DMA_RWCTRL_USE_MEM_READ_MULT 0x00400000
+#define DMA_RWCTRL_ASSERT_ALL_BE 0x00800000
+#define DMA_RWCTRL_PCI_READ_CMD 0x0f000000
+#define DMA_RWCTRL_PCI_READ_CMD_SHIFT 24
+#define DMA_RWCTRL_PCI_WRITE_CMD 0xf0000000
+#define DMA_RWCTRL_PCI_WRITE_CMD_SHIFT 28
+#define TG3PCI_PCISTATE 0x00000070
+#define PCISTATE_FORCE_RESET 0x00000001
+#define PCISTATE_INT_NOT_ACTIVE 0x00000002
+#define PCISTATE_CONV_PCI_MODE 0x00000004
+#define PCISTATE_BUS_SPEED_HIGH 0x00000008
+#define PCISTATE_BUS_32BIT 0x00000010
+#define PCISTATE_ROM_ENABLE 0x00000020
+#define PCISTATE_ROM_RETRY_ENABLE 0x00000040
+#define PCISTATE_FLAT_VIEW 0x00000100
+#define PCISTATE_RETRY_SAME_DMA 0x00002000
+#define TG3PCI_CLOCK_CTRL 0x00000074
+#define CLOCK_CTRL_CORECLK_DISABLE 0x00000200
+#define CLOCK_CTRL_RXCLK_DISABLE 0x00000400
+#define CLOCK_CTRL_TXCLK_DISABLE 0x00000800
+#define CLOCK_CTRL_ALTCLK 0x00001000
+#define CLOCK_CTRL_PWRDOWN_PLL133 0x00008000
+#define CLOCK_CTRL_44MHZ_CORE 0x00040000
+#define CLOCK_CTRL_DELAY_PCI_GRANT 0x80000000
+#define TG3PCI_REG_BASE_ADDR 0x00000078
+#define TG3PCI_MEM_WIN_BASE_ADDR 0x0000007c
+#define TG3PCI_REG_DATA 0x00000080
+#define TG3PCI_MEM_WIN_DATA 0x00000084
+#define TG3PCI_MODE_CTRL 0x00000088
+#define TG3PCI_MISC_CFG 0x0000008c
+#define TG3PCI_MISC_LOCAL_CTRL 0x00000090
+/* 0x94 --> 0x98 unused */
+#define TG3PCI_STD_RING_PROD_IDX 0x00000098 /* 64-bit */
+#define TG3PCI_RCV_RET_RING_CON_IDX 0x000000a0 /* 64-bit */
+#define TG3PCI_SND_PROD_IDX 0x000000a8 /* 64-bit */
+/* 0xb0 --> 0x100 unused */
+
+/* 0x100 --> 0x200 unused */
+
+/* Mailbox registers */
+#define MAILBOX_INTERRUPT_0 0x00000200 /* 64-bit */
+#define MAILBOX_INTERRUPT_1 0x00000208 /* 64-bit */
+#define MAILBOX_INTERRUPT_2 0x00000210 /* 64-bit */
+#define MAILBOX_INTERRUPT_3 0x00000218 /* 64-bit */
+#define MAILBOX_GENERAL_0 0x00000220 /* 64-bit */
+#define MAILBOX_GENERAL_1 0x00000228 /* 64-bit */
+#define MAILBOX_GENERAL_2 0x00000230 /* 64-bit */
+#define MAILBOX_GENERAL_3 0x00000238 /* 64-bit */
+#define MAILBOX_GENERAL_4 0x00000240 /* 64-bit */
+#define MAILBOX_GENERAL_5 0x00000248 /* 64-bit */
+#define MAILBOX_GENERAL_6 0x00000250 /* 64-bit */
+#define MAILBOX_GENERAL_7 0x00000258 /* 64-bit */
+#define MAILBOX_RELOAD_STAT 0x00000260 /* 64-bit */
+#define MAILBOX_RCV_STD_PROD_IDX 0x00000268 /* 64-bit */
+#define MAILBOX_RCV_JUMBO_PROD_IDX 0x00000270 /* 64-bit */
+#define MAILBOX_RCV_MINI_PROD_IDX 0x00000278 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_0 0x00000280 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_1 0x00000288 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_2 0x00000290 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_3 0x00000298 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_4 0x000002a0 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_5 0x000002a8 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_6 0x000002b0 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_7 0x000002b8 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_8 0x000002c0 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_9 0x000002c8 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_10 0x000002d0 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_11 0x000002d8 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_12 0x000002e0 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_13 0x000002e8 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_14 0x000002f0 /* 64-bit */
+#define MAILBOX_RCVRET_CON_IDX_15 0x000002f8 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_0 0x00000300 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_1 0x00000308 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_2 0x00000310 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_3 0x00000318 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_4 0x00000320 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_5 0x00000328 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_6 0x00000330 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_7 0x00000338 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_8 0x00000340 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_9 0x00000348 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_10 0x00000350 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_11 0x00000358 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_12 0x00000360 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_13 0x00000368 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_14 0x00000370 /* 64-bit */
+#define MAILBOX_SNDHOST_PROD_IDX_15 0x00000378 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_0 0x00000380 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_1 0x00000388 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_2 0x00000390 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_3 0x00000398 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_4 0x000003a0 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_5 0x000003a8 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_6 0x000003b0 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_7 0x000003b8 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_8 0x000003c0 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_9 0x000003c8 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_10 0x000003d0 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_11 0x000003d8 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_12 0x000003e0 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_13 0x000003e8 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_14 0x000003f0 /* 64-bit */
+#define MAILBOX_SNDNIC_PROD_IDX_15 0x000003f8 /* 64-bit */
+
+/* MAC control registers */
+#define MAC_MODE 0x00000400
+#define MAC_MODE_RESET 0x00000001
+#define MAC_MODE_HALF_DUPLEX 0x00000002
+#define MAC_MODE_PORT_MODE_MASK 0x0000000c
+#define MAC_MODE_PORT_MODE_TBI 0x0000000c
+#define MAC_MODE_PORT_MODE_GMII 0x00000008
+#define MAC_MODE_PORT_MODE_MII 0x00000004
+#define MAC_MODE_PORT_MODE_NONE 0x00000000
+#define MAC_MODE_PORT_INT_LPBACK 0x00000010
+#define MAC_MODE_TAGGED_MAC_CTRL 0x00000080
+#define MAC_MODE_TX_BURSTING 0x00000100
+#define MAC_MODE_MAX_DEFER 0x00000200
+#define MAC_MODE_LINK_POLARITY 0x00000400
+#define MAC_MODE_RXSTAT_ENABLE 0x00000800
+#define MAC_MODE_RXSTAT_CLEAR 0x00001000
+#define MAC_MODE_RXSTAT_FLUSH 0x00002000
+#define MAC_MODE_TXSTAT_ENABLE 0x00004000
+#define MAC_MODE_TXSTAT_CLEAR 0x00008000
+#define MAC_MODE_TXSTAT_FLUSH 0x00010000
+#define MAC_MODE_SEND_CONFIGS 0x00020000
+#define MAC_MODE_MAGIC_PKT_ENABLE 0x00040000
+#define MAC_MODE_ACPI_ENABLE 0x00080000
+#define MAC_MODE_MIP_ENABLE 0x00100000
+#define MAC_MODE_TDE_ENABLE 0x00200000
+#define MAC_MODE_RDE_ENABLE 0x00400000
+#define MAC_MODE_FHDE_ENABLE 0x00800000
+#define MAC_STATUS 0x00000404
+#define MAC_STATUS_PCS_SYNCED 0x00000001
+#define MAC_STATUS_SIGNAL_DET 0x00000002
+#define MAC_STATUS_RCVD_CFG 0x00000004
+#define MAC_STATUS_CFG_CHANGED 0x00000008
+#define MAC_STATUS_SYNC_CHANGED 0x00000010
+#define MAC_STATUS_PORT_DEC_ERR 0x00000400
+#define MAC_STATUS_LNKSTATE_CHANGED 0x00001000
+#define MAC_STATUS_MI_COMPLETION 0x00400000
+#define MAC_STATUS_MI_INTERRUPT 0x00800000
+#define MAC_STATUS_AP_ERROR 0x01000000
+#define MAC_STATUS_ODI_ERROR 0x02000000
+#define MAC_STATUS_RXSTAT_OVERRUN 0x04000000
+#define MAC_STATUS_TXSTAT_OVERRUN 0x08000000
+#define MAC_EVENT 0x00000408
+#define MAC_EVENT_PORT_DECODE_ERR 0x00000400
+#define MAC_EVENT_LNKSTATE_CHANGED 0x00001000
+#define MAC_EVENT_MI_COMPLETION 0x00400000
+#define MAC_EVENT_MI_INTERRUPT 0x00800000
+#define MAC_EVENT_AP_ERROR 0x01000000
+#define MAC_EVENT_ODI_ERROR 0x02000000
+#define MAC_EVENT_RXSTAT_OVERRUN 0x04000000
+#define MAC_EVENT_TXSTAT_OVERRUN 0x08000000
+#define MAC_LED_CTRL 0x0000040c
+#define LED_CTRL_LNKLED_OVERRIDE 0x00000001
+#define LED_CTRL_1000MBPS_ON 0x00000002
+#define LED_CTRL_100MBPS_ON 0x00000004
+#define LED_CTRL_10MBPS_ON 0x00000008
+#define LED_CTRL_TRAFFIC_OVERRIDE 0x00000010
+#define LED_CTRL_TRAFFIC_BLINK 0x00000020
+#define LED_CTRL_TRAFFIC_LED 0x00000040
+#define LED_CTRL_1000MBPS_STATUS 0x00000080
+#define LED_CTRL_100MBPS_STATUS 0x00000100
+#define LED_CTRL_10MBPS_STATUS 0x00000200
+#define LED_CTRL_TRAFFIC_STATUS 0x00000400
+#define LED_CTRL_MAC_MODE 0x00000000
+#define LED_CTRL_PHY_MODE_1 0x00000800
+#define LED_CTRL_PHY_MODE_2 0x00001000
+#define LED_CTRL_BLINK_RATE_MASK 0x7ff80000
+#define LED_CTRL_BLINK_RATE_SHIFT 19
+#define LED_CTRL_BLINK_PER_OVERRIDE 0x00080000
+#define LED_CTRL_BLINK_RATE_OVERRIDE 0x80000000
+#define MAC_ADDR_0_HIGH 0x00000410 /* upper 2 bytes */
+#define MAC_ADDR_0_LOW 0x00000414 /* lower 4 bytes */
+#define MAC_ADDR_1_HIGH 0x00000418 /* upper 2 bytes */
+#define MAC_ADDR_1_LOW 0x0000041c /* lower 4 bytes */
+#define MAC_ADDR_2_HIGH 0x00000420 /* upper 2 bytes */
+#define MAC_ADDR_2_LOW 0x00000424 /* lower 4 bytes */
+#define MAC_ADDR_3_HIGH 0x00000428 /* upper 2 bytes */
+#define MAC_ADDR_3_LOW 0x0000042c /* lower 4 bytes */
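+/* Illustrative sketch (not part of the original header): programming a
+ * station address into slot 0, assuming a tw32() register-write accessor
+ * (hypothetical name) and a 6-byte addr[]:
+ *
+ *   tw32(MAC_ADDR_0_HIGH, (addr[0] << 8) | addr[1]);
+ *   tw32(MAC_ADDR_0_LOW,  (addr[2] << 24) | (addr[3] << 16) |
+ *                         (addr[4] <<  8) |  addr[5]);
+ */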
+#define MAC_ACPI_MBUF_PTR 0x00000430
+#define MAC_ACPI_LEN_OFFSET 0x00000434
+#define ACPI_LENOFF_LEN_MASK 0x0000ffff
+#define ACPI_LENOFF_LEN_SHIFT 0
+#define ACPI_LENOFF_OFF_MASK 0x0fff0000
+#define ACPI_LENOFF_OFF_SHIFT 16
+#define MAC_TX_BACKOFF_SEED 0x00000438
+#define TX_BACKOFF_SEED_MASK 0x000003ff
+#define MAC_RX_MTU_SIZE 0x0000043c
+#define RX_MTU_SIZE_MASK 0x0000ffff
+#define MAC_PCS_TEST 0x00000440
+#define PCS_TEST_PATTERN_MASK 0x000fffff
+#define PCS_TEST_PATTERN_SHIFT 0
+#define PCS_TEST_ENABLE 0x00100000
+#define MAC_TX_AUTO_NEG 0x00000444
+#define TX_AUTO_NEG_MASK 0x0000ffff
+#define TX_AUTO_NEG_SHIFT 0
+#define MAC_RX_AUTO_NEG 0x00000448
+#define RX_AUTO_NEG_MASK 0x0000ffff
+#define RX_AUTO_NEG_SHIFT 0
+#define MAC_MI_COM 0x0000044c
+#define MI_COM_CMD_MASK 0x0c000000
+#define MI_COM_CMD_WRITE 0x04000000
+#define MI_COM_CMD_READ 0x08000000
+#define MI_COM_READ_FAILED 0x10000000
+#define MI_COM_START 0x20000000
+#define MI_COM_BUSY 0x20000000
+#define MI_COM_PHY_ADDR_MASK 0x03e00000
+#define MI_COM_PHY_ADDR_SHIFT 21
+#define MI_COM_REG_ADDR_MASK 0x001f0000
+#define MI_COM_REG_ADDR_SHIFT 16
+#define MI_COM_DATA_MASK 0x0000ffff
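+/* Illustrative sketch (not part of the original header) of an MII register
+ * read through MAC_MI_COM; tr32()/tw32() and phy_addr/reg are assumed
+ * accessor and variable names:
+ *
+ *   u32 frame = ((phy_addr << MI_COM_PHY_ADDR_SHIFT) & MI_COM_PHY_ADDR_MASK) |
+ *               ((reg << MI_COM_REG_ADDR_SHIFT) & MI_COM_REG_ADDR_MASK) |
+ *               MI_COM_CMD_READ | MI_COM_START;
+ *   tw32(MAC_MI_COM, frame);
+ *   do { frame = tr32(MAC_MI_COM); } while (frame & MI_COM_BUSY);
+ *   if (!(frame & MI_COM_READ_FAILED))
+ *           val = frame & MI_COM_DATA_MASK;
+ */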
+#define MAC_MI_STAT 0x00000450
+#define MAC_MI_STAT_LNKSTAT_ATTN_ENAB 0x00000001
+#define MAC_MI_MODE 0x00000454
+#define MAC_MI_MODE_CLK_10MHZ 0x00000001
+#define MAC_MI_MODE_SHORT_PREAMBLE 0x00000002
+#define MAC_MI_MODE_AUTO_POLL 0x00000010
+#define MAC_MI_MODE_CORE_CLK_62MHZ 0x00008000
+#define MAC_MI_MODE_BASE 0x000c0000 /* XXX magic values XXX */
+#define MAC_AUTO_POLL_STATUS 0x00000458
+#define MAC_AUTO_POLL_ERROR 0x00000001
+#define MAC_TX_MODE 0x0000045c
+#define TX_MODE_RESET 0x00000001
+#define TX_MODE_ENABLE 0x00000002
+#define TX_MODE_FLOW_CTRL_ENABLE 0x00000010
+#define TX_MODE_BIG_BCKOFF_ENABLE 0x00000020
+#define TX_MODE_LONG_PAUSE_ENABLE 0x00000040
+#define MAC_TX_STATUS 0x00000460
+#define TX_STATUS_XOFFED 0x00000001
+#define TX_STATUS_SENT_XOFF 0x00000002
+#define TX_STATUS_SENT_XON 0x00000004
+#define TX_STATUS_LINK_UP 0x00000008
+#define TX_STATUS_ODI_UNDERRUN 0x00000010
+#define TX_STATUS_ODI_OVERRUN 0x00000020
+#define MAC_TX_LENGTHS 0x00000464
+#define TX_LENGTHS_SLOT_TIME_MASK 0x000000ff
+#define TX_LENGTHS_SLOT_TIME_SHIFT 0
+#define TX_LENGTHS_IPG_MASK 0x00000f00
+#define TX_LENGTHS_IPG_SHIFT 8
+#define TX_LENGTHS_IPG_CRS_MASK 0x00003000
+#define TX_LENGTHS_IPG_CRS_SHIFT 12
+#define MAC_RX_MODE 0x00000468
+#define RX_MODE_RESET 0x00000001
+#define RX_MODE_ENABLE 0x00000002
+#define RX_MODE_FLOW_CTRL_ENABLE 0x00000004
+#define RX_MODE_KEEP_MAC_CTRL 0x00000008
+#define RX_MODE_KEEP_PAUSE 0x00000010
+#define RX_MODE_ACCEPT_OVERSIZED 0x00000020
+#define RX_MODE_ACCEPT_RUNTS 0x00000040
+#define RX_MODE_LEN_CHECK 0x00000080
+#define RX_MODE_PROMISC 0x00000100
+#define RX_MODE_NO_CRC_CHECK 0x00000200
+#define RX_MODE_KEEP_VLAN_TAG 0x00000400
+#define MAC_RX_STATUS 0x0000046c
+#define RX_STATUS_REMOTE_TX_XOFFED 0x00000001
+#define RX_STATUS_XOFF_RCVD 0x00000002
+#define RX_STATUS_XON_RCVD 0x00000004
+#define MAC_HASH_REG_0 0x00000470
+#define MAC_HASH_REG_1 0x00000474
+#define MAC_HASH_REG_2 0x00000478
+#define MAC_HASH_REG_3 0x0000047c
+#define MAC_RCV_RULE_0 0x00000480
+#define MAC_RCV_VALUE_0 0x00000484
+#define MAC_RCV_RULE_1 0x00000488
+#define MAC_RCV_VALUE_1 0x0000048c
+#define MAC_RCV_RULE_2 0x00000490
+#define MAC_RCV_VALUE_2 0x00000494
+#define MAC_RCV_RULE_3 0x00000498
+#define MAC_RCV_VALUE_3 0x0000049c
+#define MAC_RCV_RULE_4 0x000004a0
+#define MAC_RCV_VALUE_4 0x000004a4
+#define MAC_RCV_RULE_5 0x000004a8
+#define MAC_RCV_VALUE_5 0x000004ac
+#define MAC_RCV_RULE_6 0x000004b0
+#define MAC_RCV_VALUE_6 0x000004b4
+#define MAC_RCV_RULE_7 0x000004b8
+#define MAC_RCV_VALUE_7 0x000004bc
+#define MAC_RCV_RULE_8 0x000004c0
+#define MAC_RCV_VALUE_8 0x000004c4
+#define MAC_RCV_RULE_9 0x000004c8
+#define MAC_RCV_VALUE_9 0x000004cc
+#define MAC_RCV_RULE_10 0x000004d0
+#define MAC_RCV_VALUE_10 0x000004d4
+#define MAC_RCV_RULE_11 0x000004d8
+#define MAC_RCV_VALUE_11 0x000004dc
+#define MAC_RCV_RULE_12 0x000004e0
+#define MAC_RCV_VALUE_12 0x000004e4
+#define MAC_RCV_RULE_13 0x000004e8
+#define MAC_RCV_VALUE_13 0x000004ec
+#define MAC_RCV_RULE_14 0x000004f0
+#define MAC_RCV_VALUE_14 0x000004f4
+#define MAC_RCV_RULE_15 0x000004f8
+#define MAC_RCV_VALUE_15 0x000004fc
+#define RCV_RULE_DISABLE_MASK 0x7fffffff
+#define MAC_RCV_RULE_CFG 0x00000500
+#define RCV_RULE_CFG_DEFAULT_CLASS 0x00000008
+/* 0x504 --> 0x590 unused */
+#define MAC_SERDES_CFG 0x00000590
+#define MAC_SERDES_STAT 0x00000594
+/* 0x598 --> 0x600 unused */
+#define MAC_TX_MAC_STATE_BASE 0x00000600 /* 16 bytes */
+#define MAC_RX_MAC_STATE_BASE 0x00000610 /* 20 bytes */
+/* 0x624 --> 0x800 unused */
+#define MAC_RX_STATS_BASE 0x00000800 /* 26 32-bit words */
+/* 0x868 --> 0x880 unused */
+#define MAC_TX_STATS_BASE 0x00000880 /* 28 32-bit words */
+/* 0x8f0 --> 0xc00 unused */
+
+/* Send data initiator control registers */
+#define SNDDATAI_MODE 0x00000c00
+#define SNDDATAI_MODE_RESET 0x00000001
+#define SNDDATAI_MODE_ENABLE 0x00000002
+#define SNDDATAI_MODE_STAT_OFLOW_ENAB 0x00000004
+#define SNDDATAI_STATUS 0x00000c04
+#define SNDDATAI_STATUS_STAT_OFLOW 0x00000004
+#define SNDDATAI_STATSCTRL 0x00000c08
+#define SNDDATAI_SCTRL_ENABLE 0x00000001
+#define SNDDATAI_SCTRL_FASTUPD 0x00000002
+#define SNDDATAI_SCTRL_CLEAR 0x00000004
+#define SNDDATAI_SCTRL_FLUSH 0x00000008
+#define SNDDATAI_SCTRL_FORCE_ZERO 0x00000010
+#define SNDDATAI_STATSENAB 0x00000c0c
+#define SNDDATAI_STATSINCMASK 0x00000c10
+/* 0xc14 --> 0xc80 unused */
+#define SNDDATAI_COS_CNT_0 0x00000c80
+#define SNDDATAI_COS_CNT_1 0x00000c84
+#define SNDDATAI_COS_CNT_2 0x00000c88
+#define SNDDATAI_COS_CNT_3 0x00000c8c
+#define SNDDATAI_COS_CNT_4 0x00000c90
+#define SNDDATAI_COS_CNT_5 0x00000c94
+#define SNDDATAI_COS_CNT_6 0x00000c98
+#define SNDDATAI_COS_CNT_7 0x00000c9c
+#define SNDDATAI_COS_CNT_8 0x00000ca0
+#define SNDDATAI_COS_CNT_9 0x00000ca4
+#define SNDDATAI_COS_CNT_10 0x00000ca8
+#define SNDDATAI_COS_CNT_11 0x00000cac
+#define SNDDATAI_COS_CNT_12 0x00000cb0
+#define SNDDATAI_COS_CNT_13 0x00000cb4
+#define SNDDATAI_COS_CNT_14 0x00000cb8
+#define SNDDATAI_COS_CNT_15 0x00000cbc
+#define SNDDATAI_DMA_RDQ_FULL_CNT 0x00000cc0
+#define SNDDATAI_DMA_PRIO_RDQ_FULL_CNT 0x00000cc4
+#define SNDDATAI_SDCQ_FULL_CNT 0x00000cc8
+#define SNDDATAI_NICRNG_SSND_PIDX_CNT 0x00000ccc
+#define SNDDATAI_STATS_UPDATED_CNT 0x00000cd0
+#define SNDDATAI_INTERRUPTS_CNT 0x00000cd4
+#define SNDDATAI_AVOID_INTERRUPTS_CNT 0x00000cd8
+#define SNDDATAI_SND_THRESH_HIT_CNT 0x00000cdc
+/* 0xce0 --> 0x1000 unused */
+
+/* Send data completion control registers */
+#define SNDDATAC_MODE 0x00001000
+#define SNDDATAC_MODE_RESET 0x00000001
+#define SNDDATAC_MODE_ENABLE 0x00000002
+/* 0x1004 --> 0x1400 unused */
+
+/* Send BD ring selector */
+#define SNDBDS_MODE 0x00001400
+#define SNDBDS_MODE_RESET 0x00000001
+#define SNDBDS_MODE_ENABLE 0x00000002
+#define SNDBDS_MODE_ATTN_ENABLE 0x00000004
+#define SNDBDS_STATUS 0x00001404
+#define SNDBDS_STATUS_ERROR_ATTN 0x00000004
+#define SNDBDS_HWDIAG 0x00001408
+/* 0x140c --> 0x1440 unused */
+#define SNDBDS_SEL_CON_IDX_0 0x00001440
+#define SNDBDS_SEL_CON_IDX_1 0x00001444
+#define SNDBDS_SEL_CON_IDX_2 0x00001448
+#define SNDBDS_SEL_CON_IDX_3 0x0000144c
+#define SNDBDS_SEL_CON_IDX_4 0x00001450
+#define SNDBDS_SEL_CON_IDX_5 0x00001454
+#define SNDBDS_SEL_CON_IDX_6 0x00001458
+#define SNDBDS_SEL_CON_IDX_7 0x0000145c
+#define SNDBDS_SEL_CON_IDX_8 0x00001460
+#define SNDBDS_SEL_CON_IDX_9 0x00001464
+#define SNDBDS_SEL_CON_IDX_10 0x00001468
+#define SNDBDS_SEL_CON_IDX_11 0x0000146c
+#define SNDBDS_SEL_CON_IDX_12 0x00001470
+#define SNDBDS_SEL_CON_IDX_13 0x00001474
+#define SNDBDS_SEL_CON_IDX_14 0x00001478
+#define SNDBDS_SEL_CON_IDX_15 0x0000147c
+/* 0x1480 --> 0x1800 unused */
+
+/* Send BD initiator control registers */
+#define SNDBDI_MODE 0x00001800
+#define SNDBDI_MODE_RESET 0x00000001
+#define SNDBDI_MODE_ENABLE 0x00000002
+#define SNDBDI_MODE_ATTN_ENABLE 0x00000004
+#define SNDBDI_STATUS 0x00001804
+#define SNDBDI_STATUS_ERROR_ATTN 0x00000004
+#define SNDBDI_IN_PROD_IDX_0 0x00001808
+#define SNDBDI_IN_PROD_IDX_1 0x0000180c
+#define SNDBDI_IN_PROD_IDX_2 0x00001810
+#define SNDBDI_IN_PROD_IDX_3 0x00001814
+#define SNDBDI_IN_PROD_IDX_4 0x00001818
+#define SNDBDI_IN_PROD_IDX_5 0x0000181c
+#define SNDBDI_IN_PROD_IDX_6 0x00001820
+#define SNDBDI_IN_PROD_IDX_7 0x00001824
+#define SNDBDI_IN_PROD_IDX_8 0x00001828
+#define SNDBDI_IN_PROD_IDX_9 0x0000182c
+#define SNDBDI_IN_PROD_IDX_10 0x00001830
+#define SNDBDI_IN_PROD_IDX_11 0x00001834
+#define SNDBDI_IN_PROD_IDX_12 0x00001838
+#define SNDBDI_IN_PROD_IDX_13 0x0000183c
+#define SNDBDI_IN_PROD_IDX_14 0x00001840
+#define SNDBDI_IN_PROD_IDX_15 0x00001844
+/* 0x1848 --> 0x1c00 unused */
+
+/* Send BD completion control registers */
+#define SNDBDC_MODE 0x00001c00
+#define SNDBDC_MODE_RESET 0x00000001
+#define SNDBDC_MODE_ENABLE 0x00000002
+#define SNDBDC_MODE_ATTN_ENABLE 0x00000004
+/* 0x1c04 --> 0x2000 unused */
+
+/* Receive list placement control registers */
+#define RCVLPC_MODE 0x00002000
+#define RCVLPC_MODE_RESET 0x00000001
+#define RCVLPC_MODE_ENABLE 0x00000002
+#define RCVLPC_MODE_CLASS0_ATTN_ENAB 0x00000004
+#define RCVLPC_MODE_MAPOOR_AATTN_ENAB 0x00000008
+#define RCVLPC_MODE_STAT_OFLOW_ENAB 0x00000010
+#define RCVLPC_STATUS 0x00002004
+#define RCVLPC_STATUS_CLASS0 0x00000004
+#define RCVLPC_STATUS_MAPOOR 0x00000008
+#define RCVLPC_STATUS_STAT_OFLOW 0x00000010
+#define RCVLPC_LOCK 0x00002008
+#define RCVLPC_LOCK_REQ_MASK 0x0000ffff
+#define RCVLPC_LOCK_REQ_SHIFT 0
+#define RCVLPC_LOCK_GRANT_MASK 0xffff0000
+#define RCVLPC_LOCK_GRANT_SHIFT 16
+#define RCVLPC_NON_EMPTY_BITS 0x0000200c
+#define RCVLPC_NON_EMPTY_BITS_MASK 0x0000ffff
+#define RCVLPC_CONFIG 0x00002010
+#define RCVLPC_STATSCTRL 0x00002014
+#define RCVLPC_STATSCTRL_ENABLE 0x00000001
+#define RCVLPC_STATSCTRL_FASTUPD 0x00000002
+#define RCVLPC_STATS_ENABLE 0x00002018
+#define RCVLPC_STATS_INCMASK 0x0000201c
+/* 0x2020 --> 0x2100 unused */
+#define RCVLPC_SELLST_BASE 0x00002100 /* 16 16-byte entries */
+#define SELLST_TAIL 0x00000004
+#define SELLST_CONT 0x00000008
+#define SELLST_UNUSED 0x0000000c
+#define RCVLPC_COS_CNTL_BASE 0x00002200 /* 16 4-byte entries */
+#define RCVLPC_DROP_FILTER_CNT 0x00002240
+#define RCVLPC_DMA_WQ_FULL_CNT 0x00002244
+#define RCVLPC_DMA_HIPRIO_WQ_FULL_CNT 0x00002248
+#define RCVLPC_NO_RCV_BD_CNT 0x0000224c
+#define RCVLPC_IN_DISCARDS_CNT 0x00002250
+#define RCVLPC_IN_ERRORS_CNT 0x00002254
+#define RCVLPC_RCV_THRESH_HIT_CNT 0x00002258
+/* 0x225c --> 0x2400 unused */
+
+/* Receive Data and Receive BD Initiator Control */
+#define RCVDBDI_MODE 0x00002400
+#define RCVDBDI_MODE_RESET 0x00000001
+#define RCVDBDI_MODE_ENABLE 0x00000002
+#define RCVDBDI_MODE_JUMBOBD_NEEDED 0x00000004
+#define RCVDBDI_MODE_FRM_TOO_BIG 0x00000008
+#define RCVDBDI_MODE_INV_RING_SZ 0x00000010
+#define RCVDBDI_STATUS 0x00002404
+#define RCVDBDI_STATUS_JUMBOBD_NEEDED 0x00000004
+#define RCVDBDI_STATUS_FRM_TOO_BIG 0x00000008
+#define RCVDBDI_STATUS_INV_RING_SZ 0x00000010
+#define RCVDBDI_SPLIT_FRAME_MINSZ 0x00002408
+/* 0x240c --> 0x2440 unused */
+#define RCVDBDI_JUMBO_BD 0x00002440 /* TG3_BDINFO_... */
+#define RCVDBDI_STD_BD 0x00002450 /* TG3_BDINFO_... */
+#define RCVDBDI_MINI_BD 0x00002460 /* TG3_BDINFO_... */
+#define RCVDBDI_JUMBO_CON_IDX 0x00002470
+#define RCVDBDI_STD_CON_IDX 0x00002474
+#define RCVDBDI_MINI_CON_IDX 0x00002478
+/* 0x247c --> 0x2480 unused */
+#define RCVDBDI_BD_PROD_IDX_0 0x00002480
+#define RCVDBDI_BD_PROD_IDX_1 0x00002484
+#define RCVDBDI_BD_PROD_IDX_2 0x00002488
+#define RCVDBDI_BD_PROD_IDX_3 0x0000248c
+#define RCVDBDI_BD_PROD_IDX_4 0x00002490
+#define RCVDBDI_BD_PROD_IDX_5 0x00002494
+#define RCVDBDI_BD_PROD_IDX_6 0x00002498
+#define RCVDBDI_BD_PROD_IDX_7 0x0000249c
+#define RCVDBDI_BD_PROD_IDX_8 0x000024a0
+#define RCVDBDI_BD_PROD_IDX_9 0x000024a4
+#define RCVDBDI_BD_PROD_IDX_10 0x000024a8
+#define RCVDBDI_BD_PROD_IDX_11 0x000024ac
+#define RCVDBDI_BD_PROD_IDX_12 0x000024b0
+#define RCVDBDI_BD_PROD_IDX_13 0x000024b4
+#define RCVDBDI_BD_PROD_IDX_14 0x000024b8
+#define RCVDBDI_BD_PROD_IDX_15 0x000024bc
+#define RCVDBDI_HWDIAG 0x000024c0
+/* 0x24c4 --> 0x2800 unused */
+
+/* Receive Data Completion Control */
+#define RCVDCC_MODE 0x00002800
+#define RCVDCC_MODE_RESET 0x00000001
+#define RCVDCC_MODE_ENABLE 0x00000002
+#define RCVDCC_MODE_ATTN_ENABLE 0x00000004
+/* 0x2804 --> 0x2c00 unused */
+
+/* Receive BD Initiator Control Registers */
+#define RCVBDI_MODE 0x00002c00
+#define RCVBDI_MODE_RESET 0x00000001
+#define RCVBDI_MODE_ENABLE 0x00000002
+#define RCVBDI_MODE_RCB_ATTN_ENAB 0x00000004
+#define RCVBDI_STATUS 0x00002c04
+#define RCVBDI_STATUS_RCB_ATTN 0x00000004
+#define RCVBDI_JUMBO_PROD_IDX 0x00002c08
+#define RCVBDI_STD_PROD_IDX 0x00002c0c
+#define RCVBDI_MINI_PROD_IDX 0x00002c10
+#define RCVBDI_MINI_THRESH 0x00002c14
+#define RCVBDI_STD_THRESH 0x00002c18
+#define RCVBDI_JUMBO_THRESH 0x00002c1c
+/* 0x2c20 --> 0x3000 unused */
+
+/* Receive BD Completion Control Registers */
+#define RCVCC_MODE 0x00003000
+#define RCVCC_MODE_RESET 0x00000001
+#define RCVCC_MODE_ENABLE 0x00000002
+#define RCVCC_MODE_ATTN_ENABLE 0x00000004
+#define RCVCC_STATUS 0x00003004
+#define RCVCC_STATUS_ERROR_ATTN 0x00000004
+#define RCVCC_JUMP_PROD_IDX 0x00003008
+#define RCVCC_STD_PROD_IDX 0x0000300c
+#define RCVCC_MINI_PROD_IDX 0x00003010
+/* 0x3014 --> 0x3400 unused */
+
+/* Receive list selector control registers */
+#define RCVLSC_MODE 0x00003400
+#define RCVLSC_MODE_RESET 0x00000001
+#define RCVLSC_MODE_ENABLE 0x00000002
+#define RCVLSC_MODE_ATTN_ENABLE 0x00000004
+#define RCVLSC_STATUS 0x00003404
+#define RCVLSC_STATUS_ERROR_ATTN 0x00000004
+/* 0x3408 --> 0x3800 unused */
+
+/* Mbuf cluster free registers */
+#define MBFREE_MODE 0x00003800
+#define MBFREE_MODE_RESET 0x00000001
+#define MBFREE_MODE_ENABLE 0x00000002
+#define MBFREE_STATUS 0x00003804
+/* 0x3808 --> 0x3c00 unused */
+
+/* Host coalescing control registers */
+#define HOSTCC_MODE 0x00003c00
+#define HOSTCC_MODE_RESET 0x00000001
+#define HOSTCC_MODE_ENABLE 0x00000002
+#define HOSTCC_MODE_ATTN 0x00000004
+#define HOSTCC_MODE_NOW 0x00000008
+#define HOSTCC_MODE_FULL_STATUS 0x00000000
+#define HOSTCC_MODE_64BYTE 0x00000080
+#define HOSTCC_MODE_32BYTE 0x00000100
+#define HOSTCC_MODE_CLRTICK_RXBD 0x00000200
+#define HOSTCC_MODE_CLRTICK_TXBD 0x00000400
+#define HOSTCC_MODE_NOINT_ON_NOW 0x00000800
+#define HOSTCC_MODE_NOINT_ON_FORCE 0x00001000
+#define HOSTCC_STATUS 0x00003c04
+#define HOSTCC_STATUS_ERROR_ATTN 0x00000004
+#define HOSTCC_RXCOL_TICKS 0x00003c08
+#define LOW_RXCOL_TICKS 0x00000032
+#define DEFAULT_RXCOL_TICKS 0x00000048
+#define HIGH_RXCOL_TICKS 0x00000096
+#define HOSTCC_TXCOL_TICKS 0x00003c0c
+#define LOW_TXCOL_TICKS 0x00000096
+#define DEFAULT_TXCOL_TICKS 0x0000012c
+#define HIGH_TXCOL_TICKS 0x00000145
+#define HOSTCC_RXMAX_FRAMES 0x00003c10
+#define LOW_RXMAX_FRAMES 0x00000005
+#define DEFAULT_RXMAX_FRAMES 0x00000008
+#define HIGH_RXMAX_FRAMES 0x00000012
+#define HOSTCC_TXMAX_FRAMES 0x00003c14
+#define LOW_TXMAX_FRAMES 0x00000035
+#define DEFAULT_TXMAX_FRAMES 0x0000004b
+#define HIGH_TXMAX_FRAMES 0x00000052
+#define HOSTCC_RXCOAL_TICK_INT 0x00003c18
+#define DEFAULT_RXCOAL_TICK_INT 0x00000019
+#define HOSTCC_TXCOAL_TICK_INT 0x00003c1c
+#define DEFAULT_TXCOAL_TICK_INT 0x00000019
+#define HOSTCC_RXCOAL_MAXF_INT 0x00003c20
+#define DEFAULT_RXCOAL_MAXF_INT 0x00000005
+#define HOSTCC_TXCOAL_MAXF_INT 0x00003c24
+#define DEFAULT_TXCOAL_MAXF_INT 0x00000005
+#define HOSTCC_STAT_COAL_TICKS 0x00003c28
+#define DEFAULT_STAT_COAL_TICKS 0x000f4240
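+/* Illustrative note (not from the original header): DEFAULT_STAT_COAL_TICKS
+ * is 1,000,000 decimal; assuming the coalescing tick is one microsecond,
+ * the statistics block is DMA'd to the host roughly once per second by
+ * default. */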
+/* 0x3c2c --> 0x3c30 unused */
+#define HOSTCC_STATS_BLK_HOST_ADDR 0x00003c30 /* 64-bit */
+#define HOSTCC_STATUS_BLK_HOST_ADDR 0x00003c38 /* 64-bit */
+#define HOSTCC_STATS_BLK_NIC_ADDR 0x00003c40
+#define HOSTCC_STATUS_BLK_NIC_ADDR 0x00003c44
+#define HOSTCC_FLOW_ATTN 0x00003c48
+/* 0x3c4c --> 0x3c50 unused */
+#define HOSTCC_JUMBO_CON_IDX 0x00003c50
+#define HOSTCC_STD_CON_IDX 0x00003c54
+#define HOSTCC_MINI_CON_IDX 0x00003c58
+/* 0x3c5c --> 0x3c80 unused */
+#define HOSTCC_RET_PROD_IDX_0 0x00003c80
+#define HOSTCC_RET_PROD_IDX_1 0x00003c84
+#define HOSTCC_RET_PROD_IDX_2 0x00003c88
+#define HOSTCC_RET_PROD_IDX_3 0x00003c8c
+#define HOSTCC_RET_PROD_IDX_4 0x00003c90
+#define HOSTCC_RET_PROD_IDX_5 0x00003c94
+#define HOSTCC_RET_PROD_IDX_6 0x00003c98
+#define HOSTCC_RET_PROD_IDX_7 0x00003c9c
+#define HOSTCC_RET_PROD_IDX_8 0x00003ca0
+#define HOSTCC_RET_PROD_IDX_9 0x00003ca4
+#define HOSTCC_RET_PROD_IDX_10 0x00003ca8
+#define HOSTCC_RET_PROD_IDX_11 0x00003cac
+#define HOSTCC_RET_PROD_IDX_12 0x00003cb0
+#define HOSTCC_RET_PROD_IDX_13 0x00003cb4
+#define HOSTCC_RET_PROD_IDX_14 0x00003cb8
+#define HOSTCC_RET_PROD_IDX_15 0x00003cbc
+#define HOSTCC_SND_CON_IDX_0 0x00003cc0
+#define HOSTCC_SND_CON_IDX_1 0x00003cc4
+#define HOSTCC_SND_CON_IDX_2 0x00003cc8
+#define HOSTCC_SND_CON_IDX_3 0x00003ccc
+#define HOSTCC_SND_CON_IDX_4 0x00003cd0
+#define HOSTCC_SND_CON_IDX_5 0x00003cd4
+#define HOSTCC_SND_CON_IDX_6 0x00003cd8
+#define HOSTCC_SND_CON_IDX_7 0x00003cdc
+#define HOSTCC_SND_CON_IDX_8 0x00003ce0
+#define HOSTCC_SND_CON_IDX_9 0x00003ce4
+#define HOSTCC_SND_CON_IDX_10 0x00003ce8
+#define HOSTCC_SND_CON_IDX_11 0x00003cec
+#define HOSTCC_SND_CON_IDX_12 0x00003cf0
+#define HOSTCC_SND_CON_IDX_13 0x00003cf4
+#define HOSTCC_SND_CON_IDX_14 0x00003cf8
+#define HOSTCC_SND_CON_IDX_15 0x00003cfc
+/* 0x3d00 --> 0x4000 unused */
+
+/* Memory arbiter control registers */
+#define MEMARB_MODE 0x00004000
+#define MEMARB_MODE_RESET 0x00000001
+#define MEMARB_MODE_ENABLE 0x00000002
+#define MEMARB_STATUS 0x00004004
+#define MEMARB_TRAP_ADDR_LOW 0x00004008
+#define MEMARB_TRAP_ADDR_HIGH 0x0000400c
+/* 0x4010 --> 0x4400 unused */
+
+/* Buffer manager control registers */
+#define BUFMGR_MODE 0x00004400
+#define BUFMGR_MODE_RESET 0x00000001
+#define BUFMGR_MODE_ENABLE 0x00000002
+#define BUFMGR_MODE_ATTN_ENABLE 0x00000004
+#define BUFMGR_MODE_BM_TEST 0x00000008
+#define BUFMGR_MODE_MBLOW_ATTN_ENAB 0x00000010
+#define BUFMGR_STATUS 0x00004404
+#define BUFMGR_STATUS_ERROR 0x00000004
+#define BUFMGR_STATUS_MBLOW 0x00000010
+#define BUFMGR_MB_POOL_ADDR 0x00004408
+#define BUFMGR_MB_POOL_SIZE 0x0000440c
+#define BUFMGR_MB_RDMA_LOW_WATER 0x00004410
+#define DEFAULT_MB_RDMA_LOW_WATER 0x00000040
+#define DEFAULT_MB_RDMA_LOW_WATER_JUMBO 0x00000130
+#define BUFMGR_MB_MACRX_LOW_WATER 0x00004414
+#define DEFAULT_MB_MACRX_LOW_WATER 0x00000020
+#define DEFAULT_MB_MACRX_LOW_WATER_JUMBO 0x00000098
+#define BUFMGR_MB_HIGH_WATER 0x00004418
+#define DEFAULT_MB_HIGH_WATER 0x00000060
+#define DEFAULT_MB_HIGH_WATER_JUMBO 0x0000017c
+#define BUFMGR_RX_MB_ALLOC_REQ 0x0000441c
+#define BUFMGR_MB_ALLOC_BIT 0x10000000
+#define BUFMGR_RX_MB_ALLOC_RESP 0x00004420
+#define BUFMGR_TX_MB_ALLOC_REQ 0x00004424
+#define BUFMGR_TX_MB_ALLOC_RESP 0x00004428
+#define BUFMGR_DMA_DESC_POOL_ADDR 0x0000442c
+#define BUFMGR_DMA_DESC_POOL_SIZE 0x00004430
+#define BUFMGR_DMA_LOW_WATER 0x00004434
+#define DEFAULT_DMA_LOW_WATER 0x00000005
+#define BUFMGR_DMA_HIGH_WATER 0x00004438
+#define DEFAULT_DMA_HIGH_WATER 0x0000000a
+#define BUFMGR_RX_DMA_ALLOC_REQ 0x0000443c
+#define BUFMGR_RX_DMA_ALLOC_RESP 0x00004440
+#define BUFMGR_TX_DMA_ALLOC_REQ 0x00004444
+#define BUFMGR_TX_DMA_ALLOC_RESP 0x00004448
+#define BUFMGR_HWDIAG_0 0x0000444c
+#define BUFMGR_HWDIAG_1 0x00004450
+#define BUFMGR_HWDIAG_2 0x00004454
+/* 0x4458 --> 0x4800 unused */
+
+/* Read DMA control registers */
+#define RDMAC_MODE 0x00004800
+#define RDMAC_MODE_RESET 0x00000001
+#define RDMAC_MODE_ENABLE 0x00000002
+#define RDMAC_MODE_TGTABORT_ENAB 0x00000004
+#define RDMAC_MODE_MSTABORT_ENAB 0x00000008
+#define RDMAC_MODE_PARITYERR_ENAB 0x00000010
+#define RDMAC_MODE_ADDROFLOW_ENAB 0x00000020
+#define RDMAC_MODE_FIFOOFLOW_ENAB 0x00000040
+#define RDMAC_MODE_FIFOURUN_ENAB 0x00000080
+#define RDMAC_MODE_FIFOOREAD_ENAB 0x00000100
+#define RDMAC_MODE_LNGREAD_ENAB 0x00000200
+#define RDMAC_MODE_SPLIT_ENABLE 0x00000800
+#define RDMAC_MODE_SPLIT_RESET 0x00001000
+#define RDMAC_STATUS 0x00004804
+#define RDMAC_STATUS_TGTABORT 0x00000004
+#define RDMAC_STATUS_MSTABORT 0x00000008
+#define RDMAC_STATUS_PARITYERR 0x00000010
+#define RDMAC_STATUS_ADDROFLOW 0x00000020
+#define RDMAC_STATUS_FIFOOFLOW 0x00000040
+#define RDMAC_STATUS_FIFOURUN 0x00000080
+#define RDMAC_STATUS_FIFOOREAD 0x00000100
+#define RDMAC_STATUS_LNGREAD 0x00000200
+/* 0x4808 --> 0x4c00 unused */
+
+/* Write DMA control registers */
+#define WDMAC_MODE 0x00004c00
+#define WDMAC_MODE_RESET 0x00000001
+#define WDMAC_MODE_ENABLE 0x00000002
+#define WDMAC_MODE_TGTABORT_ENAB 0x00000004
+#define WDMAC_MODE_MSTABORT_ENAB 0x00000008
+#define WDMAC_MODE_PARITYERR_ENAB 0x00000010
+#define WDMAC_MODE_ADDROFLOW_ENAB 0x00000020
+#define WDMAC_MODE_FIFOOFLOW_ENAB 0x00000040
+#define WDMAC_MODE_FIFOURUN_ENAB 0x00000080
+#define WDMAC_MODE_FIFOOREAD_ENAB 0x00000100
+#define WDMAC_MODE_LNGREAD_ENAB 0x00000200
+#define WDMAC_STATUS 0x00004c04
+#define WDMAC_STATUS_TGTABORT 0x00000004
+#define WDMAC_STATUS_MSTABORT 0x00000008
+#define WDMAC_STATUS_PARITYERR 0x00000010
+#define WDMAC_STATUS_ADDROFLOW 0x00000020
+#define WDMAC_STATUS_FIFOOFLOW 0x00000040
+#define WDMAC_STATUS_FIFOURUN 0x00000080
+#define WDMAC_STATUS_FIFOOREAD 0x00000100
+#define WDMAC_STATUS_LNGREAD 0x00000200
+/* 0x4c08 --> 0x5000 unused */
+
+/* Per-cpu register offsets (arm9) */
+#define CPU_MODE 0x00000000
+#define CPU_MODE_RESET 0x00000001
+#define CPU_MODE_HALT 0x00000400
+#define CPU_STATE 0x00000004
+#define CPU_EVTMASK 0x00000008
+/* 0xc --> 0x1c reserved */
+#define CPU_PC 0x0000001c
+#define CPU_INSN 0x00000020
+#define CPU_SPAD_UFLOW 0x00000024
+#define CPU_WDOG_CLEAR 0x00000028
+#define CPU_WDOG_VECTOR 0x0000002c
+#define CPU_WDOG_PC 0x00000030
+#define CPU_HW_BP 0x00000034
+/* 0x38 --> 0x44 unused */
+#define CPU_WDOG_SAVED_STATE 0x00000044
+#define CPU_LAST_BRANCH_ADDR 0x00000048
+#define CPU_SPAD_UFLOW_SET 0x0000004c
+/* 0x50 --> 0x200 unused */
+#define CPU_R0 0x00000200
+#define CPU_R1 0x00000204
+#define CPU_R2 0x00000208
+#define CPU_R3 0x0000020c
+#define CPU_R4 0x00000210
+#define CPU_R5 0x00000214
+#define CPU_R6 0x00000218
+#define CPU_R7 0x0000021c
+#define CPU_R8 0x00000220
+#define CPU_R9 0x00000224
+#define CPU_R10 0x00000228
+#define CPU_R11 0x0000022c
+#define CPU_R12 0x00000230
+#define CPU_R13 0x00000234
+#define CPU_R14 0x00000238
+#define CPU_R15 0x0000023c
+#define CPU_R16 0x00000240
+#define CPU_R17 0x00000244
+#define CPU_R18 0x00000248
+#define CPU_R19 0x0000024c
+#define CPU_R20 0x00000250
+#define CPU_R21 0x00000254
+#define CPU_R22 0x00000258
+#define CPU_R23 0x0000025c
+#define CPU_R24 0x00000260
+#define CPU_R25 0x00000264
+#define CPU_R26 0x00000268
+#define CPU_R27 0x0000026c
+#define CPU_R28 0x00000270
+#define CPU_R29 0x00000274
+#define CPU_R30 0x00000278
+#define CPU_R31 0x0000027c
+/* 0x280 --> 0x400 unused */
+
+#define RX_CPU_BASE 0x00005000
+#define TX_CPU_BASE 0x00005400
+
+/* Mailboxes */
+#define GRCMBOX_INTERRUPT_0 0x00005800 /* 64-bit */
+#define GRCMBOX_INTERRUPT_1 0x00005808 /* 64-bit */
+#define GRCMBOX_INTERRUPT_2 0x00005810 /* 64-bit */
+#define GRCMBOX_INTERRUPT_3 0x00005818 /* 64-bit */
+#define GRCMBOX_GENERAL_0 0x00005820 /* 64-bit */
+#define GRCMBOX_GENERAL_1 0x00005828 /* 64-bit */
+#define GRCMBOX_GENERAL_2 0x00005830 /* 64-bit */
+#define GRCMBOX_GENERAL_3 0x00005838 /* 64-bit */
+#define GRCMBOX_GENERAL_4 0x00005840 /* 64-bit */
+#define GRCMBOX_GENERAL_5 0x00005848 /* 64-bit */
+#define GRCMBOX_GENERAL_6 0x00005850 /* 64-bit */
+#define GRCMBOX_GENERAL_7 0x00005858 /* 64-bit */
+#define GRCMBOX_RELOAD_STAT 0x00005860 /* 64-bit */
+#define GRCMBOX_RCVSTD_PROD_IDX 0x00005868 /* 64-bit */
+#define GRCMBOX_RCVJUMBO_PROD_IDX 0x00005870 /* 64-bit */
+#define GRCMBOX_RCVMINI_PROD_IDX 0x00005878 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_0 0x00005880 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_1 0x00005888 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_2 0x00005890 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_3 0x00005898 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_4 0x000058a0 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_5 0x000058a8 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_6 0x000058b0 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_7 0x000058b8 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_8 0x000058c0 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_9 0x000058c8 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_10 0x000058d0 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_11 0x000058d8 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_12 0x000058e0 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_13 0x000058e8 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_14 0x000058f0 /* 64-bit */
+#define GRCMBOX_RCVRET_CON_IDX_15 0x000058f8 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_0 0x00005900 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_1 0x00005908 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_2 0x00005910 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_3 0x00005918 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_4 0x00005920 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_5 0x00005928 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_6 0x00005930 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_7 0x00005938 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_8 0x00005940 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_9 0x00005948 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_10 0x00005950 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_11 0x00005958 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_12 0x00005960 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_13 0x00005968 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_14 0x00005970 /* 64-bit */
+#define GRCMBOX_SNDHOST_PROD_IDX_15 0x00005978 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_0 0x00005980 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_1 0x00005988 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_2 0x00005990 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_3 0x00005998 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_4 0x000059a0 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_5 0x000059a8 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_6 0x000059b0 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_7 0x000059b8 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_8 0x000059c0 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_9 0x000059c8 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_10 0x000059d0 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_11 0x000059d8 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_12 0x000059e0 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_13 0x000059e8 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_14 0x000059f0 /* 64-bit */
+#define GRCMBOX_SNDNIC_PROD_IDX_15 0x000059f8 /* 64-bit */
+#define GRCMBOX_HIGH_PRIO_EV_VECTOR 0x00005a00
+#define GRCMBOX_HIGH_PRIO_EV_MASK 0x00005a04
+#define GRCMBOX_LOW_PRIO_EV_VEC 0x00005a08
+#define GRCMBOX_LOW_PRIO_EV_MASK 0x00005a0c
+/* 0x5a10 --> 0x5c00 unused */
+
+/* Flow Through queues */
+#define FTQ_RESET 0x00005c00
+/* 0x5c04 --> 0x5c10 unused */
+#define FTQ_DMA_NORM_READ_CTL 0x00005c10
+#define FTQ_DMA_NORM_READ_FULL_CNT 0x00005c14
+#define FTQ_DMA_NORM_READ_FIFO_ENQDEQ 0x00005c18
+#define FTQ_DMA_NORM_READ_WRITE_PEEK 0x00005c1c
+#define FTQ_DMA_HIGH_READ_CTL 0x00005c20
+#define FTQ_DMA_HIGH_READ_FULL_CNT 0x00005c24
+#define FTQ_DMA_HIGH_READ_FIFO_ENQDEQ 0x00005c28
+#define FTQ_DMA_HIGH_READ_WRITE_PEEK 0x00005c2c
+#define FTQ_DMA_COMP_DISC_CTL 0x00005c30
+#define FTQ_DMA_COMP_DISC_FULL_CNT 0x00005c34
+#define FTQ_DMA_COMP_DISC_FIFO_ENQDEQ 0x00005c38
+#define FTQ_DMA_COMP_DISC_WRITE_PEEK 0x00005c3c
+#define FTQ_SEND_BD_COMP_CTL 0x00005c40
+#define FTQ_SEND_BD_COMP_FULL_CNT 0x00005c44
+#define FTQ_SEND_BD_COMP_FIFO_ENQDEQ 0x00005c48
+#define FTQ_SEND_BD_COMP_WRITE_PEEK 0x00005c4c
+#define FTQ_SEND_DATA_INIT_CTL 0x00005c50
+#define FTQ_SEND_DATA_INIT_FULL_CNT 0x00005c54
+#define FTQ_SEND_DATA_INIT_FIFO_ENQDEQ 0x00005c58
+#define FTQ_SEND_DATA_INIT_WRITE_PEEK 0x00005c5c
+#define FTQ_DMA_NORM_WRITE_CTL 0x00005c60
+#define FTQ_DMA_NORM_WRITE_FULL_CNT 0x00005c64
+#define FTQ_DMA_NORM_WRITE_FIFO_ENQDEQ 0x00005c68
+#define FTQ_DMA_NORM_WRITE_WRITE_PEEK 0x00005c6c
+#define FTQ_DMA_HIGH_WRITE_CTL 0x00005c70
+#define FTQ_DMA_HIGH_WRITE_FULL_CNT 0x00005c74
+#define FTQ_DMA_HIGH_WRITE_FIFO_ENQDEQ 0x00005c78
+#define FTQ_DMA_HIGH_WRITE_WRITE_PEEK 0x00005c7c
+#define FTQ_SWTYPE1_CTL 0x00005c80
+#define FTQ_SWTYPE1_FULL_CNT 0x00005c84
+#define FTQ_SWTYPE1_FIFO_ENQDEQ 0x00005c88
+#define FTQ_SWTYPE1_WRITE_PEEK 0x00005c8c
+#define FTQ_SEND_DATA_COMP_CTL 0x00005c90
+#define FTQ_SEND_DATA_COMP_FULL_CNT 0x00005c94
+#define FTQ_SEND_DATA_COMP_FIFO_ENQDEQ 0x00005c98
+#define FTQ_SEND_DATA_COMP_WRITE_PEEK 0x00005c9c
+#define FTQ_HOST_COAL_CTL 0x00005ca0
+#define FTQ_HOST_COAL_FULL_CNT 0x00005ca4
+#define FTQ_HOST_COAL_FIFO_ENQDEQ 0x00005ca8
+#define FTQ_HOST_COAL_WRITE_PEEK 0x00005cac
+#define FTQ_MAC_TX_CTL 0x00005cb0
+#define FTQ_MAC_TX_FULL_CNT 0x00005cb4
+#define FTQ_MAC_TX_FIFO_ENQDEQ 0x00005cb8
+#define FTQ_MAC_TX_WRITE_PEEK 0x00005cbc
+#define FTQ_MB_FREE_CTL 0x00005cc0
+#define FTQ_MB_FREE_FULL_CNT 0x00005cc4
+#define FTQ_MB_FREE_FIFO_ENQDEQ 0x00005cc8
+#define FTQ_MB_FREE_WRITE_PEEK 0x00005ccc
+#define FTQ_RCVBD_COMP_CTL 0x00005cd0
+#define FTQ_RCVBD_COMP_FULL_CNT 0x00005cd4
+#define FTQ_RCVBD_COMP_FIFO_ENQDEQ 0x00005cd8
+#define FTQ_RCVBD_COMP_WRITE_PEEK 0x00005cdc
+#define FTQ_RCVLST_PLMT_CTL 0x00005ce0
+#define FTQ_RCVLST_PLMT_FULL_CNT 0x00005ce4
+#define FTQ_RCVLST_PLMT_FIFO_ENQDEQ 0x00005ce8
+#define FTQ_RCVLST_PLMT_WRITE_PEEK 0x00005cec
+#define FTQ_RCVDATA_INI_CTL 0x00005cf0
+#define FTQ_RCVDATA_INI_FULL_CNT 0x00005cf4
+#define FTQ_RCVDATA_INI_FIFO_ENQDEQ 0x00005cf8
+#define FTQ_RCVDATA_INI_WRITE_PEEK 0x00005cfc
+#define FTQ_RCVDATA_COMP_CTL 0x00005d00
+#define FTQ_RCVDATA_COMP_FULL_CNT 0x00005d04
+#define FTQ_RCVDATA_COMP_FIFO_ENQDEQ 0x00005d08
+#define FTQ_RCVDATA_COMP_WRITE_PEEK 0x00005d0c
+#define FTQ_SWTYPE2_CTL 0x00005d10
+#define FTQ_SWTYPE2_FULL_CNT 0x00005d14
+#define FTQ_SWTYPE2_FIFO_ENQDEQ 0x00005d18
+#define FTQ_SWTYPE2_WRITE_PEEK 0x00005d1c
+/* 0x5d20 --> 0x6000 unused */
+
+/* Message signaled interrupt registers */
+#define MSGINT_MODE 0x00006000
+#define MSGINT_MODE_RESET 0x00000001
+#define MSGINT_MODE_ENABLE 0x00000002
+#define MSGINT_STATUS 0x00006004
+#define MSGINT_FIFO 0x00006008
+/* 0x600c --> 0x6400 unused */
+
+/* DMA completion registers */
+#define DMAC_MODE 0x00006400
+#define DMAC_MODE_RESET 0x00000001
+#define DMAC_MODE_ENABLE 0x00000002
+/* 0x6404 --> 0x6800 unused */
+
+/* GRC registers */
+#define GRC_MODE 0x00006800
+#define GRC_MODE_UPD_ON_COAL 0x00000001
+#define GRC_MODE_BSWAP_NONFRM_DATA 0x00000002
+#define GRC_MODE_WSWAP_NONFRM_DATA 0x00000004
+#define GRC_MODE_BSWAP_DATA 0x00000010
+#define GRC_MODE_WSWAP_DATA 0x00000020
+#define GRC_MODE_SPLITHDR 0x00000100
+#define GRC_MODE_NOFRM_CRACKING 0x00000200
+#define GRC_MODE_INCL_CRC 0x00000400
+#define GRC_MODE_ALLOW_BAD_FRMS 0x00000800
+#define GRC_MODE_NOIRQ_ON_SENDS 0x00002000
+#define GRC_MODE_NOIRQ_ON_RCV 0x00004000
+#define GRC_MODE_FORCE_PCI32BIT 0x00008000
+#define GRC_MODE_HOST_STACKUP 0x00010000
+#define GRC_MODE_HOST_SENDBDS 0x00020000
+#define GRC_MODE_NO_TX_PHDR_CSUM 0x00100000
+#define GRC_MODE_NO_RX_PHDR_CSUM 0x00800000
+#define GRC_MODE_IRQ_ON_TX_CPU_ATTN 0x01000000
+#define GRC_MODE_IRQ_ON_RX_CPU_ATTN 0x02000000
+#define GRC_MODE_IRQ_ON_MAC_ATTN 0x04000000
+#define GRC_MODE_IRQ_ON_DMA_ATTN 0x08000000
+#define GRC_MODE_IRQ_ON_FLOW_ATTN 0x10000000
+#define GRC_MODE_4X_NIC_SEND_RINGS 0x20000000
+#define GRC_MODE_MCAST_FRM_ENABLE 0x40000000
+#define GRC_MISC_CFG 0x00006804
+#define GRC_MISC_CFG_CORECLK_RESET 0x00000001
+#define GRC_MISC_CFG_PRESCALAR_MASK 0x000000fe
+#define GRC_MISC_CFG_PRESCALAR_SHIFT 1
+#define GRC_MISC_CFG_BOARD_ID_MASK 0x0001e000
+#define GRC_MISC_CFG_BOARD_ID_5700 0x0001e000
+#define GRC_MISC_CFG_BOARD_ID_5701 0x00000000
+#define GRC_MISC_CFG_BOARD_ID_5702FE 0x00004000
+#define GRC_MISC_CFG_BOARD_ID_5703 0x00000000
+#define GRC_MISC_CFG_BOARD_ID_5703S 0x00002000
+#define GRC_MISC_CFG_BOARD_ID_5704 0x00000000
+#define GRC_MISC_CFG_BOARD_ID_5704CIOBE 0x00004000
+#define GRC_MISC_CFG_BOARD_ID_5704_A2 0x00008000
+#define GRC_MISC_CFG_BOARD_ID_5704_X 0x0000C000
+#define GRC_MISC_CFG_BOARD_ID_AC91002A1 0x00018000
+#define GRC_LOCAL_CTRL 0x00006808
+#define GRC_LCLCTRL_INT_ACTIVE 0x00000001
+#define GRC_LCLCTRL_CLEARINT 0x00000002
+#define GRC_LCLCTRL_SETINT 0x00000004
+#define GRC_LCLCTRL_INT_ON_ATTN 0x00000008
+#define GRC_LCLCTRL_GPIO_INPUT0 0x00000100
+#define GRC_LCLCTRL_GPIO_INPUT1 0x00000200
+#define GRC_LCLCTRL_GPIO_INPUT2 0x00000400
+#define GRC_LCLCTRL_GPIO_OE0 0x00000800
+#define GRC_LCLCTRL_GPIO_OE1 0x00001000
+#define GRC_LCLCTRL_GPIO_OE2 0x00002000
+#define GRC_LCLCTRL_GPIO_OUTPUT0 0x00004000
+#define GRC_LCLCTRL_GPIO_OUTPUT1 0x00008000
+#define GRC_LCLCTRL_GPIO_OUTPUT2 0x00010000
+#define GRC_LCLCTRL_EXTMEM_ENABLE 0x00020000
+#define GRC_LCLCTRL_MEMSZ_MASK 0x001c0000
+#define GRC_LCLCTRL_MEMSZ_256K 0x00000000
+#define GRC_LCLCTRL_MEMSZ_512K 0x00040000
+#define GRC_LCLCTRL_MEMSZ_1M 0x00080000
+#define GRC_LCLCTRL_MEMSZ_2M 0x000c0000
+#define GRC_LCLCTRL_MEMSZ_4M 0x00100000
+#define GRC_LCLCTRL_MEMSZ_8M 0x00140000
+#define GRC_LCLCTRL_MEMSZ_16M 0x00180000
+#define GRC_LCLCTRL_BANK_SELECT 0x00200000
+#define GRC_LCLCTRL_SSRAM_TYPE 0x00400000
+#define GRC_LCLCTRL_AUTO_SEEPROM 0x01000000
+#define GRC_TIMER 0x0000680c
+#define GRC_RX_CPU_EVENT 0x00006810
+#define GRC_RX_TIMER_REF 0x00006814
+#define GRC_RX_CPU_SEM 0x00006818
+#define GRC_REMOTE_RX_CPU_ATTN 0x0000681c
+#define GRC_TX_CPU_EVENT 0x00006820
+#define GRC_TX_TIMER_REF 0x00006824
+#define GRC_TX_CPU_SEM 0x00006828
+#define GRC_REMOTE_TX_CPU_ATTN 0x0000682c
+#define GRC_MEM_POWER_UP 0x00006830 /* 64-bit */
+#define GRC_EEPROM_ADDR 0x00006838
+#define EEPROM_ADDR_WRITE 0x00000000
+#define EEPROM_ADDR_READ 0x80000000
+#define EEPROM_ADDR_COMPLETE 0x40000000
+#define EEPROM_ADDR_FSM_RESET 0x20000000
+#define EEPROM_ADDR_DEVID_MASK 0x1c000000
+#define EEPROM_ADDR_DEVID_SHIFT 26
+#define EEPROM_ADDR_START 0x02000000
+#define EEPROM_ADDR_CLKPERD_SHIFT 16
+#define EEPROM_ADDR_ADDR_MASK 0x0000ffff
+#define EEPROM_ADDR_ADDR_SHIFT 0
+#define EEPROM_DEFAULT_CLOCK_PERIOD 0x60
+#define EEPROM_CHIP_SIZE (64 * 1024)
+#define GRC_EEPROM_DATA 0x0000683c
+#define GRC_EEPROM_CTRL 0x00006840
+#define GRC_MDI_CTRL 0x00006844
+#define GRC_SEEPROM_DELAY 0x00006848
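+/* Illustrative sketch (not part of the original header) of an EEPROM word
+ * read via GRC_EEPROM_ADDR/GRC_EEPROM_DATA; tr32()/tw32() are assumed
+ * accessor names:
+ *
+ *   tw32(GRC_EEPROM_ADDR,
+ *        ((offset << EEPROM_ADDR_ADDR_SHIFT) & EEPROM_ADDR_ADDR_MASK) |
+ *        EEPROM_ADDR_READ | EEPROM_ADDR_START);
+ *   while (!(tr32(GRC_EEPROM_ADDR) & EEPROM_ADDR_COMPLETE))
+ *           udelay(100);
+ *   val = tr32(GRC_EEPROM_DATA);
+ */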
+/* 0x684c --> 0x6c00 unused */
+
+/* 0x6c00 --> 0x7000 unused */
+
+/* NVRAM Control registers */
+#define NVRAM_CMD 0x00007000
+#define NVRAM_CMD_RESET 0x00000001
+#define NVRAM_CMD_DONE 0x00000008
+#define NVRAM_CMD_GO 0x00000010
+#define NVRAM_CMD_WR 0x00000020
+#define NVRAM_CMD_RD 0x00000000
+#define NVRAM_CMD_ERASE 0x00000040
+#define NVRAM_CMD_FIRST 0x00000080
+#define NVRAM_CMD_LAST 0x00000100
+#define NVRAM_STAT 0x00007004
+#define NVRAM_WRDATA 0x00007008
+#define NVRAM_ADDR 0x0000700c
+#define NVRAM_ADDR_MSK 0x00ffffff
+#define NVRAM_RDDATA 0x00007010
+#define NVRAM_CFG1 0x00007014
+#define NVRAM_CFG1_FLASHIF_ENAB 0x00000001
+#define NVRAM_CFG1_BUFFERED_MODE 0x00000002
+#define NVRAM_CFG1_PASS_THRU 0x00000004
+#define NVRAM_CFG1_BIT_BANG 0x00000008
+#define NVRAM_CFG1_COMPAT_BYPASS 0x80000000
+#define NVRAM_CFG2 0x00007018
+#define NVRAM_CFG3 0x0000701c
+#define NVRAM_SWARB 0x00007020
+#define SWARB_REQ_SET0 0x00000001
+#define SWARB_REQ_SET1 0x00000002
+#define SWARB_REQ_SET2 0x00000004
+#define SWARB_REQ_SET3 0x00000008
+#define SWARB_REQ_CLR0 0x00000010
+#define SWARB_REQ_CLR1 0x00000020
+#define SWARB_REQ_CLR2 0x00000040
+#define SWARB_REQ_CLR3 0x00000080
+#define SWARB_GNT0 0x00000100
+#define SWARB_GNT1 0x00000200
+#define SWARB_GNT2 0x00000400
+#define SWARB_GNT3 0x00000800
+#define SWARB_REQ0 0x00001000
+#define SWARB_REQ1 0x00002000
+#define SWARB_REQ2 0x00004000
+#define SWARB_REQ3 0x00008000
+#define NVRAM_BUFFERED_PAGE_SIZE 264
+#define NVRAM_BUFFERED_PAGE_POS 9
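+/* Illustrative sketch (not part of the original header) of acquiring the
+ * NVRAM software arbitration lock on behalf of requester 1; tr32()/tw32()
+ * are assumed accessor names:
+ *
+ *   tw32(NVRAM_SWARB, SWARB_REQ_SET1);
+ *   while (!(tr32(NVRAM_SWARB) & SWARB_GNT1))
+ *           udelay(20);                          poll for the grant
+ *   ... access NVRAM ...
+ *   tw32(NVRAM_SWARB, SWARB_REQ_CLR1);           release the lock
+ */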
+/* 0x7024 --> 0x7400 unused */
+
+/* 0x7400 --> 0x8000 unused */
+
+/* 32K Window into NIC internal memory */
+#define NIC_SRAM_WIN_BASE 0x00008000
+
+/* Offsets into first 32k of NIC internal memory. */
+#define NIC_SRAM_PAGE_ZERO 0x00000000
+#define NIC_SRAM_SEND_RCB 0x00000100 /* 16 * TG3_BDINFO_... */
+#define NIC_SRAM_RCV_RET_RCB 0x00000200 /* 16 * TG3_BDINFO_... */
+#define NIC_SRAM_STATS_BLK 0x00000300
+#define NIC_SRAM_STATUS_BLK 0x00000b00
+
+#define NIC_SRAM_FIRMWARE_MBOX 0x00000b50
+#define NIC_SRAM_FIRMWARE_MBOX_MAGIC1 0x4B657654
+#define NIC_SRAM_FIRMWARE_MBOX_MAGIC2 0x4861764b /* !dma on linkchg */
+
+#define NIC_SRAM_DATA_SIG 0x00000b54
+#define NIC_SRAM_DATA_SIG_MAGIC 0x4b657654 /* ascii for 'KevT' */
+
+#define NIC_SRAM_DATA_CFG 0x00000b58
+#define NIC_SRAM_DATA_CFG_LED_MODE_MASK 0x0000000c
+#define NIC_SRAM_DATA_CFG_LED_MODE_UNKNOWN 0x00000000
+#define NIC_SRAM_DATA_CFG_LED_TRIPLE_SPD 0x00000004
+#define NIC_SRAM_DATA_CFG_LED_OPEN_DRAIN 0x00000004
+#define NIC_SRAM_DATA_CFG_LED_LINK_SPD 0x00000008
+#define NIC_SRAM_DATA_CFG_LED_OUTPUT 0x00000008
+#define NIC_SRAM_DATA_CFG_PHY_TYPE_MASK 0x00000030
+#define NIC_SRAM_DATA_CFG_PHY_TYPE_UNKNOWN 0x00000000
+#define NIC_SRAM_DATA_CFG_PHY_TYPE_COPPER 0x00000010
+#define NIC_SRAM_DATA_CFG_PHY_TYPE_FIBER 0x00000020
+#define NIC_SRAM_DATA_CFG_WOL_ENABLE 0x00000040
+#define NIC_SRAM_DATA_CFG_ASF_ENABLE 0x00000080
+#define NIC_SRAM_DATA_CFG_EEPROM_WP 0x00000100
+#define NIC_SRAM_DATA_CFG_FIBER_WOL 0x00004000
+
+#define NIC_SRAM_DATA_PHY_ID 0x00000b74
+#define NIC_SRAM_DATA_PHY_ID1_MASK 0xffff0000
+#define NIC_SRAM_DATA_PHY_ID2_MASK 0x0000ffff
+
+#define NIC_SRAM_FW_CMD_MBOX 0x00000b78
+#define FWCMD_NICDRV_ALIVE 0x00000001
+#define FWCMD_NICDRV_PAUSE_FW 0x00000002
+#define FWCMD_NICDRV_IPV4ADDR_CHG 0x00000003
+#define FWCMD_NICDRV_IPV6ADDR_CHG 0x00000004
+#define FWCMD_NICDRV_FIX_DMAR 0x00000005
+#define FWCMD_NICDRV_FIX_DMAW 0x00000006
+#define NIC_SRAM_FW_CMD_LEN_MBOX 0x00000b7c
+#define NIC_SRAM_FW_CMD_DATA_MBOX 0x00000b80
+#define NIC_SRAM_FW_ASF_STATUS_MBOX 0x00000c00
+#define NIC_SRAM_FW_DRV_STATE_MBOX 0x00000c04
+#define DRV_STATE_START 0x00000001
+#define DRV_STATE_UNLOAD 0x00000002
+#define DRV_STATE_WOL 0x00000003
+#define DRV_STATE_SUSPEND 0x00000004
+
+#define NIC_SRAM_FW_RESET_TYPE_MBOX 0x00000c08
+
+#define NIC_SRAM_MAC_ADDR_HIGH_MBOX 0x00000c14
+#define NIC_SRAM_MAC_ADDR_LOW_MBOX 0x00000c18
+
+#define NIC_SRAM_RX_MINI_BUFFER_DESC 0x00001000
+
+#define NIC_SRAM_DMA_DESC_POOL_BASE 0x00002000
+#define NIC_SRAM_DMA_DESC_POOL_SIZE 0x00002000
+#define NIC_SRAM_TX_BUFFER_DESC 0x00004000 /* 512 entries */
+#define NIC_SRAM_RX_BUFFER_DESC 0x00006000 /* 256 entries */
+#define NIC_SRAM_RX_JUMBO_BUFFER_DESC 0x00007000 /* 256 entries */
+#define NIC_SRAM_MBUF_POOL_BASE 0x00008000
+#define NIC_SRAM_MBUF_POOL_SIZE96 0x00018000
+#define NIC_SRAM_MBUF_POOL_SIZE64 0x00010000
+
+/* Currently the PHY address is fixed. */
+#define PHY_ADDR 0x01
+
+/* Tigon3 specific PHY MII registers. */
+#define TG3_BMCR_SPEED1000 0x0040
+
+#define MII_TG3_CTRL 0x09 /* 1000BASE-T control register */
+#define MII_TG3_CTRL_ADV_1000_HALF 0x0100
+#define MII_TG3_CTRL_ADV_1000_FULL 0x0200
+#define MII_TG3_CTRL_AS_MASTER 0x0800
+#define MII_TG3_CTRL_ENABLE_AS_MASTER 0x1000
+
+#define MII_TG3_EXT_CTRL 0x10 /* Extended control register */
+#define MII_TG3_EXT_CTRL_LNK3_LED_MODE 0x0002
+#define MII_TG3_EXT_CTRL_TBI 0x8000
+
+#define MII_TG3_EXT_STAT 0x11 /* Extended status register */
+#define MII_TG3_EXT_STAT_LPASS 0x0100
+
+#define MII_TG3_DSP_RW_PORT 0x15 /* DSP coefficient read/write port */
+
+#define MII_TG3_DSP_ADDRESS 0x17 /* DSP address register */
+
+#define MII_TG3_AUX_CTRL 0x18 /* auxiliary control register */
+
+#define MII_TG3_AUX_STAT 0x19 /* auxiliary status register */
+#define MII_TG3_AUX_STAT_LPASS 0x0004
+#define MII_TG3_AUX_STAT_SPDMASK 0x0700
+#define MII_TG3_AUX_STAT_10HALF 0x0100
+#define MII_TG3_AUX_STAT_10FULL 0x0200
+#define MII_TG3_AUX_STAT_100HALF 0x0300
+#define MII_TG3_AUX_STAT_100_4 0x0400
+#define MII_TG3_AUX_STAT_100FULL 0x0500
+#define MII_TG3_AUX_STAT_1000HALF 0x0600
+#define MII_TG3_AUX_STAT_1000FULL 0x0700
+
+#define MII_TG3_ISTAT 0x1a /* IRQ status register */
+#define MII_TG3_IMASK 0x1b /* IRQ mask register */
+
+/* ISTAT/IMASK event bits */
+#define MII_TG3_INT_LINKCHG 0x0002
+#define MII_TG3_INT_SPEEDCHG 0x0004
+#define MII_TG3_INT_DUPLEXCHG 0x0008
+#define MII_TG3_INT_ANEG_PAGE_RX 0x0400
+
+/* XXX Add this to mii.h */
+#ifndef ADVERTISE_PAUSE
+#define ADVERTISE_PAUSE_CAP 0x0400
+#endif
+#ifndef ADVERTISE_PAUSE_ASYM
+#define ADVERTISE_PAUSE_ASYM 0x0800
+#endif
+#ifndef LPA_PAUSE
+#define LPA_PAUSE_CAP 0x0400
+#endif
+#ifndef LPA_PAUSE_ASYM
+#define LPA_PAUSE_ASYM 0x0800
+#endif
+
+/* There are two ways to manage the TX descriptors on the tigon3.
+ * Either the descriptors are in host DMA'able memory, or they
+ * exist only in the card's on-chip SRAM. All 16 send BD rings use
+ * the same mode; they may not be configured individually.
+ *
+ * The mode we use is controlled by TG3_FLAG_HOST_TXDS in tp->tg3_flags.
+ *
+ * To use host memory TX descriptors:
+ * 1) Set GRC_MODE_HOST_SENDBDS in GRC_MODE register.
+ * Make sure GRC_MODE_4X_NIC_SEND_RINGS is clear.
+ * 2) Allocate DMA'able memory.
+ * 3) In NIC_SRAM_SEND_RCB (of desired index) of on-chip SRAM:
+ * a) Set TG3_BDINFO_HOST_ADDR to DMA address of memory
+ * obtained in step 2
+ * b) Set TG3_BDINFO_NIC_ADDR to NIC_SRAM_TX_BUFFER_DESC.
+ * c) Set len field of TG3_BDINFO_MAXLEN_FLAGS to number
+ * of TX descriptors. Leave flags field clear.
+ * 4) Access TX descriptors via host memory. The chip
+ * will refetch into local SRAM as needed when producer
+ * index mailboxes are updated.
+ *
+ * To use on-chip TX descriptors:
+ * 1) Set GRC_MODE_4X_NIC_SEND_RINGS in GRC_MODE register.
+ * Make sure GRC_MODE_HOST_SENDBDS is clear.
+ * 2) In NIC_SRAM_SEND_RCB (of desired index) of on-chip SRAM:
+ * a) Set TG3_BDINFO_HOST_ADDR to zero.
+ * b) Set TG3_BDINFO_NIC_ADDR to NIC_SRAM_TX_BUFFER_DESC
+ * c) TG3_BDINFO_MAXLEN_FLAGS is a don't-care.
+ * 3) Access TX descriptors directly in on-chip SRAM
+ * using normal {read,write}l() (and not pointer
+ * dereferencing of ioremap()'d memory, as the broken
+ * Broadcom driver does).
+ *
+ * Note that BDINFO_FLAGS_DISABLED should be set in the flags field of
+ * TG3_BDINFO_MAXLEN_FLAGS of all unused SEND_RCB indices.
+ */
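+/* Illustrative sketch (not part of the original header) of the host-memory
+ * setup steps above; tw32() and tg3_write_mem() are assumed accessor names,
+ * and TG3_BDINFO_HOST_ADDR/NIC_ADDR/MAXLEN_FLAGS are the RCB field offsets
+ * referred to in the comments:
+ *
+ *   tw32(GRC_MODE, (grc_mode | GRC_MODE_HOST_SENDBDS) &
+ *                  ~GRC_MODE_4X_NIC_SEND_RINGS);              step 1
+ *   tg3_write_mem(tp, NIC_SRAM_SEND_RCB + TG3_BDINFO_HOST_ADDR,
+ *                 tx_desc_dma_addr);                          step 3a
+ *   tg3_write_mem(tp, NIC_SRAM_SEND_RCB + TG3_BDINFO_NIC_ADDR,
+ *                 NIC_SRAM_TX_BUFFER_DESC);                   step 3b
+ *   tg3_write_mem(tp, NIC_SRAM_SEND_RCB + TG3_BDINFO_MAXLEN_FLAGS,
+ *                 number of TX descriptors in the len field,
+ *                 flags field clear);                         step 3c
+ */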
+struct tg3_tx_buffer_desc {
+ u32 addr_hi;
+ u32 addr_lo;
+
+ u32 len_flags;
+#define TXD_FLAG_TCPUDP_CSUM 0x0001
+#define TXD_FLAG_IP_CSUM 0x0002
+#define TXD_FLAG_END 0x0004
+#define TXD_FLAG_IP_FRAG 0x0008
+#define TXD_FLAG_IP_FRAG_END 0x0010
+#define TXD_FLAG_VLAN 0x0040
+#define TXD_FLAG_COAL_NOW 0x0080
+#define TXD_FLAG_CPU_PRE_DMA 0x0100
+#define TXD_FLAG_CPU_POST_DMA 0x0200
+#define TXD_FLAG_ADD_SRC_ADDR 0x1000
+#define TXD_FLAG_CHOOSE_SRC_ADDR 0x6000
+#define TXD_FLAG_NO_CRC 0x8000
+#define TXD_LEN_SHIFT 16
+
+ u32 vlan_tag;
+#define TXD_VLAN_TAG_SHIFT 0
+#define TXD_MSS_SHIFT 16
+};
+
+#define TXD_ADDR 0x00UL /* 64-bit */
+#define TXD_LEN_FLAGS 0x08UL /* 32-bit (upper 16-bits are len) */
+#define TXD_VLAN_TAG 0x0cUL /* 32-bit (upper 16-bits are tag) */
+#define TXD_SIZE 0x10UL
+
+struct tg3_rx_buffer_desc {
+ u32 addr_hi;
+ u32 addr_lo;
+
+ u32 idx_len;
+#define RXD_IDX_MASK 0xffff0000
+#define RXD_IDX_SHIFT 16
+#define RXD_LEN_MASK 0x0000ffff
+#define RXD_LEN_SHIFT 0
+
+ u32 type_flags;
+#define RXD_TYPE_SHIFT 16
+#define RXD_FLAGS_SHIFT 0
+
+#define RXD_FLAG_END 0x0004
+#define RXD_FLAG_MINI 0x0800
+#define RXD_FLAG_JUMBO 0x0020
+#define RXD_FLAG_VLAN 0x0040
+#define RXD_FLAG_ERROR 0x0400
+#define RXD_FLAG_IP_CSUM 0x1000
+#define RXD_FLAG_TCPUDP_CSUM 0x2000
+#define RXD_FLAG_IS_TCP 0x4000
+
+ u32 ip_tcp_csum;
+#define RXD_IPCSUM_MASK 0xffff0000
+#define RXD_IPCSUM_SHIFT 16
+#define RXD_TCPCSUM_MASK 0x0000ffff
+#define RXD_TCPCSUM_SHIFT 0
+
+ u32 err_vlan;
+
+#define RXD_VLAN_MASK 0x0000ffff
+
+#define RXD_ERR_BAD_CRC 0x00010000
+#define RXD_ERR_COLLISION 0x00020000
+#define RXD_ERR_LINK_LOST 0x00040000
+#define RXD_ERR_PHY_DECODE 0x00080000
+#define RXD_ERR_ODD_NIBBLE_RCVD_MII 0x00100000
+#define RXD_ERR_MAC_ABRT 0x00200000
+#define RXD_ERR_TOO_SMALL 0x00400000
+#define RXD_ERR_NO_RESOURCES 0x00800000
+#define RXD_ERR_HUGE_FRAME 0x01000000
+#define RXD_ERR_MASK 0xffff0000
+
+ u32 reserved;
+ u32 opaque;
+#define RXD_OPAQUE_INDEX_MASK 0x0000ffff
+#define RXD_OPAQUE_INDEX_SHIFT 0
+#define RXD_OPAQUE_RING_STD 0x00010000
+#define RXD_OPAQUE_RING_JUMBO 0x00020000
+#define RXD_OPAQUE_RING_MINI 0x00040000
+#define RXD_OPAQUE_RING_MASK 0x00070000
+};
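+/* Illustrative note (not from the original header): the driver writes the
+ * 'opaque' cookie into each buffer descriptor it posts and the chip echoes
+ * it back in the receive return ring, so a completion can be matched to
+ * its producer ring and slot:
+ *
+ *   ring = desc->opaque & RXD_OPAQUE_RING_MASK;    STD, JUMBO or MINI
+ *   idx  = desc->opaque & RXD_OPAQUE_INDEX_MASK;   slot within that ring
+ */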
+
+struct tg3_ext_rx_buffer_desc {
+ struct {
+ u32 addr_hi;
+ u32 addr_lo;
+ } addrlist[3];
+ u32 len2_len1;
+ u32 resv_len3;
+ struct tg3_rx_buffer_desc std;
+};
+
+/* We only use this when testing out the DMA engine
+ * at probe time. This is the internal format of buffer
+ * descriptors used by the chip at NIC_SRAM_DMA_DESCS.
+ */
+struct tg3_internal_buffer_desc {
+ u32 addr_hi;
+ u32 addr_lo;
+ u32 nic_mbuf;
+ /* XXX FIX THIS */
+#ifdef __BIG_ENDIAN
+ u16 cqid_sqid;
+ u16 len;
+#else
+ u16 len;
+ u16 cqid_sqid;
+#endif
+ u32 flags;
+ u32 __cookie1;
+ u32 __cookie2;
+ u32 __cookie3;
+};
+
+#define TG3_HW_STATUS_SIZE 0x50
+struct tg3_hw_status {
+ u32 status;
+#define SD_STATUS_UPDATED 0x00000001
+#define SD_STATUS_LINK_CHG 0x00000002
+#define SD_STATUS_ERROR 0x00000004
+
+ u32 status_tag;
+
+#ifdef __BIG_ENDIAN
+ u16 rx_consumer;
+ u16 rx_jumbo_consumer;
+#else
+ u16 rx_jumbo_consumer;
+ u16 rx_consumer;
+#endif
+
+#ifdef __BIG_ENDIAN
+ u16 reserved;
+ u16 rx_mini_consumer;
+#else
+ u16 rx_mini_consumer;
+ u16 reserved;
+#endif
+ struct {
+#ifdef __BIG_ENDIAN
+ u16 tx_consumer;
+ u16 rx_producer;
+#else
+ u16 rx_producer;
+ u16 tx_consumer;
+#endif
+ } idx[16];
+};
+
+typedef struct {
+ u32 high, low;
+} tg3_stat64_t;
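+/* Illustrative note (not from the original header): the chip maintains each
+ * statistic as a high/low pair of 32-bit words; a host u64 is formed as:
+ *
+ *   u64 v = ((u64) s.high << 32) | s.low;
+ */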
+
+struct tg3_hw_stats {
+ u8 __reserved0[0x400-0x300];
+
+ /* Statistics maintained by Receive MAC. */
+ tg3_stat64_t rx_octets;
+ u64 __reserved1;
+ tg3_stat64_t rx_fragments;
+ tg3_stat64_t rx_ucast_packets;
+ tg3_stat64_t rx_mcast_packets;
+ tg3_stat64_t rx_bcast_packets;
+ tg3_stat64_t rx_fcs_errors;
+ tg3_stat64_t rx_align_errors;
+ tg3_stat64_t rx_xon_pause_rcvd;
+ tg3_stat64_t rx_xoff_pause_rcvd;
+ tg3_stat64_t rx_mac_ctrl_rcvd;
+ tg3_stat64_t rx_xoff_entered;
+ tg3_stat64_t rx_frame_too_long_errors;
+ tg3_stat64_t rx_jabbers;
+ tg3_stat64_t rx_undersize_packets;
+ tg3_stat64_t rx_in_length_errors;
+ tg3_stat64_t rx_out_length_errors;
+ tg3_stat64_t rx_64_or_less_octet_packets;
+ tg3_stat64_t rx_65_to_127_octet_packets;
+ tg3_stat64_t rx_128_to_255_octet_packets;
+ tg3_stat64_t rx_256_to_511_octet_packets;
+ tg3_stat64_t rx_512_to_1023_octet_packets;
+ tg3_stat64_t rx_1024_to_1522_octet_packets;
+ tg3_stat64_t rx_1523_to_2047_octet_packets;
+ tg3_stat64_t rx_2048_to_4095_octet_packets;
+ tg3_stat64_t rx_4096_to_8191_octet_packets;
+ tg3_stat64_t rx_8192_to_9022_octet_packets;
+
+ u64 __unused0[37];
+
+ /* Statistics maintained by Transmit MAC. */
+ tg3_stat64_t tx_octets;
+ u64 __reserved2;
+ tg3_stat64_t tx_collisions;
+ tg3_stat64_t tx_xon_sent;
+ tg3_stat64_t tx_xoff_sent;
+ tg3_stat64_t tx_flow_control;
+ tg3_stat64_t tx_mac_errors;
+ tg3_stat64_t tx_single_collisions;
+ tg3_stat64_t tx_mult_collisions;
+ tg3_stat64_t tx_deferred;
+ u64 __reserved3;
+ tg3_stat64_t tx_excessive_collisions;
+ tg3_stat64_t tx_late_collisions;
+ tg3_stat64_t tx_collide_2times;
+ tg3_stat64_t tx_collide_3times;
+ tg3_stat64_t tx_collide_4times;
+ tg3_stat64_t tx_collide_5times;
+ tg3_stat64_t tx_collide_6times;
+ tg3_stat64_t tx_collide_7times;
+ tg3_stat64_t tx_collide_8times;
+ tg3_stat64_t tx_collide_9times;
+ tg3_stat64_t tx_collide_10times;
+ tg3_stat64_t tx_collide_11times;
+ tg3_stat64_t tx_collide_12times;
+ tg3_stat64_t tx_collide_13times;
+ tg3_stat64_t tx_collide_14times;
+ tg3_stat64_t tx_collide_15times;
+ tg3_stat64_t tx_ucast_packets;
+ tg3_stat64_t tx_mcast_packets;
+ tg3_stat64_t tx_bcast_packets;
+ tg3_stat64_t tx_carrier_sense_errors;
+ tg3_stat64_t tx_discards;
+ tg3_stat64_t tx_errors;
+
+ u64 __unused1[31];
+
+ /* Statistics maintained by Receive List Placement. */
+ tg3_stat64_t COS_rx_packets[16];
+ tg3_stat64_t COS_rx_filter_dropped;
+ tg3_stat64_t dma_writeq_full;
+ tg3_stat64_t dma_write_prioq_full;
+ tg3_stat64_t rxbds_empty;
+ tg3_stat64_t rx_discards;
+ tg3_stat64_t rx_errors;
+ tg3_stat64_t rx_threshold_hit;
+
+ u64 __unused2[9];
+
+ /* Statistics maintained by Send Data Initiator. */
+ tg3_stat64_t COS_out_packets[16];
+ tg3_stat64_t dma_readq_full;
+ tg3_stat64_t dma_read_prioq_full;
+ tg3_stat64_t tx_comp_queue_full;
+
+ /* Statistics maintained by Host Coalescing. */
+ tg3_stat64_t ring_set_send_prod_index;
+ tg3_stat64_t ring_status_update;
+ tg3_stat64_t nic_irqs;
+ tg3_stat64_t nic_avoided_irqs;
+ tg3_stat64_t nic_tx_threshold_hit;
+
+ u8 __reserved4[0xb00-0x9c0];
+};
+
+enum phy_led_mode {
+ led_mode_auto,
+ led_mode_three_link,
+ led_mode_link10
+};
+
+/* 'mapping' is superfluous, as the chip does not write into
+ * the tx/rx post rings, so we could just fetch it from there.
+ * But the cache behavior is better the way we are doing it now.
+ */
+struct ring_info {
+ struct sk_buff *skb;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+struct tx_ring_info {
+ struct sk_buff *skb;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+ u32 prev_vlan_tag;
+};
+
+struct tg3_config_info {
+ u32 flags;
+};
+
+struct tg3_link_config {
+ /* Describes what we're trying to get. */
+ u32 advertising;
+ u16 speed;
+ u8 duplex;
+ u8 autoneg;
+
+ /* Describes what we actually have. */
+ u16 active_speed;
+ u8 active_duplex;
+#define SPEED_INVALID 0xffff
+#define DUPLEX_INVALID 0xff
+#define AUTONEG_INVALID 0xff
+
+ /* When we go in and out of low power mode we need
+ * to swap with this state.
+ */
+ int phy_is_low_power;
+ u16 orig_speed;
+ u8 orig_duplex;
+ u8 orig_autoneg;
+};
+
+struct tg3_bufmgr_config {
+ u32 mbuf_read_dma_low_water;
+ u32 mbuf_mac_rx_low_water;
+ u32 mbuf_high_water;
+
+ u32 mbuf_read_dma_low_water_jumbo;
+ u32 mbuf_mac_rx_low_water_jumbo;
+ u32 mbuf_high_water_jumbo;
+
+ u32 dma_low_water;
+ u32 dma_high_water;
+};
+
+struct tg3 {
+ /* begin "general, frequently-used members" cacheline section */
+
+ /* SMP locking strategy:
+ *
+ * lock: Held during all operations except TX packet
+ * processing.
+ *
+ * tx_lock: Held during tg3_start_xmit{,_4gbug} and tg3_tx
+ *
+ * If you want to shut up all asynchronous processing you must
+ * acquire both locks, 'lock' taken before 'tx_lock'. IRQs must
+ * be disabled to take 'lock' but only softirq disabling is
+ * necessary for acquisition of 'tx_lock'.
+ */
+ spinlock_t lock;
+ spinlock_t indirect_lock;
+
+ unsigned long regs;
+ struct net_device *dev;
+ struct pci_dev *pdev;
+
+ struct tg3_hw_status *hw_status;
+ dma_addr_t status_mapping;
+
+ u32 msg_enable;
+
+ /* begin "tx thread" cacheline section */
+ u32 tx_prod;
+ u32 tx_cons;
+ u32 tx_pending;
+
+ spinlock_t tx_lock;
+
+ /* TX descs are only used if TG3_FLAG_HOST_TXDS is set. */
+ struct tg3_tx_buffer_desc *tx_ring;
+ struct tx_ring_info *tx_buffers;
+ dma_addr_t tx_desc_mapping;
+
+ /* begin "rx thread" cacheline section */
+ u32 rx_rcb_ptr;
+ u32 rx_std_ptr;
+ u32 rx_jumbo_ptr;
+ u32 rx_pending;
+ u32 rx_jumbo_pending;
+#if TG3_VLAN_TAG_USED
+ struct vlan_group *vlgrp;
+#endif
+
+ struct tg3_rx_buffer_desc *rx_std;
+ struct ring_info *rx_std_buffers;
+ dma_addr_t rx_std_mapping;
+
+ struct tg3_rx_buffer_desc *rx_jumbo;
+ struct ring_info *rx_jumbo_buffers;
+ dma_addr_t rx_jumbo_mapping;
+
+ struct tg3_rx_buffer_desc *rx_rcb;
+ dma_addr_t rx_rcb_mapping;
+
+ /* begin "everything else" cacheline(s) section */
+ struct net_device_stats net_stats;
+ struct net_device_stats net_stats_prev;
+ unsigned long phy_crc_errors;
+
+ u32 rx_offset;
+ u32 tg3_flags;
+#define TG3_FLAG_HOST_TXDS 0x00000001
+#define TG3_FLAG_TXD_MBOX_HWBUG 0x00000002
+#define TG3_FLAG_RX_CHECKSUMS 0x00000004
+#define TG3_FLAG_USE_LINKCHG_REG 0x00000008
+#define TG3_FLAG_USE_MI_INTERRUPT 0x00000010
+#define TG3_FLAG_ENABLE_ASF 0x00000020
+#define TG3_FLAG_POLL_SERDES 0x00000080
+#define TG3_FLAG_MBOX_WRITE_REORDER 0x00000100
+#define TG3_FLAG_PCIX_TARGET_HWBUG 0x00000200
+#define TG3_FLAG_WOL_SPEED_100MB 0x00000400
+#define TG3_FLAG_WOL_ENABLE 0x00000800
+#define TG3_FLAG_EEPROM_WRITE_PROT 0x00001000
+#define TG3_FLAG_NVRAM 0x00002000
+#define TG3_FLAG_NVRAM_BUFFERED 0x00004000
+#define TG3_FLAG_RX_PAUSE 0x00008000
+#define TG3_FLAG_TX_PAUSE 0x00010000
+#define TG3_FLAG_PCIX_MODE 0x00020000
+#define TG3_FLAG_PCI_HIGH_SPEED 0x00040000
+#define TG3_FLAG_PCI_32BIT 0x00080000
+#define TG3_FLAG_NO_TX_PSEUDO_CSUM 0x00100000
+#define TG3_FLAG_NO_RX_PSEUDO_CSUM 0x00200000
+#define TG3_FLAG_SERDES_WOL_CAP 0x00400000
+#define TG3_FLAG_JUMBO_ENABLE 0x00800000
+#define TG3_FLAG_10_100_ONLY 0x01000000
+#define TG3_FLAG_PAUSE_AUTONEG 0x02000000
+#define TG3_FLAG_PAUSE_RX 0x04000000
+#define TG3_FLAG_PAUSE_TX 0x08000000
+#define TG3_FLAG_BROKEN_CHECKSUMS 0x10000000
+#define TG3_FLAG_GOT_SERDES_FLOWCTL 0x20000000
+#define TG3_FLAG_SPLIT_MODE 0x40000000
+#define TG3_FLAG_INIT_COMPLETE 0x80000000
+
+ u32 split_mode_max_reqs;
+#define SPLIT_MODE_5704_MAX_REQ 3
+
+ struct timer_list timer;
+ u16 timer_counter;
+ u16 timer_multiplier;
+ u32 timer_offset;
+ u16 asf_counter;
+ u16 asf_multiplier;
+
+ struct tg3_link_config link_config;
+ struct tg3_bufmgr_config bufmgr_config;
+
+ /* cache h/w values, often passed straight to h/w */
+ u32 rx_mode;
+ u32 tx_mode;
+ u32 mac_mode;
+ u32 mi_mode;
+ u32 misc_host_ctrl;
+ u32 grc_mode;
+ u32 grc_local_ctrl;
+ u32 dma_rwctrl;
+ u32 coalesce_mode;
+
+ /* PCI block */
+ u16 pci_chip_rev_id;
+ u8 pci_cacheline_sz;
+ u8 pci_lat_timer;
+ u8 pci_hdr_type;
+ u8 pci_bist;
+ u32 pci_cfg_state[64 / sizeof(u32)];
+
+ int pm_cap;
+
+ /* PHY info */
+ u32 phy_id;
+#define PHY_ID_MASK 0xfffffff0
+#define PHY_ID_BCM5400 0x60008040
+#define PHY_ID_BCM5401 0x60008050
+#define PHY_ID_BCM5411 0x60008070
+#define PHY_ID_BCM5701 0x60008110
+#define PHY_ID_BCM5703 0x60008160
+#define PHY_ID_BCM5704 0x60008190
+#define PHY_ID_BCM8002 0x60010140
+#define PHY_ID_SERDES 0xfeedbee0
+#define PHY_ID_INVALID 0xffffffff
+#define PHY_ID_REV_MASK 0x0000000f
+#define PHY_REV_BCM5401_B0 0x1
+#define PHY_REV_BCM5401_B2 0x3
+#define PHY_REV_BCM5401_C0 0x6
+#define PHY_REV_BCM5411_X0 0x1 /* Found on Netgear GA302T */
+
+ enum phy_led_mode led_mode;
+
+ char board_part_number[24];
+
+ /* This macro assumes the passed PHY ID is already masked
+ * with PHY_ID_MASK.
+ */
+#define KNOWN_PHY_ID(X) \
+ ((X) == PHY_ID_BCM5400 || (X) == PHY_ID_BCM5401 || \
+ (X) == PHY_ID_BCM5411 || (X) == PHY_ID_BCM5701 || \
+ (X) == PHY_ID_BCM5703 || (X) == PHY_ID_BCM5704 || \
+ (X) == PHY_ID_BCM8002 || (X) == PHY_ID_SERDES)
+
+ struct tg3_hw_stats *hw_stats;
+ dma_addr_t stats_mapping;
+};
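+
+/* Illustrative sketch (not part of the original header): the lock
+ * ordering described in the SMP locking comment inside struct tg3
+ * above -- 'lock' (with IRQs disabled) taken before 'tx_lock' -- for
+ * a hypothetical helper that must quiesce all asynchronous work.
+ */
+#if 0
+static void tg3_example_quiesce(struct tg3 *tp)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&tp->lock, flags);	/* IRQs off for 'lock' */
+	spin_lock(&tp->tx_lock);	/* IRQs already off, plain lock suffices */
+	/* ... no IRQ, timer or TX processing can run here ... */
+	spin_unlock(&tp->tx_lock);
+	spin_unlock_irqrestore(&tp->lock, flags);
+}
+#endif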
+
+#endif /* !(_T3_H) */
diff --git a/xen/drivers/pci/Makefile b/xen/drivers/pci/Makefile
new file mode 100644
index 0000000000..1d811d45e3
--- /dev/null
+++ b/xen/drivers/pci/Makefile
@@ -0,0 +1,44 @@
+#
+# Makefile for the PCI bus specific drivers.
+#
+
+include $(BASEDIR)/Rules.mk
+
+OBJS := pci.o quirks.o compat.o names.o setup-res.o
+
+#obj-$(CONFIG_PCI) += pci.o quirks.o compat.o names.o
+#obj-$(CONFIG_PROC_FS) += proc.o
+
+#ifndef CONFIG_SPARC64
+#obj-$(CONFIG_PCI) += setup-res.o
+#endif
+
+#
+# Some architectures use the generic PCI setup functions
+#
+#obj-$(CONFIG_ALPHA) += setup-bus.o setup-irq.o
+#obj-$(CONFIG_ARM) += setup-bus.o setup-irq.o
+#obj-$(CONFIG_PARISC) += setup-bus.o
+#obj-$(CONFIG_SUPERH) += setup-bus.o setup-irq.o
+#obj-$(CONFIG_ALL_PPC) += setup-bus.o
+#obj-$(CONFIG_DDB5476) += setup-bus.o
+#obj-$(CONFIG_SGI_IP27) += setup-irq.o
+
+#ifndef CONFIG_X86
+#obj-y += syscall.o
+#endif
+
+default: $(OBJS)
+ $(LD) -r -o driver.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core gen-devlist classlist.h devlist.h
+
+names.o: names.c devlist.h classlist.h
+
+devlist.h classlist.h: pci.ids gen-devlist
+ ./gen-devlist <pci.ids
+
+gen-devlist: gen-devlist.c
+ $(HOSTCC) $(HOSTCFLAGS) -o gen-devlist gen-devlist.c
+
diff --git a/xen/drivers/pci/compat.c b/xen/drivers/pci/compat.c
new file mode 100644
index 0000000000..e035f860ea
--- /dev/null
+++ b/xen/drivers/pci/compat.c
@@ -0,0 +1,65 @@
+/*
+ * $Id: compat.c,v 1.1 1998/02/16 10:35:50 mj Exp $
+ *
+ * PCI Bus Services -- Functions for Backward Compatibility
+ *
+ * Copyright 1998--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/pci.h>
+
+int
+pcibios_present(void)
+{
+ return !list_empty(&pci_devices);
+}
+
+int
+pcibios_find_class(unsigned int class, unsigned short index, unsigned char *bus, unsigned char *devfn)
+{
+ const struct pci_dev *dev = NULL;
+ int cnt = 0;
+
+ while ((dev = pci_find_class(class, dev)))
+ if (index == cnt++) {
+ *bus = dev->bus->number;
+ *devfn = dev->devfn;
+ return PCIBIOS_SUCCESSFUL;
+ }
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+
+int
+pcibios_find_device(unsigned short vendor, unsigned short device, unsigned short index,
+ unsigned char *bus, unsigned char *devfn)
+{
+ const struct pci_dev *dev = NULL;
+ int cnt = 0;
+
+ while ((dev = pci_find_device(vendor, device, dev)))
+ if (index == cnt++) {
+ *bus = dev->bus->number;
+ *devfn = dev->devfn;
+ return PCIBIOS_SUCCESSFUL;
+ }
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+#define PCI_OP(rw,size,type) \
+int pcibios_##rw##_config_##size (unsigned char bus, unsigned char dev_fn, \
+ unsigned char where, unsigned type val) \
+{ \
+ struct pci_dev *dev = pci_find_slot(bus, dev_fn); \
+ if (!dev) return PCIBIOS_DEVICE_NOT_FOUND; \
+ return pci_##rw##_config_##size(dev, where, val); \
+}
+
+PCI_OP(read, byte, char *)
+PCI_OP(read, word, short *)
+PCI_OP(read, dword, int *)
+PCI_OP(write, byte, char)
+PCI_OP(write, word, short)
+PCI_OP(write, dword, int)
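+
+/* For reference (annotation, not in the original file): modulo
+ * whitespace, the first invocation above expands to
+ *
+ *	int pcibios_read_config_byte(unsigned char bus, unsigned char dev_fn,
+ *				     unsigned char where, unsigned char *val)
+ *	{
+ *		struct pci_dev *dev = pci_find_slot(bus, dev_fn);
+ *		if (!dev) return PCIBIOS_DEVICE_NOT_FOUND;
+ *		return pci_read_config_byte(dev, where, val);
+ *	}
+ */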
diff --git a/xen/drivers/pci/gen-devlist.c b/xen/drivers/pci/gen-devlist.c
new file mode 100644
index 0000000000..c0c242010e
--- /dev/null
+++ b/xen/drivers/pci/gen-devlist.c
@@ -0,0 +1,130 @@
+/*
+ * Generate devlist.h and classlist.h from the PCI ID file.
+ *
+ * (c) 1999--2002 Martin Mares <mj@ucw.cz>
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#define MAX_NAME_SIZE 79
+
+static void
+pq(FILE *f, const char *c)
+{
+ while (*c) {
+ if (*c == '"')
+ fprintf(f, "\\\"");
+ else {
+ fputc(*c, f);
+ if (*c == '?' && c[1] == '?') {
+ /* Avoid trigraphs */
+ fprintf(f, "\" \"");
+ }
+ }
+ c++;
+ }
+}
+
+int
+main(void)
+{
+ char line[1024], *c, *bra, vend[8];
+ int vendors = 0;
+ int mode = 0;
+ int lino = 0;
+ int vendor_len = 0;
+ FILE *devf, *clsf;
+
+ devf = fopen("devlist.h", "w");
+ clsf = fopen("classlist.h", "w");
+ if (!devf || !clsf) {
+ fprintf(stderr, "Cannot create output file!\n");
+ return 1;
+ }
+
+ while (fgets(line, sizeof(line)-1, stdin)) {
+ lino++;
+ if ((c = strchr(line, '\n')))
+ *c = 0;
+ if (!line[0] || line[0] == '#')
+ continue;
+ if (line[1] == ' ') {
+ if (line[0] == 'C' && strlen(line) > 4 && line[4] == ' ') {
+ vend[0] = line[2];
+ vend[1] = line[3];
+ vend[2] = 0;
+ mode = 2;
+ } else goto err;
+ }
+ else if (line[0] == '\t') {
+ if (line[1] == '\t')
+ continue;
+ switch (mode) {
+ case 1:
+ if (strlen(line) > 5 && line[5] == ' ') {
+ c = line + 5;
+ while (*c == ' ')
+ *c++ = 0;
+ if (vendor_len + strlen(c) + 1 > MAX_NAME_SIZE) {
+ /* Too long, try cutting off long description */
+ bra = strchr(c, '[');
+ if (bra && bra > c && bra[-1] == ' ')
+ bra[-1] = 0;
+ if (vendor_len + strlen(c) + 1 > MAX_NAME_SIZE) {
+ fprintf(stderr, "Line %d: Device name too long\n", lino);
+ fprintf(stderr, "%s\n", c);
+ return 1;
+ }
+ }
+ fprintf(devf, "\tDEVICE(%s,%s,\"", vend, line+1);
+ pq(devf, c);
+ fputs("\")\n", devf);
+ } else goto err;
+ break;
+ case 2:
+ if (strlen(line) > 3 && line[3] == ' ') {
+ c = line + 3;
+ while (*c == ' ')
+ *c++ = 0;
+ fprintf(clsf, "CLASS(%s%s, \"%s\")\n", vend, line+1, c);
+ } else goto err;
+ break;
+ default:
+ goto err;
+ }
+ } else if (strlen(line) > 4 && line[4] == ' ') {
+ c = line + 4;
+ while (*c == ' ')
+ *c++ = 0;
+ if (vendors)
+ fputs("ENDVENDOR()\n\n", devf);
+ vendors++;
+ strcpy(vend, line);
+ vendor_len = strlen(c);
+ if (vendor_len + 24 > MAX_NAME_SIZE) {
+ fprintf(stderr, "Line %d: Vendor name too long\n", lino);
+ return 1;
+ }
+ fprintf(devf, "VENDOR(%s,\"", vend);
+ pq(devf, c);
+ fputs("\")\n", devf);
+ mode = 1;
+ } else {
+ err:
+ fprintf(stderr, "Line %d: Syntax error in mode %d: %s\n", lino, mode, line);
+ return 1;
+ }
+ }
+ fputs("ENDVENDOR()\n\
+\n\
+#undef VENDOR\n\
+#undef DEVICE\n\
+#undef ENDVENDOR\n", devf);
+ fputs("\n#undef CLASS\n", clsf);
+
+ fclose(devf);
+ fclose(clsf);
+
+ return 0;
+}
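+
+/* Worked example (annotation, not in the original file): given this
+ * hypothetical pci.ids fragment on stdin
+ *
+ *	8086  Intel Corporation
+ *		1229  82557 [Ethernet Pro 100]
+ *	C 02  Network controller
+ *		00  Ethernet controller
+ *
+ * the program writes
+ *
+ *	VENDOR(8086,"Intel Corporation")
+ *		DEVICE(8086,1229,"82557 [Ethernet Pro 100]")
+ *	ENDVENDOR()
+ *
+ * into devlist.h and CLASS(0200, "Ethernet controller") into
+ * classlist.h.
+ */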
diff --git a/xen/drivers/pci/names.c b/xen/drivers/pci/names.c
new file mode 100644
index 0000000000..80674543b0
--- /dev/null
+++ b/xen/drivers/pci/names.c
@@ -0,0 +1,135 @@
+/*
+ * PCI Class and Device Name Tables
+ *
+ * Copyright 1993--1999 Drew Eckhardt, Frederic Potter,
+ * David Mosberger-Tang, Martin Mares
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+/*#include <linux/kernel.h>*/
+#include <linux/pci.h>
+#include <linux/init.h>
+
+#ifdef CONFIG_PCI_NAMES
+
+struct pci_device_info {
+ unsigned short device;
+ unsigned short seen;
+ const char *name;
+};
+
+struct pci_vendor_info {
+ unsigned short vendor;
+ unsigned short nr;
+ const char *name;
+ struct pci_device_info *devices;
+};
+
+/*
+ * This is ridiculous, but we want the strings in
+ * the .init section so that they don't take up
+ * real memory. Parse the same file multiple times
+ * to get all the info.
+ */
+#define VENDOR( vendor, name ) static char __vendorstr_##vendor[] __devinitdata = name;
+#define ENDVENDOR()
+#define DEVICE( vendor, device, name ) static char __devicestr_##vendor##device[] __devinitdata = name;
+#include "devlist.h"
+
+
+#define VENDOR( vendor, name ) static struct pci_device_info __devices_##vendor[] __devinitdata = {
+#define ENDVENDOR() };
+#define DEVICE( vendor, device, name ) { 0x##device, 0, __devicestr_##vendor##device },
+#include "devlist.h"
+
+static struct pci_vendor_info __devinitdata pci_vendor_list[] = {
+#define VENDOR( vendor, name ) { 0x##vendor, sizeof(__devices_##vendor) / sizeof(struct pci_device_info), __vendorstr_##vendor, __devices_##vendor },
+#define ENDVENDOR()
+#define DEVICE( vendor, device, name )
+#include "devlist.h"
+};
+
+#define VENDORS (sizeof(pci_vendor_list)/sizeof(struct pci_vendor_info))
+
+void __devinit pci_name_device(struct pci_dev *dev)
+{
+ const struct pci_vendor_info *vendor_p = pci_vendor_list;
+ int i = VENDORS;
+ char *name = dev->name;
+
+ do {
+ if (vendor_p->vendor == dev->vendor)
+ goto match_vendor;
+ vendor_p++;
+ } while (--i);
+
+ /* Couldn't find the vendor, let alone the device */
+ sprintf(name, "PCI device %04x:%04x", dev->vendor, dev->device);
+ return;
+
+ match_vendor: {
+ struct pci_device_info *device_p = vendor_p->devices;
+ int i = vendor_p->nr;
+
+ while (i > 0) {
+ if (device_p->device == dev->device)
+ goto match_device;
+ device_p++;
+ i--;
+ }
+
+ /* Ok, found the vendor, but unknown device */
+ sprintf(name, "PCI device %04x:%04x (%s)", dev->vendor, dev->device, vendor_p->name);
+ return;
+
+ /* Full match */
+ match_device: {
+ char *n = name + sprintf(name, "%s %s", vendor_p->name, device_p->name);
+ int nr = device_p->seen + 1;
+ device_p->seen = nr;
+ if (nr > 1)
+ sprintf(n, " (#%d)", nr);
+ }
+ }
+}
+
+/*
+ * Class names. Not in .init section as they are needed in runtime.
+ */
+
+static u16 pci_class_numbers[] = {
+#define CLASS(x,y) 0x##x,
+#include "classlist.h"
+};
+
+static char *pci_class_names[] = {
+#define CLASS(x,y) y,
+#include "classlist.h"
+};
+
+char *
+pci_class_name(u32 class)
+{
+ int i;
+
+ for(i=0; i<sizeof(pci_class_numbers)/sizeof(pci_class_numbers[0]); i++)
+ if (pci_class_numbers[i] == class)
+ return pci_class_names[i];
+ return NULL;
+}
+
+#else
+
+void __devinit pci_name_device(struct pci_dev *dev)
+{
+}
+
+char *
+pci_class_name(u32 class)
+{
+ return NULL;
+}
+
+#endif /* CONFIG_PCI_NAMES */
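+
+/* Expansion sketch (annotation, not in the original file): for a
+ * devlist.h entry such as
+ *
+ *	VENDOR(8086,"Intel Corporation")
+ *		DEVICE(8086,1229,"82557")
+ *	ENDVENDOR()
+ *
+ * the three passes over devlist.h above generate, in turn:
+ *
+ *	static char __vendorstr_8086[] __devinitdata = "Intel Corporation";
+ *	static char __devicestr_80861229[] __devinitdata = "82557";
+ *
+ *	static struct pci_device_info __devices_8086[] __devinitdata = {
+ *		{ 0x1229, 0, __devicestr_80861229 },
+ *	};
+ *
+ * plus a { 0x8086, 1, __vendorstr_8086, __devices_8086 } entry in
+ * pci_vendor_list.
+ */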
+
diff --git a/xen/drivers/pci/pci.c b/xen/drivers/pci/pci.c
new file mode 100644
index 0000000000..134e3e2c83
--- /dev/null
+++ b/xen/drivers/pci/pci.c
@@ -0,0 +1,2217 @@
+/*
+ * $Id: pci.c,v 1.91 1999/01/21 13:34:01 davem Exp $
+ *
+ * PCI Bus Services, see include/linux/pci.h for further explanation.
+ *
+ * Copyright 1993 -- 1997 Drew Eckhardt, Frederic Potter,
+ * David Mosberger-Tang
+ *
+ * Copyright 1997 -- 2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/types.h>
+/*#include <linux/kernel.h>*/
+#include <linux/pci.h>
+/*#include <linux/string.h>*/
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/spinlock.h>
+/*#include <linux/pm.h>*/
+/*#include <linux/kmod.h>*/ /* for hotplug_path */
+/*#include <linux/bitops.h>*/
+#include <linux/delay.h>
+#include <linux/cache.h>
+
+#include <asm/page.h>
+/*#include <asm/dma.h>*/ /* isa_dma_bridge_buggy */
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+LIST_HEAD(pci_root_buses);
+LIST_HEAD(pci_devices);
+
+/**
+ * pci_find_slot - locate PCI device from a given PCI slot
+ * @bus: number of PCI bus on which desired PCI device resides
+ * @devfn: encodes number of PCI slot in which the desired PCI
+ * device resides and the logical device number within that slot
+ * in case of multi-function devices.
+ *
+ * Given a PCI bus and slot/function number, the desired PCI device
+ * is located in system global list of PCI devices. If the device
+ * is found, a pointer to its data structure is returned. If no
+ * device is found, %NULL is returned.
+ */
+struct pci_dev *
+pci_find_slot(unsigned int bus, unsigned int devfn)
+{
+ struct pci_dev *dev;
+
+ pci_for_each_dev(dev) {
+ if (dev->bus->number == bus && dev->devfn == devfn)
+ return dev;
+ }
+ return NULL;
+}
+
+/**
+ * pci_find_subsys - begin or continue searching for a PCI device by vendor/subvendor/device/subdevice id
+ * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids
+ * @device: PCI device id to match, or %PCI_ANY_ID to match all device ids
+ * @ss_vendor: PCI subsystem vendor id to match, or %PCI_ANY_ID to match all vendor ids
+ * @ss_device: PCI subsystem device id to match, or %PCI_ANY_ID to match all device ids
+ * @from: Previous PCI device found in search, or %NULL for new search.
+ *
+ * Iterates through the list of known PCI devices. If a PCI device is
+ * found with a matching @vendor, @device, @ss_vendor and @ss_device, a pointer to its
+ * device structure is returned. Otherwise, %NULL is returned.
+ * A new search is initiated by passing %NULL to the @from argument.
+ * Otherwise if @from is not %NULL, searches continue from next device on the global list.
+ */
+struct pci_dev *
+pci_find_subsys(unsigned int vendor, unsigned int device,
+ unsigned int ss_vendor, unsigned int ss_device,
+ const struct pci_dev *from)
+{
+ struct list_head *n = from ? from->global_list.next : pci_devices.next;
+
+ while (n != &pci_devices) {
+ struct pci_dev *dev = pci_dev_g(n);
+ if ((vendor == PCI_ANY_ID || dev->vendor == vendor) &&
+ (device == PCI_ANY_ID || dev->device == device) &&
+ (ss_vendor == PCI_ANY_ID || dev->subsystem_vendor == ss_vendor) &&
+ (ss_device == PCI_ANY_ID || dev->subsystem_device == ss_device))
+ return dev;
+ n = n->next;
+ }
+ return NULL;
+}
+
+
+/**
+ * pci_find_device - begin or continue searching for a PCI device by vendor/device id
+ * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids
+ * @device: PCI device id to match, or %PCI_ANY_ID to match all device ids
+ * @from: Previous PCI device found in search, or %NULL for new search.
+ *
+ * Iterates through the list of known PCI devices. If a PCI device is
+ * found with a matching @vendor and @device, a pointer to its device structure is
+ * returned. Otherwise, %NULL is returned.
+ * A new search is initiated by passing %NULL to the @from argument.
+ * Otherwise if @from is not %NULL, searches continue from next device on the global list.
+ */
+struct pci_dev *
+pci_find_device(unsigned int vendor, unsigned int device, const struct pci_dev *from)
+{
+ return pci_find_subsys(vendor, device, PCI_ANY_ID, PCI_ANY_ID, from);
+}
+
+
+/**
+ * pci_find_class - begin or continue searching for a PCI device by class
+ * @class: search for a PCI device with this class designation
+ * @from: Previous PCI device found in search, or %NULL for new search.
+ *
+ * Iterates through the list of known PCI devices. If a PCI device is
+ * found with a matching @class, a pointer to its device structure is
+ * returned. Otherwise, %NULL is returned.
+ * A new search is initiated by passing %NULL to the @from argument.
+ * Otherwise if @from is not %NULL, searches continue from next device
+ * on the global list.
+ */
+struct pci_dev *
+pci_find_class(unsigned int class, const struct pci_dev *from)
+{
+ struct list_head *n = from ? from->global_list.next : pci_devices.next;
+
+ while (n != &pci_devices) {
+ struct pci_dev *dev = pci_dev_g(n);
+ if (dev->class == class)
+ return dev;
+ n = n->next;
+ }
+ return NULL;
+}
+
+/**
+ * pci_find_capability - query for devices' capabilities
+ * @dev: PCI device to query
+ * @cap: capability code
+ *
+ * Tell if a device supports a given PCI capability.
+ * Returns the address of the requested capability structure within the
+ * device's PCI configuration space or 0 in case the device does not
+ * support it. Possible values for @cap:
+ *
+ * %PCI_CAP_ID_PM Power Management
+ *
+ * %PCI_CAP_ID_AGP Accelerated Graphics Port
+ *
+ * %PCI_CAP_ID_VPD Vital Product Data
+ *
+ * %PCI_CAP_ID_SLOTID Slot Identification
+ *
+ * %PCI_CAP_ID_MSI Message Signalled Interrupts
+ *
+ * %PCI_CAP_ID_CHSWP CompactPCI HotSwap
+ *
+ * %PCI_CAP_ID_PCIX PCI-X
+ */
+int
+pci_find_capability(struct pci_dev *dev, int cap)
+{
+ u16 status;
+ u8 pos, id;
+ int ttl = 48;
+
+ pci_read_config_word(dev, PCI_STATUS, &status);
+ if (!(status & PCI_STATUS_CAP_LIST))
+ return 0;
+ switch (dev->hdr_type) {
+ case PCI_HEADER_TYPE_NORMAL:
+ case PCI_HEADER_TYPE_BRIDGE:
+ pci_read_config_byte(dev, PCI_CAPABILITY_LIST, &pos);
+ break;
+ case PCI_HEADER_TYPE_CARDBUS:
+ pci_read_config_byte(dev, PCI_CB_CAPABILITY_LIST, &pos);
+ break;
+ default:
+ return 0;
+ }
+ while (ttl-- && pos >= 0x40) {
+ pos &= ~3;
+ pci_read_config_byte(dev, pos + PCI_CAP_LIST_ID, &id);
+ if (id == 0xff)
+ break;
+ if (id == cap)
+ return pos;
+ pci_read_config_byte(dev, pos + PCI_CAP_LIST_NEXT, &pos);
+ }
+ return 0;
+}
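+
+/* Annotation (not in the original file): the ttl of 48 bounds the
+ * walk -- capabilities live in the 192 bytes of config space above
+ * 0x40 and each entry is dword-aligned (pos &= ~3), so at most
+ * 192 / 4 = 48 distinct entries fit; the counter guarantees
+ * termination even on a malformed, circular capability list.
+ */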
+
+
+/**
+ * pci_find_parent_resource - return resource region of parent bus of given region
+ * @dev: PCI device structure contains resources to be searched
+ * @res: child resource record for which parent is sought
+ *
+ * For the given resource region of the given device, return the
+ * resource region of the parent bus that contains the given region,
+ * or from which it should be allocated.
+ */
+struct resource *
+pci_find_parent_resource(const struct pci_dev *dev, struct resource *res)
+{
+ const struct pci_bus *bus = dev->bus;
+ int i;
+ struct resource *best = NULL;
+
+ for(i=0; i<4; i++) {
+ struct resource *r = bus->resource[i];
+ if (!r)
+ continue;
+ if (res->start && !(res->start >= r->start && res->end <= r->end))
+ continue; /* Not contained */
+ if ((res->flags ^ r->flags) & (IORESOURCE_IO | IORESOURCE_MEM))
+ continue; /* Wrong type */
+ if (!((res->flags ^ r->flags) & IORESOURCE_PREFETCH))
+ return r; /* Exact match */
+ if ((res->flags & IORESOURCE_PREFETCH) && !(r->flags & IORESOURCE_PREFETCH))
+ best = r; /* Approximating prefetchable by non-prefetchable */
+ }
+ return best;
+}
+
+/**
+ * pci_set_power_state - Set the power state of a PCI device
+ * @dev: PCI device to be suspended
+ * @state: Power state we're entering
+ *
+ * Transition a device to a new power state, using the Power Management
+ * Capabilities in the device's config space.
+ *
+ * RETURN VALUE:
+ * -EINVAL if trying to enter a shallower (higher-power) sleep state
+ * than the current one; only D0 can be entered from any state.
+ * 0 if we're already in the requested state.
+ * -EIO if device does not support PCI PM.
+ * 0 if we can successfully change the power state.
+ */
+
+int
+pci_set_power_state(struct pci_dev *dev, int state)
+{
+ int pm;
+ u16 pmcsr;
+
+ /* bound the state we're entering */
+ if (state > 3) state = 3;
+
+ /* Validate current state:
+ * Can enter D0 from any state, but otherwise we can only go
+ * deeper into sleep, never shallower
+ */
+ if (state > 0 && dev->current_state > state)
+ return -EINVAL;
+ else if (dev->current_state == state)
+ return 0; /* we're already there */
+
+ /* find PCI PM capability in list */
+ pm = pci_find_capability(dev, PCI_CAP_ID_PM);
+
+ /* abort if the device doesn't support PM capabilities */
+ if (!pm) return -EIO;
+
+ /* check if this device supports the desired state */
+ if (state == 1 || state == 2) {
+ u16 pmc;
+ pci_read_config_word(dev,pm + PCI_PM_PMC,&pmc);
+ if (state == 1 && !(pmc & PCI_PM_CAP_D1)) return -EIO;
+ else if (state == 2 && !(pmc & PCI_PM_CAP_D2)) return -EIO;
+ }
+
+ /* If we're in D3, force entire word to 0.
+ * This doesn't affect PME_Status, disables PME_En, and
+ * sets PowerState to 0.
+ */
+ if (dev->current_state >= 3)
+ pmcsr = 0;
+ else {
+ pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr);
+ pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+ pmcsr |= state;
+ }
+
+ /* enter specified state */
+ pci_write_config_word(dev, pm + PCI_PM_CTRL, pmcsr);
+
+ /* Mandatory power management transition delays */
+ /* see PCI PM 1.1 5.6.1 table 18 */
+ if(state == 3 || dev->current_state == 3)
+ {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(HZ/100);
+ }
+ else if(state == 2 || dev->current_state == 2)
+ udelay(200);
+ dev->current_state = state;
+
+ return 0;
+}
+
+/**
+ * pci_save_state - save the PCI configuration space of a device before suspending
+ * @dev: - PCI device that we're dealing with
+ * @buffer: - buffer to hold config space context
+ *
+ * @buffer must be large enough to hold the entire PCI 2.2 config space
+ * (>= 64 bytes).
+ */
+int
+pci_save_state(struct pci_dev *dev, u32 *buffer)
+{
+ int i;
+ if (buffer) {
+ /* XXX: 100% dword access ok here? */
+ for (i = 0; i < 16; i++)
+ pci_read_config_dword(dev, i * 4,&buffer[i]);
+ }
+ return 0;
+}
+
+/**
+ * pci_restore_state - Restore the saved state of a PCI device
+ * @dev: - PCI device that we're dealing with
+ * @buffer: - saved PCI config space
+ *
+ */
+int
+pci_restore_state(struct pci_dev *dev, u32 *buffer)
+{
+ int i;
+
+ if (buffer) {
+ for (i = 0; i < 16; i++)
+ pci_write_config_dword(dev,i * 4, buffer[i]);
+ }
+ /*
+ * otherwise, write the context information we know from bootup.
+ * This works around a problem where warm-booting from Windows
+ * combined with a D3(hot)->D0 transition causes PCI config
+ * header data to be forgotten.
+ */
+ else {
+ for (i = 0; i < 6; i ++)
+ pci_write_config_dword(dev,
+ PCI_BASE_ADDRESS_0 + (i * 4),
+ dev->resource[i].start);
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+ }
+ return 0;
+}
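+
+/* Usage sketch (not part of the original file): the minimal
+ * suspend/resume pair suggested by the power-management comment
+ * further below, for a hypothetical driver that keeps a 16-dword
+ * snapshot of config space.
+ */
+#if 0
+static u32 example_cfg_space[16];
+
+static int example_suspend(struct pci_dev *dev, u32 state)
+{
+	pci_save_state(dev, example_cfg_space);
+	return pci_set_power_state(dev, 3);	/* enter D3hot */
+}
+
+static int example_resume(struct pci_dev *dev)
+{
+	pci_set_power_state(dev, 0);		/* back to full power */
+	return pci_restore_state(dev, example_cfg_space);
+}
+#endif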
+
+/**
+ * pci_enable_device_bars - Initialize some of a device for use
+ * @dev: PCI device to be initialized
+ * @bars: bitmask of BARs that must be configured
+ *
+ * Initialize device before it's used by a driver. Ask low-level code
+ * to enable selected I/O and memory resources. Wake up the device if it
+ * was suspended. Beware, this function can fail.
+ */
+
+int
+pci_enable_device_bars(struct pci_dev *dev, int bars)
+{
+ int err;
+
+ pci_set_power_state(dev, 0);
+ if ((err = pcibios_enable_device(dev, bars)) < 0)
+ return err;
+ return 0;
+}
+
+/**
+ * pci_enable_device - Initialize device before it's used by a driver.
+ * @dev: PCI device to be initialized
+ *
+ * Initialize device before it's used by a driver. Ask low-level code
+ * to enable I/O and memory. Wake up the device if it was suspended.
+ * Beware, this function can fail.
+ */
+int
+pci_enable_device(struct pci_dev *dev)
+{
+ return pci_enable_device_bars(dev, 0x3F);
+}
+
+/**
+ * pci_disable_device - Disable PCI device after use
+ * @dev: PCI device to be disabled
+ *
+ * Signal to the system that the PCI device is not in use by the system
+ * anymore. This only involves disabling PCI bus-mastering, if active.
+ */
+void
+pci_disable_device(struct pci_dev *dev)
+{
+ u16 pci_command;
+
+ pci_read_config_word(dev, PCI_COMMAND, &pci_command);
+ if (pci_command & PCI_COMMAND_MASTER) {
+ pci_command &= ~PCI_COMMAND_MASTER;
+ pci_write_config_word(dev, PCI_COMMAND, pci_command);
+ }
+}
+
+/**
+ * pci_enable_wake - enable device to generate PME# when suspended
+ * @dev: - PCI device to operate on
+ * @state: - Current state of device.
+ * @enable: - Flag to enable or disable generation
+ *
+ * Set the bits in the device's PM Capabilities to generate PME# when
+ * the system is suspended.
+ *
+ * -EIO is returned if device doesn't have PM Capabilities.
+ * -EINVAL is returned if device supports it, but can't generate wake events.
+ * 0 if operation is successful.
+ *
+ */
+int pci_enable_wake(struct pci_dev *dev, u32 state, int enable)
+{
+ int pm;
+ u16 value;
+
+ /* find PCI PM capability in list */
+ pm = pci_find_capability(dev, PCI_CAP_ID_PM);
+
+ /* If device doesn't support PM Capabilities, but request is to disable
+ * wake events, it's a nop; otherwise fail */
+ if (!pm)
+ return enable ? -EIO : 0;
+
+ /* Check device's ability to generate PME# */
+ pci_read_config_word(dev,pm+PCI_PM_PMC,&value);
+
+ value &= PCI_PM_CAP_PME_MASK;
+ value >>= ffs(value); /* First bit of mask */
+
+ /* Check if it can generate PME# from requested state. */
+ if (!value || !(value & (1 << state)))
+ return enable ? -EINVAL : 0;
+
+ pci_read_config_word(dev, pm + PCI_PM_CTRL, &value);
+
+ /* Clear PME_Status by writing 1 to it and enable PME# */
+ value |= PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_PME_ENABLE;
+
+ if (!enable)
+ value &= ~PCI_PM_CTRL_PME_ENABLE;
+
+ pci_write_config_word(dev, pm + PCI_PM_CTRL, value);
+
+ return 0;
+}
+
+int
+pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
+{
+ u8 pin;
+
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (!pin)
+ return -1;
+ pin--;
+ while (dev->bus->self) {
+ pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+ dev = dev->bus->self;
+ }
+ *bridge = dev;
+ return pin;
+}
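+
+/* Worked example (annotation, not in the original file): a device in
+ * slot 3 behind a single PCI-to-PCI bridge, wired to INTB (the pin
+ * register reads 2, so pin = 1 after the decrement), swizzles to
+ * (1 + 3) % 4 = 0, i.e. INTA at the bridge; *bridge is set to that
+ * bridge on the root bus and 0 is returned.
+ */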
+
+/**
+ * pci_release_region - Release a PCI bar
+ * @pdev: PCI device whose resources were previously reserved by pci_request_region
+ * @bar: BAR to release
+ *
+ * Releases the PCI I/O and memory resources previously reserved by a
+ * successful call to pci_request_region. Call this function only
+ * after all use of the PCI regions has ceased.
+ */
+void pci_release_region(struct pci_dev *pdev, int bar)
+{
+ if (pci_resource_len(pdev, bar) == 0)
+ return;
+ if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
+ release_region(pci_resource_start(pdev, bar),
+ pci_resource_len(pdev, bar));
+ else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
+ release_mem_region(pci_resource_start(pdev, bar),
+ pci_resource_len(pdev, bar));
+}
+
+/**
+ * pci_request_region - Reserve a PCI I/O or memory resource
+ * @pdev: PCI device whose resources are to be reserved
+ * @bar: BAR to be reserved
+ * @res_name: Name to be associated with resource.
+ *
+ * Mark the PCI region associated with PCI device @pdev BAR @bar as
+ * being reserved by owner @res_name. Do not access any
+ * address inside the PCI regions unless this call returns
+ * successfully.
+ *
+ * Returns 0 on success, or %EBUSY on error. A warning
+ * message is also printed on failure.
+ */
+int pci_request_region(struct pci_dev *pdev, int bar, char *res_name)
+{
+ if (pci_resource_len(pdev, bar) == 0)
+ return 0;
+
+ if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) {
+ if (!request_region(pci_resource_start(pdev, bar),
+ pci_resource_len(pdev, bar), res_name))
+ goto err_out;
+ }
+ else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) {
+ if (!request_mem_region(pci_resource_start(pdev, bar),
+ pci_resource_len(pdev, bar), res_name))
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ printk (KERN_WARNING "PCI: Unable to reserve %s region #%d:%lx@%lx for device %s\n",
+ pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem",
+ bar + 1, /* PCI BAR # */
+ pci_resource_len(pdev, bar), pci_resource_start(pdev, bar),
+ pdev->slot_name);
+ return -EBUSY;
+}
+
+
+/**
+ * pci_release_regions - Release reserved PCI I/O and memory resources
+ * @pdev: PCI device whose resources were previously reserved by pci_request_regions
+ *
+ * Releases all PCI I/O and memory resources previously reserved by a
+ * successful call to pci_request_regions. Call this function only
+ * after all use of the PCI regions has ceased.
+ */
+
+void pci_release_regions(struct pci_dev *pdev)
+{
+ int i;
+
+ for (i = 0; i < 6; i++)
+ pci_release_region(pdev, i);
+}
+
+/**
+ * pci_request_regions - Reserve PCI I/O and memory resources
+ * @pdev: PCI device whose resources are to be reserved
+ * @res_name: Name to be associated with resource.
+ *
+ * Mark all PCI regions associated with PCI device @pdev as
+ * being reserved by owner @res_name. Do not access any
+ * address inside the PCI regions unless this call returns
+ * successfully.
+ *
+ * Returns 0 on success, or %EBUSY on error. A warning
+ * message is also printed on failure.
+ */
+int pci_request_regions(struct pci_dev *pdev, char *res_name)
+{
+ int i;
+
+ for (i = 0; i < 6; i++)
+ if(pci_request_region(pdev, i, res_name))
+ goto err_out;
+ return 0;
+
+err_out:
+ printk (KERN_WARNING "PCI: Unable to reserve %s region #%d:%lx@%lx for device %s\n",
+ pci_resource_flags(pdev, i) & IORESOURCE_IO ? "I/O" : "mem",
+ i + 1, /* PCI BAR # */
+ pci_resource_len(pdev, i), pci_resource_start(pdev, i),
+ pdev->slot_name);
+ while(--i >= 0)
+ pci_release_region(pdev, i);
+
+ return -EBUSY;
+}
+
+
+/*
+ * Registration of PCI drivers and handling of hot-pluggable devices.
+ */
+
+static LIST_HEAD(pci_drivers);
+
+/**
+ * pci_match_device - Tell if a PCI device structure has a matching PCI device id structure
+ * @ids: array of PCI device id structures to search in
+ * @dev: the PCI device structure to match against
+ *
+ * Used by a driver to check whether a PCI device present in the
+ * system is in its list of supported devices. Returns the matching
+ * pci_device_id structure or %NULL if there is no match.
+ */
+const struct pci_device_id *
+pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev)
+{
+ while (ids->vendor || ids->subvendor || ids->class_mask) {
+ if ((ids->vendor == PCI_ANY_ID || ids->vendor == dev->vendor) &&
+ (ids->device == PCI_ANY_ID || ids->device == dev->device) &&
+ (ids->subvendor == PCI_ANY_ID || ids->subvendor == dev->subsystem_vendor) &&
+ (ids->subdevice == PCI_ANY_ID || ids->subdevice == dev->subsystem_device) &&
+ !((ids->class ^ dev->class) & ids->class_mask))
+ return ids;
+ ids++;
+ }
+ return NULL;
+}
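+
+/* Usage sketch (not part of the original file): a hypothetical match
+ * table as consumed above; the all-zero sentinel terminates the loop
+ * because vendor, subvendor and class_mask are all 0.
+ */
+#if 0
+static struct pci_device_id example_ids[] = {
+	/* vendor, device, subvendor, subdevice, class, class_mask, data */
+	{ 0x8086, 0x1229, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+	{ 0, }
+};
+#endif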
+
+static int
+pci_announce_device(struct pci_driver *drv, struct pci_dev *dev)
+{
+ const struct pci_device_id *id;
+ int ret = 0;
+
+ if (drv->id_table) {
+ id = pci_match_device(drv->id_table, dev);
+ if (!id) {
+ ret = 0;
+ goto out;
+ }
+ } else
+ id = NULL;
+
+ dev_probe_lock();
+ if (drv->probe(dev, id) >= 0) {
+ dev->driver = drv;
+ ret = 1;
+ }
+ dev_probe_unlock();
+out:
+ return ret;
+}
+
+/**
+ * pci_register_driver - register a new pci driver
+ * @drv: the driver structure to register
+ *
+ * Adds the driver structure to the list of registered drivers.
+ * Returns the number of pci devices which were claimed by the driver
+ * during registration. The driver remains registered even if the
+ * return value is zero.
+ */
+int
+pci_register_driver(struct pci_driver *drv)
+{
+ struct pci_dev *dev;
+ int count = 0;
+
+ list_add_tail(&drv->node, &pci_drivers);
+ pci_for_each_dev(dev) {
+ if (!pci_dev_driver(dev))
+ count += pci_announce_device(drv, dev);
+ }
+ return count;
+}
+
+/**
+ * pci_unregister_driver - unregister a pci driver
+ * @drv: the driver structure to unregister
+ *
+ * Deletes the driver structure from the list of registered PCI drivers,
+ * gives it a chance to clean up by calling its remove() function for
+ * each device it was responsible for, and marks those devices as
+ * driverless.
+ */
+
+void
+pci_unregister_driver(struct pci_driver *drv)
+{
+ struct pci_dev *dev;
+
+ list_del(&drv->node);
+ pci_for_each_dev(dev) {
+ if (dev->driver == drv) {
+ if (drv->remove)
+ drv->remove(dev);
+ dev->driver = NULL;
+ }
+ }
+}
+
+#ifdef CONFIG_HOTPLUG
+
+#ifndef FALSE
+#define FALSE (0)
+#define TRUE (!FALSE)
+#endif
+
+static void
+run_sbin_hotplug(struct pci_dev *pdev, int insert)
+{
+ int i;
+ char *argv[3], *envp[8];
+ char id[20], sub_id[24], bus_id[24], class_id[20];
+
+ if (!hotplug_path[0])
+ return;
+
+ sprintf(class_id, "PCI_CLASS=%04X", pdev->class);
+ sprintf(id, "PCI_ID=%04X:%04X", pdev->vendor, pdev->device);
+ sprintf(sub_id, "PCI_SUBSYS_ID=%04X:%04X", pdev->subsystem_vendor, pdev->subsystem_device);
+ sprintf(bus_id, "PCI_SLOT_NAME=%s", pdev->slot_name);
+
+ i = 0;
+ argv[i++] = hotplug_path;
+ argv[i++] = "pci";
+ argv[i] = 0;
+
+ i = 0;
+ /* minimal command environment */
+ envp[i++] = "HOME=/";
+ envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+
+ /* other stuff we want to pass to /sbin/hotplug */
+ envp[i++] = class_id;
+ envp[i++] = id;
+ envp[i++] = sub_id;
+ envp[i++] = bus_id;
+ if (insert)
+ envp[i++] = "ACTION=add";
+ else
+ envp[i++] = "ACTION=remove";
+ envp[i] = 0;
+
+ call_usermodehelper (argv [0], argv, envp);
+}
+
+/**
+ * pci_announce_device_to_drivers - tell the drivers a new device has appeared
+ * @dev: the device that has shown up
+ *
+ * Notifies the drivers that a new device has appeared, and also
+ * notifies userspace through /sbin/hotplug.
+ */
+void
+pci_announce_device_to_drivers(struct pci_dev *dev)
+{
+ struct list_head *ln;
+
+ for(ln=pci_drivers.next; ln != &pci_drivers; ln=ln->next) {
+ struct pci_driver *drv = list_entry(ln, struct pci_driver, node);
+ if (drv->remove && pci_announce_device(drv, dev))
+ break;
+ }
+
+ /* notify userspace of new hotplug device */
+ run_sbin_hotplug(dev, TRUE);
+}
+
+/**
+ * pci_insert_device - insert a hotplug device
+ * @dev: the device to insert
+ * @bus: where to insert it
+ *
+ * Add a new device to the device lists and notify userspace (/sbin/hotplug).
+ */
+void
+pci_insert_device(struct pci_dev *dev, struct pci_bus *bus)
+{
+ list_add_tail(&dev->bus_list, &bus->devices);
+ list_add_tail(&dev->global_list, &pci_devices);
+#ifdef CONFIG_PROC_FS
+ pci_proc_attach_device(dev);
+#endif
+ pci_announce_device_to_drivers(dev);
+}
+
+static void
+pci_free_resources(struct pci_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *res = dev->resource + i;
+ if (res->parent)
+ release_resource(res);
+ }
+}
+
+/**
+ * pci_remove_device - remove a hotplug device
+ * @dev: the device to remove
+ *
+ * Delete the device structure from the device lists and
+ * notify userspace (/sbin/hotplug).
+ */
+void
+pci_remove_device(struct pci_dev *dev)
+{
+ if (dev->driver) {
+ if (dev->driver->remove)
+ dev->driver->remove(dev);
+ dev->driver = NULL;
+ }
+ list_del(&dev->bus_list);
+ list_del(&dev->global_list);
+ pci_free_resources(dev);
+#ifdef CONFIG_PROC_FS
+ pci_proc_detach_device(dev);
+#endif
+
+ /* notify userspace of hotplug device removal */
+ run_sbin_hotplug(dev, FALSE);
+}
+
+#endif
+
+static struct pci_driver pci_compat_driver = {
+ name: "compat"
+};
+
+/**
+ * pci_dev_driver - get the pci_driver of a device
+ * @dev: the device to query
+ *
+ * Returns the appropriate pci_driver structure or %NULL if there is no
+ * registered driver for the device.
+ */
+struct pci_driver *
+pci_dev_driver(const struct pci_dev *dev)
+{
+ if (dev->driver)
+ return dev->driver;
+ else {
+ int i;
+ for(i=0; i<=PCI_ROM_RESOURCE; i++)
+ if (dev->resource[i].flags & IORESOURCE_BUSY)
+ return &pci_compat_driver;
+ }
+ return NULL;
+}
+
+
+/*
+ * This interrupt-safe spinlock protects all accesses to PCI
+ * configuration space.
+ */
+
+static spinlock_t pci_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Wrappers for all PCI configuration access functions. They just check
+ * alignment, do locking and call the low-level functions pointed to
+ * by pci_dev->ops.
+ */
+
+#define PCI_byte_BAD 0
+#define PCI_word_BAD (pos & 1)
+#define PCI_dword_BAD (pos & 3)
+
+#define PCI_OP(rw,size,type) \
+int pci_##rw##_config_##size (struct pci_dev *dev, int pos, type value) \
+{ \
+ int res; \
+ unsigned long flags; \
+ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \
+ spin_lock_irqsave(&pci_lock, flags); \
+ res = dev->bus->ops->rw##_##size(dev, pos, value); \
+ spin_unlock_irqrestore(&pci_lock, flags); \
+ return res; \
+}
+
+PCI_OP(read, byte, u8 *)
+PCI_OP(read, word, u16 *)
+PCI_OP(read, dword, u32 *)
+PCI_OP(write, byte, u8)
+PCI_OP(write, word, u16)
+PCI_OP(write, dword, u32)
+
+/**
+ * pci_set_master - enables bus-mastering for device dev
+ * @dev: the PCI device to enable
+ *
+ * Enables bus-mastering on the device and calls pcibios_set_master()
+ * to do the needed arch specific settings.
+ */
+void
+pci_set_master(struct pci_dev *dev)
+{
+ u16 cmd;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ if (! (cmd & PCI_COMMAND_MASTER)) {
+ DBG("PCI: Enabling bus mastering for device %s\n", dev->slot_name);
+ cmd |= PCI_COMMAND_MASTER;
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+ pcibios_set_master(dev);
+}
+
+/**
+ * pdev_set_mwi - arch helper function for pcibios_set_mwi
+ * @dev: the PCI device for which MWI is enabled
+ *
+ * Helper function for implementing the arch-specific pcibios_set_mwi
+ * function. Originally copied from drivers/net/acenic.c.
+ * Copyright 1998-2001 by Jes Sorensen, <jes@trained-monkey.org>.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int
+pdev_set_mwi(struct pci_dev *dev)
+{
+ int rc = 0;
+ u8 cache_size;
+
+ /*
+ * Looks like this is necessary to deal with on all architectures,
+ * even this %$#%$# N440BX Intel based thing doesn't get it right.
+ * Ie. having two NICs in the machine, one will have the cache
+ * line set at boot time, the other will not.
+ */
+ pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &cache_size);
+ cache_size <<= 2;
+ if (cache_size != SMP_CACHE_BYTES) {
+ printk(KERN_WARNING "PCI: %s PCI cache line size set incorrectly (%i bytes) by BIOS/FW.\n",
+ dev->slot_name, cache_size);
+ if (cache_size > SMP_CACHE_BYTES) {
+ printk("PCI: %s cache line size too large - expecting %i.\n", dev->slot_name, SMP_CACHE_BYTES);
+ rc = -EINVAL;
+ } else {
+ printk("PCI: %s PCI cache line size corrected to %i.\n", dev->slot_name, SMP_CACHE_BYTES);
+ pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE,
+ SMP_CACHE_BYTES >> 2);
+ }
+ }
+
+ return rc;
+}
+
+/**
+ * pci_set_mwi - enables memory-write-invalidate PCI transaction
+ * @dev: the PCI device for which MWI is enabled
+ *
+ * Enables the Memory-Write-Invalidate transaction in %PCI_COMMAND,
+ * and then calls the arch-specific @pcibios_set_mwi (or the generic
+ * pdev_set_mwi fallback) to do the needed preparation.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int
+pci_set_mwi(struct pci_dev *dev)
+{
+ int rc;
+ u16 cmd;
+
+#ifdef HAVE_ARCH_PCI_MWI
+ rc = pcibios_set_mwi(dev);
+#else
+ rc = pdev_set_mwi(dev);
+#endif
+
+ if (rc)
+ return rc;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ if (! (cmd & PCI_COMMAND_INVALIDATE)) {
+ DBG("PCI: Enabling Mem-Wr-Inval for device %s\n", dev->slot_name);
+ cmd |= PCI_COMMAND_INVALIDATE;
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+
+ return 0;
+}
+
+/**
+ * pci_clear_mwi - disables Memory-Write-Invalidate for device dev
+ * @dev: the PCI device to disable
+ *
+ * Disables PCI Memory-Write-Invalidate transaction on the device
+ */
+void
+pci_clear_mwi(struct pci_dev *dev)
+{
+ u16 cmd;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ if (cmd & PCI_COMMAND_INVALIDATE) {
+ cmd &= ~PCI_COMMAND_INVALIDATE;
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+}
+
+int
+pci_set_dma_mask(struct pci_dev *dev, u64 mask)
+{
+ if (!pci_dma_supported(dev, mask))
+ return -EIO;
+
+ dev->dma_mask = mask;
+
+ return 0;
+}
+
+int
+pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask)
+{
+ if (!pci_dac_dma_supported(dev, mask))
+ return -EIO;
+
+ dev->dma_mask = mask;
+
+ return 0;
+}
+
+/*
+ * Translate the low bits of the PCI base
+ * to the resource type
+ */
+static inline unsigned int pci_calc_resource_flags(unsigned int flags)
+{
+ if (flags & PCI_BASE_ADDRESS_SPACE_IO)
+ return IORESOURCE_IO;
+
+ if (flags & PCI_BASE_ADDRESS_MEM_PREFETCH)
+ return IORESOURCE_MEM | IORESOURCE_PREFETCH;
+
+ return IORESOURCE_MEM;
+}
+
+/*
+ * Find the extent of a PCI decode, do sanity checks.
+ */
+static u32 pci_size(u32 base, u32 maxbase, unsigned long mask)
+{
+ u32 size = mask & maxbase; /* Find the significant bits */
+ if (!size)
+ return 0;
+ size = size & ~(size-1); /* Get the lowest of them to find the decode size */
+ size -= 1; /* extent = size - 1 */
+ if (base == maxbase && ((base | size) & mask) != mask)
+ return 0; /* base == maxbase can be valid only
+ if the BAR has been already
+ programmed with all 1s */
+ return size;
+}
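+
+/* Worked example (annotation, not in the original file): for a 1 MB
+ * memory BAR, writing ~0 reads back 0xfff00000 in the address bits.
+ * With mask 0xfffffff0: size = 0xfff00000, its lowest set bit is
+ * 0x00100000 (the decode size), and the extent returned is
+ * 0x000fffff, so the caller sets res->end = res->start + 0xfffff.
+ */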
+
+static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
+{
+ unsigned int pos, reg, next;
+ u32 l, sz;
+ struct resource *res;
+
+ for(pos=0; pos<howmany; pos = next) {
+ next = pos+1;
+ res = &dev->resource[pos];
+ res->name = dev->name;
+ reg = PCI_BASE_ADDRESS_0 + (pos << 2);
+ pci_read_config_dword(dev, reg, &l);
+ pci_write_config_dword(dev, reg, ~0);
+ pci_read_config_dword(dev, reg, &sz);
+ pci_write_config_dword(dev, reg, l);
+ if (!sz || sz == 0xffffffff)
+ continue;
+ if (l == 0xffffffff)
+ l = 0;
+ if ((l & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_MEMORY) {
+ sz = pci_size(l, sz, PCI_BASE_ADDRESS_MEM_MASK);
+ if (!sz)
+ continue;
+ res->start = l & PCI_BASE_ADDRESS_MEM_MASK;
+ res->flags |= l & ~PCI_BASE_ADDRESS_MEM_MASK;
+ } else {
+ sz = pci_size(l, sz, PCI_BASE_ADDRESS_IO_MASK & 0xffff);
+ if (!sz)
+ continue;
+ res->start = l & PCI_BASE_ADDRESS_IO_MASK;
+ res->flags |= l & ~PCI_BASE_ADDRESS_IO_MASK;
+ }
+ res->end = res->start + (unsigned long) sz;
+ res->flags |= pci_calc_resource_flags(l);
+ if ((l & (PCI_BASE_ADDRESS_SPACE | PCI_BASE_ADDRESS_MEM_TYPE_MASK))
+ == (PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64)) {
+ pci_read_config_dword(dev, reg+4, &l);
+ next++;
+#if BITS_PER_LONG == 64
+ res->start |= ((unsigned long) l) << 32;
+ res->end = res->start + sz;
+ pci_write_config_dword(dev, reg+4, ~0);
+ pci_read_config_dword(dev, reg+4, &sz);
+ pci_write_config_dword(dev, reg+4, l);
+ if (~sz)
+ res->end = res->start + 0xffffffff +
+ (((unsigned long) ~sz) << 32);
+#else
+ if (l) {
+ printk(KERN_ERR "PCI: Unable to handle 64-bit address for device %s\n", dev->slot_name);
+ res->start = 0;
+ res->flags = 0;
+ continue;
+ }
+#endif
+ }
+ }
+ if (rom) {
+ dev->rom_base_reg = rom;
+ res = &dev->resource[PCI_ROM_RESOURCE];
+ res->name = dev->name;
+ pci_read_config_dword(dev, rom, &l);
+ pci_write_config_dword(dev, rom, ~PCI_ROM_ADDRESS_ENABLE);
+ pci_read_config_dword(dev, rom, &sz);
+ pci_write_config_dword(dev, rom, l);
+ if (l == 0xffffffff)
+ l = 0;
+ if (sz && sz != 0xffffffff) {
+ sz = pci_size(l, sz, PCI_ROM_ADDRESS_MASK);
+ if (!sz)
+ return;
+ res->flags = (l & PCI_ROM_ADDRESS_ENABLE) |
+ IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+ res->start = l & PCI_ROM_ADDRESS_MASK;
+ res->end = res->start + (unsigned long) sz;
+ }
+ }
+}
+
+void __devinit pci_read_bridge_bases(struct pci_bus *child)
+{
+ struct pci_dev *dev = child->self;
+ u8 io_base_lo, io_limit_lo;
+ u16 mem_base_lo, mem_limit_lo;
+ unsigned long base, limit;
+ struct resource *res;
+ int i;
+
+ if (!dev) /* It's a host bus, nothing to read */
+ return;
+
+ if (dev->transparent) {
+ printk("Transparent bridge - %s\n", dev->name);
+ for(i = 0; i < 4; i++)
+ child->resource[i] = child->parent->resource[i];
+ return;
+ }
+
+ for(i=0; i<3; i++)
+ child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i];
+
+ res = child->resource[0];
+ pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo);
+ pci_read_config_byte(dev, PCI_IO_LIMIT, &io_limit_lo);
+ base = (io_base_lo & PCI_IO_RANGE_MASK) << 8;
+ limit = (io_limit_lo & PCI_IO_RANGE_MASK) << 8;
+
+ if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) {
+ u16 io_base_hi, io_limit_hi;
+ pci_read_config_word(dev, PCI_IO_BASE_UPPER16, &io_base_hi);
+ pci_read_config_word(dev, PCI_IO_LIMIT_UPPER16, &io_limit_hi);
+ base |= (io_base_hi << 16);
+ limit |= (io_limit_hi << 16);
+ }
+
+ if (base && base <= limit) {
+ res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO;
+ res->start = base;
+ res->end = limit + 0xfff;
+ }
+
+ res = child->resource[1];
+ pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo);
+ pci_read_config_word(dev, PCI_MEMORY_LIMIT, &mem_limit_lo);
+ base = (mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16;
+ limit = (mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16;
+ if (base && base <= limit) {
+ res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
+ res->start = base;
+ res->end = limit + 0xfffff;
+ }
+
+ res = child->resource[2];
+ pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo);
+ pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo);
+ base = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16;
+ limit = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16;
+
+ if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
+ u32 mem_base_hi, mem_limit_hi;
+ pci_read_config_dword(dev, PCI_PREF_BASE_UPPER32, &mem_base_hi);
+ pci_read_config_dword(dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_hi);
+#if BITS_PER_LONG == 64
+ base |= ((long) mem_base_hi) << 32;
+ limit |= ((long) mem_limit_hi) << 32;
+#else
+ if (mem_base_hi || mem_limit_hi) {
+ printk(KERN_ERR "PCI: Unable to handle 64-bit address space for %s\n", child->name);
+ return;
+ }
+#endif
+ }
+ if (base && base <= limit) {
+ res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
+ res->start = base;
+ res->end = limit + 0xfffff;
+ }
+}
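+
+/* Worked example (annotation, not in the original file): an I/O
+ * window whose base and limit registers both read 0x40 decodes as
+ * base = (0x40 & 0xf0) << 8 = 0x4000 and end = 0x4000 + 0xfff =
+ * 0x4fff, i.e. the bridge forwards the 4K-aligned I/O range
+ * 0x4000-0x4fff.
+ */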
+
+static struct pci_bus * __devinit pci_alloc_bus(void)
+{
+ struct pci_bus *b;
+
+ b = kmalloc(sizeof(*b), GFP_KERNEL);
+ if (b) {
+ memset(b, 0, sizeof(*b));
+ INIT_LIST_HEAD(&b->children);
+ INIT_LIST_HEAD(&b->devices);
+ }
+ return b;
+}
+
+struct pci_bus * __devinit pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr)
+{
+ struct pci_bus *child;
+ int i;
+
+ /*
+ * Allocate a new bus, and inherit stuff from the parent.
+ */
+ child = pci_alloc_bus();
+
+ list_add_tail(&child->node, &parent->children);
+ child->self = dev;
+ dev->subordinate = child;
+ child->parent = parent;
+ child->ops = parent->ops;
+ child->sysdata = parent->sysdata;
+
+ /*
+ * Set up the primary, secondary and subordinate
+ * bus numbers.
+ */
+ child->number = child->secondary = busnr;
+ child->primary = parent->secondary;
+ child->subordinate = 0xff;
+
+ /* Set up default resource pointers and names.. */
+ for (i = 0; i < 4; i++) {
+ child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i];
+ child->resource[i]->name = child->name;
+ }
+
+ return child;
+}
+
+unsigned int __devinit pci_do_scan_bus(struct pci_bus *bus);
+
+/*
+ * If it's a bridge, configure it and scan the bus behind it.
+ * For CardBus bridges, we don't scan behind as the devices will
+ * be handled by the bridge driver itself.
+ *
+ * We need to process bridges in two passes -- first we scan those
+ * already configured by the BIOS and after we are done with all of
+ * them, we proceed to assigning numbers to the remaining buses in
+ * order to avoid overlaps between old and new bus numbers.
+ */
+static int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass)
+{
+ unsigned int buses;
+ unsigned short cr;
+ struct pci_bus *child;
+ int is_cardbus = (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS);
+
+ pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses);
+ DBG("Scanning behind PCI bridge %s, config %06x, pass %d\n", dev->slot_name, buses & 0xffffff, pass);
+ if ((buses & 0xffff00) && !pcibios_assign_all_busses()) {
+ /*
+ * Bus already configured by firmware, process it in the first
+ * pass and just note the configuration.
+ */
+ if (pass)
+ return max;
+ child = pci_add_new_bus(bus, dev, 0);
+ child->primary = buses & 0xFF;
+ child->secondary = (buses >> 8) & 0xFF;
+ child->subordinate = (buses >> 16) & 0xFF;
+ child->number = child->secondary;
+ if (!is_cardbus) {
+ unsigned int cmax = pci_do_scan_bus(child);
+ if (cmax > max) max = cmax;
+ } else {
+ unsigned int cmax = child->subordinate;
+ if (cmax > max) max = cmax;
+ }
+ } else {
+ /*
+ * We need to assign a number to this bus which we always
+ * do in the second pass. We also keep all address decoders
+ * on the bridge disabled during scanning. FIXME: Why?
+ */
+ if (!pass)
+ return max;
+ pci_read_config_word(dev, PCI_COMMAND, &cr);
+ pci_write_config_word(dev, PCI_COMMAND, 0x0000);
+ pci_write_config_word(dev, PCI_STATUS, 0xffff);
+
+ child = pci_add_new_bus(bus, dev, ++max);
+ buses = (buses & 0xff000000)
+ | ((unsigned int)(child->primary) << 0)
+ | ((unsigned int)(child->secondary) << 8)
+ | ((unsigned int)(child->subordinate) << 16);
+ /*
+ * We need to blast all three values with a single write.
+ */
+ pci_write_config_dword(dev, PCI_PRIMARY_BUS, buses);
+ if (!is_cardbus) {
+ /* Now we can scan all subordinate buses... */
+ max = pci_do_scan_bus(child);
+ } else {
+ /*
+ * For CardBus bridges, we leave 4 bus numbers
+ * as cards with a PCI-to-PCI bridge can be
+ * inserted later.
+ */
+ max += 3;
+ }
+ /*
+ * Set the subordinate bus number to its real value.
+ */
+ child->subordinate = max;
+ pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, max);
+ pci_write_config_word(dev, PCI_COMMAND, cr);
+ }
+ sprintf(child->name, (is_cardbus ? "PCI CardBus #%02x" : "PCI Bus #%02x"), child->number);
+ return max;
+}
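+
+/* Worked example (annotation, not in the original file): a freshly
+ * assigned bridge with primary bus 0, secondary bus 2 and provisional
+ * subordinate 0xff packs into the PCI_PRIMARY_BUS dword as
+ * 0x00ff0200 -- primary in bits 0-7, secondary in bits 8-15,
+ * subordinate in bits 16-23, top byte preserved -- written in a
+ * single dword access as the comment above requires.
+ */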
+
+/*
+ * Read interrupt line and base address registers.
+ * The architecture-dependent code can tweak these, of course.
+ */
+static void pci_read_irq(struct pci_dev *dev)
+{
+ unsigned char irq;
+
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq);
+ if (irq)
+ pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
+ dev->irq = irq;
+}
+
+/**
+ * pci_setup_device - fill in class and map information of a device
+ * @dev: the device structure to fill
+ *
+ * Initialize the device structure with information about the device's
+ * vendor, class, memory and I/O-space addresses, IRQ lines, etc.
+ * Called at initialisation of the PCI subsystem and by CardBus services.
+ * Returns 0 on success and -1 if the device type is unknown (not normal,
+ * bridge or CardBus).
+ */
+int pci_setup_device(struct pci_dev * dev)
+{
+ u32 class;
+
+ sprintf(dev->slot_name, "%02x:%02x.%d", dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+ sprintf(dev->name, "PCI device %04x:%04x", dev->vendor, dev->device);
+
+ pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
+ class >>= 8; /* upper 3 bytes */
+ dev->class = class;
+ class >>= 8;
+
+ DBG("Found %02x:%02x [%04x/%04x] %06x %02x\n", dev->bus->number, dev->devfn, dev->vendor, dev->device, class, dev->hdr_type);
+
+ /* "Unknown power state" */
+ dev->current_state = 4;
+
+ switch (dev->hdr_type) { /* header type */
+ case PCI_HEADER_TYPE_NORMAL: /* standard header */
+ if (class == PCI_CLASS_BRIDGE_PCI)
+ goto bad;
+ pci_read_irq(dev);
+ pci_read_bases(dev, 6, PCI_ROM_ADDRESS);
+ pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
+ pci_read_config_word(dev, PCI_SUBSYSTEM_ID, &dev->subsystem_device);
+ break;
+
+ case PCI_HEADER_TYPE_BRIDGE: /* bridge header */
+ if (class != PCI_CLASS_BRIDGE_PCI)
+ goto bad;
+ /* The PCI-to-PCI bridge spec requires that subtractive
+ decoding (i.e. transparent) bridge must have programming
+ interface code of 0x01. */
+ dev->transparent = ((dev->class & 0xff) == 1);
+ pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
+ break;
+
+ case PCI_HEADER_TYPE_CARDBUS: /* CardBus bridge header */
+ if (class != PCI_CLASS_BRIDGE_CARDBUS)
+ goto bad;
+ pci_read_irq(dev);
+ pci_read_bases(dev, 1, 0);
+ pci_read_config_word(dev, PCI_CB_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
+ pci_read_config_word(dev, PCI_CB_SUBSYSTEM_ID, &dev->subsystem_device);
+ break;
+
+ default: /* unknown header */
+ printk(KERN_ERR "PCI: device %s has unknown header type %02x, ignoring.\n",
+ dev->slot_name, dev->hdr_type);
+ return -1;
+
+ bad:
+ printk(KERN_ERR "PCI: %s: class %x doesn't match header type %02x. Ignoring class.\n",
+ dev->slot_name, class, dev->hdr_type);
+ dev->class = PCI_CLASS_NOT_DEFINED;
+ }
+
+ /* We found a fine healthy device, go go go... */
+ return 0;
+}
+
+/*
+ * Read the config data for a PCI device, sanity-check it
+ * and fill in the dev structure...
+ */
+struct pci_dev * __devinit pci_scan_device(struct pci_dev *temp)
+{
+ struct pci_dev *dev;
+ u32 l;
+
+ if (pci_read_config_dword(temp, PCI_VENDOR_ID, &l))
+ return NULL;
+
+ /* some broken boards return 0 or ~0 if a slot is empty: */
+ if (l == 0xffffffff || l == 0x00000000 || l == 0x0000ffff || l == 0xffff0000)
+ return NULL;
+
+ dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return NULL;
+
+ memcpy(dev, temp, sizeof(*dev));
+ dev->vendor = l & 0xffff;
+ dev->device = (l >> 16) & 0xffff;
+
+ /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
+ set this higher, assuming the system even supports it. */
+ dev->dma_mask = 0xffffffff;
+ if (pci_setup_device(dev) < 0) {
+ kfree(dev);
+ dev = NULL;
+ }
+ return dev;
+}
+
+struct pci_dev * __devinit pci_scan_slot(struct pci_dev *temp)
+{
+ struct pci_bus *bus = temp->bus;
+ struct pci_dev *dev;
+ struct pci_dev *first_dev = NULL;
+ int func = 0;
+ int is_multi = 0;
+ u8 hdr_type;
+
+ for (func = 0; func < 8; func++, temp->devfn++) {
+ if (func && !is_multi) /* not a multi-function device */
+ continue;
+ if (pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type))
+ continue;
+ temp->hdr_type = hdr_type & 0x7f;
+
+ dev = pci_scan_device(temp);
+ if (!dev)
+ continue;
+ pci_name_device(dev);
+ if (!func) {
+ is_multi = hdr_type & 0x80;
+ first_dev = dev;
+ }
+
+ /*
+ * Link the device to both the global PCI device chain and
+ * the per-bus list of devices.
+ */
+ list_add_tail(&dev->global_list, &pci_devices);
+ list_add_tail(&dev->bus_list, &bus->devices);
+
+ /* Fix up broken headers */
+ pci_fixup_device(PCI_FIXUP_HEADER, dev);
+ }
+ return first_dev;
+}
+
+unsigned int __devinit pci_do_scan_bus(struct pci_bus *bus)
+{
+ unsigned int devfn, max, pass;
+ struct list_head *ln;
+ struct pci_dev *dev, dev0;
+
+ DBG("Scanning bus %02x\n", bus->number);
+ max = bus->secondary;
+
+ /* Create a device template */
+ memset(&dev0, 0, sizeof(dev0));
+ dev0.bus = bus;
+ dev0.sysdata = bus->sysdata;
+
+ /* Go find them, Rover! */
+ for (devfn = 0; devfn < 0x100; devfn += 8) {
+ dev0.devfn = devfn;
+ pci_scan_slot(&dev0);
+ }
+
+ /*
+ * After performing arch-dependent fixup of the bus, look behind
+ * all PCI-to-PCI bridges on this bus.
+ */
+ DBG("Fixups for bus %02x\n", bus->number);
+ pcibios_fixup_bus(bus);
+ for (pass=0; pass < 2; pass++)
+ for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) {
+ dev = pci_dev_b(ln);
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+ max = pci_scan_bridge(bus, dev, max, pass);
+ }
+
+	/*
+	 * The bus is now fully scanned, so we know about every device on
+	 * it and about everything behind any bridges it carries.
+	 *
+	 * Return the highest sub-bus number we reached.
+	 */
+ DBG("Bus scan for %02x returning with max=%02x\n", bus->number, max);
+ return max;
+}
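+
+/*
+ * The two bridge passes above mirror pci_scan_bridge()'s contract
+ * (defined elsewhere in this file): broadly, bridges the firmware has
+ * already configured are walked on the first pass, and any remaining
+ * ones are assigned bus numbers on the second.
+ */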
+
+int __devinit pci_bus_exists(const struct list_head *list, int nr)
+{
+ const struct list_head *l;
+
+ for(l=list->next; l != list; l = l->next) {
+ const struct pci_bus *b = pci_bus_b(l);
+ if (b->number == nr || pci_bus_exists(&b->children, nr))
+ return 1;
+ }
+ return 0;
+}
+
+struct pci_bus * __devinit pci_alloc_primary_bus(int bus)
+{
+ struct pci_bus *b;
+
+ if (pci_bus_exists(&pci_root_buses, bus)) {
+ /* If we already got to this bus through a different bridge, ignore it */
+ DBG("PCI: Bus %02x already known\n", bus);
+ return NULL;
+ }
+
+ b = pci_alloc_bus();
+ list_add_tail(&b->node, &pci_root_buses);
+
+ b->number = b->secondary = bus;
+ b->resource[0] = &ioport_resource;
+ b->resource[1] = &iomem_resource;
+ return b;
+}
+
+struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata)
+{
+ struct pci_bus *b = pci_alloc_primary_bus(bus);
+ if (b) {
+ b->sysdata = sysdata;
+ b->ops = ops;
+ b->subordinate = pci_do_scan_bus(b);
+ }
+ return b;
+}
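+
+/*
+ * A minimal usage sketch (not part of this patch): an architecture's
+ * pcibios layer would typically probe the root bus with something like
+ * the following, where `pci_root_ops' stands in for that architecture's
+ * config-space access methods:
+ *
+ *	struct pci_bus *root = pci_scan_bus(0, &pci_root_ops, NULL);
+ *	if (root != NULL)
+ *		printk(KERN_INFO "PCI: root bus spans %02x..%02x\n",
+ *		       root->number, root->subordinate);
+ */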
+
+#ifdef CONFIG_PM
+
+/*
+ * PCI power management.
+ *
+ * This needs to be done centrally, so that we power-manage PCI
+ * devices in the right order: we must not shut down a PCI bridge
+ * before we've shut down the devices behind it, and we must not
+ * wake up a device before we've woken up the bridge leading to it.
+ *
+ * We do not touch devices whose driver doesn't export suspend/resume
+ * functions; that is just too dangerous. If the default PCI
+ * suspend/resume behaviour works for a device, its driver can trivially
+ * provide it (i.e. a suspend function that just calls
+ * pci_set_power_state()).
+ */
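+
+/*
+ * Concretely, the walkers below encode that ordering: on suspend,
+ * pci_pm_suspend_bus() recurses into child buses before touching this
+ * bus's own device list, so devices behind a bridge go down before the
+ * bridge itself; on resume, pci_pm_resume_bus() does the reverse and
+ * wakes this bus's devices (bridges included) before descending.
+ */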
+
+static int pci_pm_save_state_device(struct pci_dev *dev, u32 state)
+{
+ int error = 0;
+ if (dev) {
+ struct pci_driver *driver = dev->driver;
+ if (driver && driver->save_state)
+ error = driver->save_state(dev,state);
+ }
+ return error;
+}
+
+static int pci_pm_suspend_device(struct pci_dev *dev, u32 state)
+{
+ int error = 0;
+ if (dev) {
+ struct pci_driver *driver = dev->driver;
+ if (driver && driver->suspend)
+ error = driver->suspend(dev,state);
+ }
+ return error;
+}
+
+static int pci_pm_resume_device(struct pci_dev *dev)
+{
+ int error = 0;
+ if (dev) {
+ struct pci_driver *driver = dev->driver;
+ if (driver && driver->resume)
+ error = driver->resume(dev);
+ }
+ return error;
+}
+
+static int pci_pm_save_state_bus(struct pci_bus *bus, u32 state)
+{
+ struct list_head *list;
+ int error = 0;
+
+ list_for_each(list, &bus->children) {
+ error = pci_pm_save_state_bus(pci_bus_b(list),state);
+ if (error) return error;
+ }
+ list_for_each(list, &bus->devices) {
+ error = pci_pm_save_state_device(pci_dev_b(list),state);
+ if (error) return error;
+ }
+ return 0;
+}
+
+static int pci_pm_suspend_bus(struct pci_bus *bus, u32 state)
+{
+ struct list_head *list;
+
+ /* Walk the bus children list */
+ list_for_each(list, &bus->children)
+ pci_pm_suspend_bus(pci_bus_b(list),state);
+
+ /* Walk the device children list */
+ list_for_each(list, &bus->devices)
+ pci_pm_suspend_device(pci_dev_b(list),state);
+ return 0;
+}
+
+static int pci_pm_resume_bus(struct pci_bus *bus)
+{
+ struct list_head *list;
+
+ /* Walk the device children list */
+ list_for_each(list, &bus->devices)
+ pci_pm_resume_device(pci_dev_b(list));
+
+ /* And then walk the bus children */
+ list_for_each(list, &bus->children)
+ pci_pm_resume_bus(pci_bus_b(list));
+ return 0;
+}
+
+static int pci_pm_save_state(u32 state)
+{
+ struct list_head *list;
+ struct pci_bus *bus;
+ int error = 0;
+
+ list_for_each(list, &pci_root_buses) {
+ bus = pci_bus_b(list);
+ error = pci_pm_save_state_bus(bus,state);
+ if (!error)
+ error = pci_pm_save_state_device(bus->self,state);
+ }
+ return error;
+}
+
+static int pci_pm_suspend(u32 state)
+{
+ struct list_head *list;
+ struct pci_bus *bus;
+
+ list_for_each(list, &pci_root_buses) {
+ bus = pci_bus_b(list);
+ pci_pm_suspend_bus(bus,state);
+ pci_pm_suspend_device(bus->self,state);
+ }
+ return 0;
+}
+
+int pci_pm_resume(void)
+{
+ struct list_head *list;
+ struct pci_bus *bus;
+
+ list_for_each(list, &pci_root_buses) {
+ bus = pci_bus_b(list);
+ pci_pm_resume_device(bus->self);
+ pci_pm_resume_bus(bus);
+ }
+ return 0;
+}
+
+static int
+pci_pm_callback(struct pm_dev *pm_device, pm_request_t rqst, void *data)
+{
+ int error = 0;
+
+ switch (rqst) {
+ case PM_SAVE_STATE:
+ error = pci_pm_save_state((unsigned long)data);
+ break;
+ case PM_SUSPEND:
+ error = pci_pm_suspend((unsigned long)data);
+ break;
+ case PM_RESUME:
+ error = pci_pm_resume();
+ break;
+ default: break;
+ }
+ return error;
+}
+
+#endif
+
+
+#if 0 /* XXX KAF: Only USB uses this stuff -- I think we'll just bin it. */
+
+/*
+ * Pool allocator ... wraps the pci_alloc_consistent page allocator so
+ * that small blocks can easily be used by drivers for bus-mastering
+ * controllers. This should probably share the guts of the slab allocator.
+ */
+
+struct pci_pool { /* the pool */
+ struct list_head page_list;
+ spinlock_t lock;
+ size_t blocks_per_page;
+ size_t size;
+ int flags;
+ struct pci_dev *dev;
+ size_t allocation;
+ char name [32];
+ wait_queue_head_t waitq;
+};
+
+struct pci_page { /* cacheable header for 'allocation' bytes */
+ struct list_head page_list;
+ void *vaddr;
+ dma_addr_t dma;
+ unsigned long bitmap [0];
+};
+
+#define POOL_TIMEOUT_JIFFIES ((100 /* msec */ * HZ) / 1000)
+#define POOL_POISON_BYTE 0xa7
+
+// #define CONFIG_PCIPOOL_DEBUG
+
+
+/**
+ * pci_pool_create - Creates a pool of PCI-consistent memory blocks for DMA.
+ * @name: name of pool, for diagnostics
+ * @pdev: pci device that will be doing the DMA
+ * @size: size of the blocks in this pool.
+ * @align: alignment requirement for blocks; must be a power of two
+ * @allocation: returned blocks won't cross this boundary (or zero)
+ * @flags: SLAB_* flags (not all are supported).
+ *
+ * Returns a pci allocation pool with the requested characteristics, or
+ * null if one can't be created. Given one of these pools, pci_pool_alloc()
+ * may be used to allocate memory. Such memory will all have "consistent"
+ * DMA mappings, accessible by the device and its driver without using
+ * cache flushing primitives. The actual size of blocks allocated may be
+ * larger than requested because of alignment.
+ *
+ * If allocation is nonzero, objects returned from pci_pool_alloc() won't
+ * cross that size boundary. This is useful for devices which have
+ * addressing restrictions on individual DMA transfers, such as being
+ * unable to cross 4KByte boundaries.
+ */
+struct pci_pool *
+pci_pool_create (const char *name, struct pci_dev *pdev,
+ size_t size, size_t align, size_t allocation, int flags)
+{
+ struct pci_pool *retval;
+
+ if (align == 0)
+ align = 1;
+ if (size == 0)
+ return 0;
+ else if (size < align)
+ size = align;
+	else if ((size % align) != 0) {
+		/* round size up to the next multiple of align */
+		size += align - 1;
+		size &= ~(align - 1);
+	}
+
+ if (allocation == 0) {
+ if (PAGE_SIZE < size)
+ allocation = size;
+ else
+ allocation = PAGE_SIZE;
+ // FIXME: round up for less fragmentation
+ } else if (allocation < size)
+ return 0;
+
+ if (!(retval = kmalloc (sizeof *retval, flags)))
+ return retval;
+
+#ifdef CONFIG_PCIPOOL_DEBUG
+ flags |= SLAB_POISON;
+#endif
+
+ strncpy (retval->name, name, sizeof retval->name);
+ retval->name [sizeof retval->name - 1] = 0;
+
+ retval->dev = pdev;
+ INIT_LIST_HEAD (&retval->page_list);
+ spin_lock_init (&retval->lock);
+ retval->size = size;
+ retval->flags = flags;
+ retval->allocation = allocation;
+ retval->blocks_per_page = allocation / size;
+ init_waitqueue_head (&retval->waitq);
+
+#ifdef CONFIG_PCIPOOL_DEBUG
+ printk (KERN_DEBUG "pcipool create %s/%s size %d, %d/page (%d alloc)\n",
+ pdev ? pdev->slot_name : NULL, retval->name, size,
+ retval->blocks_per_page, allocation);
+#endif
+
+ return retval;
+}
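+
+/*
+ * A minimal usage sketch (not part of this patch; `pdev' is assumed to
+ * be the driver's struct pci_dev): create a pool of 64-byte blocks
+ * aligned to 16 bytes, take one block, and give it back.
+ *
+ *	struct pci_pool *pool;
+ *	dma_addr_t dma;
+ *	void *buf;
+ *
+ *	pool = pci_pool_create("demo", pdev, 64, 16, 0, SLAB_KERNEL);
+ *	if (pool && (buf = pci_pool_alloc(pool, SLAB_KERNEL, &dma)) != NULL)
+ *		pci_pool_free(pool, buf, dma);
+ *	if (pool)
+ *		pci_pool_destroy(pool);
+ */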
+
+
+static struct pci_page *
+pool_alloc_page (struct pci_pool *pool, int mem_flags)
+{
+ struct pci_page *page;
+ int mapsize;
+
+ mapsize = pool->blocks_per_page;
+ mapsize = (mapsize + BITS_PER_LONG - 1) / BITS_PER_LONG;
+ mapsize *= sizeof (long);
+
+ page = (struct pci_page *) kmalloc (mapsize + sizeof *page, mem_flags);
+ if (!page)
+ return 0;
+ page->vaddr = pci_alloc_consistent (pool->dev,
+ pool->allocation,
+ &page->dma);
+ if (page->vaddr) {
+ memset (page->bitmap, 0xff, mapsize); // bit set == free
+ if (pool->flags & SLAB_POISON)
+ memset (page->vaddr, POOL_POISON_BYTE, pool->allocation);
+ list_add (&page->page_list, &pool->page_list);
+ } else {
+ kfree (page);
+ page = 0;
+ }
+ return page;
+}
+
+
+static inline int
+is_page_busy (int blocks, unsigned long *bitmap)
+{
+ while (blocks > 0) {
+ if (*bitmap++ != ~0UL)
+ return 1;
+ blocks -= BITS_PER_LONG;
+ }
+ return 0;
+}
+
+static void
+pool_free_page (struct pci_pool *pool, struct pci_page *page)
+{
+ dma_addr_t dma = page->dma;
+
+ if (pool->flags & SLAB_POISON)
+ memset (page->vaddr, POOL_POISON_BYTE, pool->allocation);
+ pci_free_consistent (pool->dev, pool->allocation, page->vaddr, dma);
+ list_del (&page->page_list);
+ kfree (page);
+}
+
+
+/**
+ * pci_pool_destroy - destroys a pool of pci memory blocks.
+ * @pool: pci pool that will be destroyed
+ *
+ * Caller guarantees that no more memory from the pool is in use,
+ * and that nothing will try to use the pool after this call.
+ */
+void
+pci_pool_destroy (struct pci_pool *pool)
+{
+ unsigned long flags;
+
+#ifdef CONFIG_PCIPOOL_DEBUG
+ printk (KERN_DEBUG "pcipool destroy %s/%s\n",
+ pool->dev ? pool->dev->slot_name : NULL,
+ pool->name);
+#endif
+
+ spin_lock_irqsave (&pool->lock, flags);
+ while (!list_empty (&pool->page_list)) {
+ struct pci_page *page;
+ page = list_entry (pool->page_list.next,
+ struct pci_page, page_list);
+ if (is_page_busy (pool->blocks_per_page, page->bitmap)) {
+ printk (KERN_ERR "pci_pool_destroy %s/%s, %p busy\n",
+ pool->dev ? pool->dev->slot_name : NULL,
+ pool->name, page->vaddr);
+ /* leak the still-in-use consistent memory */
+ list_del (&page->page_list);
+ kfree (page);
+ } else
+ pool_free_page (pool, page);
+ }
+ spin_unlock_irqrestore (&pool->lock, flags);
+ kfree (pool);
+}
+
+
+/**
+ * pci_pool_alloc - get a block of consistent memory
+ * @pool: pci pool that will produce the block
+ * @mem_flags: SLAB_KERNEL or SLAB_ATOMIC
+ * @handle: pointer to dma address of block
+ *
+ * This returns the kernel virtual address of a currently unused block,
+ * and reports its dma address through the handle.
+ * If such a memory block can't be allocated, null is returned.
+ */
+void *
+pci_pool_alloc (struct pci_pool *pool, int mem_flags, dma_addr_t *handle)
+{
+ unsigned long flags;
+ struct list_head *entry;
+ struct pci_page *page;
+ int map, block;
+ size_t offset;
+ void *retval;
+
+restart:
+ spin_lock_irqsave (&pool->lock, flags);
+ list_for_each (entry, &pool->page_list) {
+ int i;
+ page = list_entry (entry, struct pci_page, page_list);
+		/* only cacheable accesses here ... */
+ for (map = 0, i = 0;
+ i < pool->blocks_per_page;
+ i += BITS_PER_LONG, map++) {
+ if (page->bitmap [map] == 0)
+ continue;
+ block = ffz (~ page->bitmap [map]);
+ if ((i + block) < pool->blocks_per_page) {
+ clear_bit (block, &page->bitmap [map]);
+ offset = (BITS_PER_LONG * map) + block;
+ offset *= pool->size;
+ goto ready;
+ }
+ }
+ }
+ if (!(page = pool_alloc_page (pool, mem_flags))) {
+ if (mem_flags == SLAB_KERNEL) {
+ DECLARE_WAITQUEUE (wait, current);
+
+ current->state = TASK_INTERRUPTIBLE;
+ add_wait_queue (&pool->waitq, &wait);
+ spin_unlock_irqrestore (&pool->lock, flags);
+
+ schedule_timeout (POOL_TIMEOUT_JIFFIES);
+
+ current->state = TASK_RUNNING;
+ remove_wait_queue (&pool->waitq, &wait);
+ goto restart;
+ }
+ retval = 0;
+ goto done;
+ }
+
+ clear_bit (0, &page->bitmap [0]);
+ offset = 0;
+ready:
+ retval = offset + page->vaddr;
+ *handle = offset + page->dma;
+done:
+ spin_unlock_irqrestore (&pool->lock, flags);
+ return retval;
+}
+
+
+static struct pci_page *
+pool_find_page (struct pci_pool *pool, dma_addr_t dma)
+{
+ unsigned long flags;
+ struct list_head *entry;
+ struct pci_page *page;
+
+ spin_lock_irqsave (&pool->lock, flags);
+ list_for_each (entry, &pool->page_list) {
+ page = list_entry (entry, struct pci_page, page_list);
+ if (dma < page->dma)
+ continue;
+ if (dma < (page->dma + pool->allocation))
+ goto done;
+ }
+ page = 0;
+done:
+ spin_unlock_irqrestore (&pool->lock, flags);
+ return page;
+}
+
+
+/**
+ * pci_pool_free - put block back into pci pool
+ * @pool: the pci pool holding the block
+ * @vaddr: virtual address of block
+ * @dma: dma address of block
+ *
+ * Caller promises neither device nor driver will again touch this block
+ * unless it is first re-allocated.
+ */
+void
+pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t dma)
+{
+ struct pci_page *page;
+ unsigned long flags;
+ int map, block;
+
+ if ((page = pool_find_page (pool, dma)) == 0) {
+ printk (KERN_ERR "pci_pool_free %s/%s, %p/%x (bad dma)\n",
+ pool->dev ? pool->dev->slot_name : NULL,
+ pool->name, vaddr, (int) (dma & 0xffffffff));
+ return;
+ }
+#ifdef CONFIG_PCIPOOL_DEBUG
+ if (((dma - page->dma) + (void *)page->vaddr) != vaddr) {
+ printk (KERN_ERR "pci_pool_free %s/%s, %p (bad vaddr)/%x\n",
+ pool->dev ? pool->dev->slot_name : NULL,
+ pool->name, vaddr, (int) (dma & 0xffffffff));
+ return;
+ }
+#endif
+
+ block = dma - page->dma;
+ block /= pool->size;
+ map = block / BITS_PER_LONG;
+ block %= BITS_PER_LONG;
+
+#ifdef CONFIG_PCIPOOL_DEBUG
+ if (page->bitmap [map] & (1UL << block)) {
+ printk (KERN_ERR "pci_pool_free %s/%s, dma %x already free\n",
+ pool->dev ? pool->dev->slot_name : NULL,
+ pool->name, dma);
+ return;
+ }
+#endif
+ if (pool->flags & SLAB_POISON)
+ memset (vaddr, POOL_POISON_BYTE, pool->size);
+
+ spin_lock_irqsave (&pool->lock, flags);
+ set_bit (block, &page->bitmap [map]);
+ if (waitqueue_active (&pool->waitq))
+ wake_up (&pool->waitq);
+	/*
+	 * Resist the temptation to do
+	 *    if (!is_page_busy(bpp, page->bitmap)) pool_free_page(pool, page);
+	 * here: it is not interrupt-safe. Better to have empty pages hang
+	 * around.
+	 */
+ spin_unlock_irqrestore (&pool->lock, flags);
+}
+
+#endif /* XXX End of PCI pool allocator stuff. */
+
+
+void __devinit pci_init(void)
+{
+ struct pci_dev *dev;
+
+ pcibios_init();
+
+ pci_for_each_dev(dev) {
+ pci_fixup_device(PCI_FIXUP_FINAL, dev);
+ }
+
+#ifdef CONFIG_PM
+ pm_register(PM_PCI_DEV, 0, pci_pm_callback);
+#endif
+}
+
+static int __devinit pci_setup(char *str)
+{
+ while (str) {
+ char *k = strchr(str, ',');
+ if (k)
+ *k++ = 0;
+ if (*str && (str = pcibios_setup(str)) && *str) {
+ /* PCI layer options should be handled here */
+ printk(KERN_ERR "PCI: Unknown option `%s'\n", str);
+ }
+ str = k;
+ }
+ return 1;
+}
+
+__setup("pci=", pci_setup);
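+
+/*
+ * An illustrative boot-line use of the parser above: options after
+ * "pci=" are comma-separated and each is offered to pcibios_setup()
+ * in turn, e.g. (option names depend on the architecture's pcibios
+ * layer)
+ *
+ *	pci=conf1,nosort
+ *
+ * Anything pcibios_setup() does not consume triggers the "Unknown
+ * option" message above.
+ */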
+
+EXPORT_SYMBOL(pci_read_config_byte);
+EXPORT_SYMBOL(pci_read_config_word);
+EXPORT_SYMBOL(pci_read_config_dword);
+EXPORT_SYMBOL(pci_write_config_byte);
+EXPORT_SYMBOL(pci_write_config_word);
+EXPORT_SYMBOL(pci_write_config_dword);
+EXPORT_SYMBOL(pci_devices);
+EXPORT_SYMBOL(pci_root_buses);
+EXPORT_SYMBOL(pci_enable_device_bars);
+EXPORT_SYMBOL(pci_enable_device);
+EXPORT_SYMBOL(pci_disable_device);
+EXPORT_SYMBOL(pci_find_capability);
+EXPORT_SYMBOL(pci_release_regions);
+EXPORT_SYMBOL(pci_request_regions);
+EXPORT_SYMBOL(pci_release_region);
+EXPORT_SYMBOL(pci_request_region);
+EXPORT_SYMBOL(pci_find_class);
+EXPORT_SYMBOL(pci_find_device);
+EXPORT_SYMBOL(pci_find_slot);
+EXPORT_SYMBOL(pci_find_subsys);
+EXPORT_SYMBOL(pci_set_master);
+EXPORT_SYMBOL(pci_set_mwi);
+EXPORT_SYMBOL(pci_clear_mwi);
+EXPORT_SYMBOL(pdev_set_mwi);
+EXPORT_SYMBOL(pci_set_dma_mask);
+EXPORT_SYMBOL(pci_dac_set_dma_mask);
+EXPORT_SYMBOL(pci_assign_resource);
+EXPORT_SYMBOL(pci_register_driver);
+EXPORT_SYMBOL(pci_unregister_driver);
+EXPORT_SYMBOL(pci_dev_driver);
+EXPORT_SYMBOL(pci_match_device);
+EXPORT_SYMBOL(pci_find_parent_resource);
+
+#ifdef CONFIG_HOTPLUG
+EXPORT_SYMBOL(pci_setup_device);
+EXPORT_SYMBOL(pci_insert_device);
+EXPORT_SYMBOL(pci_remove_device);
+EXPORT_SYMBOL(pci_announce_device_to_drivers);
+EXPORT_SYMBOL(pci_add_new_bus);
+EXPORT_SYMBOL(pci_do_scan_bus);
+EXPORT_SYMBOL(pci_scan_slot);
+EXPORT_SYMBOL(pci_scan_bus);
+EXPORT_SYMBOL(pci_scan_device);
+EXPORT_SYMBOL(pci_read_bridge_bases);
+#ifdef CONFIG_PROC_FS
+EXPORT_SYMBOL(pci_proc_attach_device);
+EXPORT_SYMBOL(pci_proc_detach_device);
+EXPORT_SYMBOL(pci_proc_attach_bus);
+EXPORT_SYMBOL(pci_proc_detach_bus);
+EXPORT_SYMBOL(proc_bus_pci_dir);
+#endif
+#endif
+
+EXPORT_SYMBOL(pci_set_power_state);
+EXPORT_SYMBOL(pci_save_state);
+EXPORT_SYMBOL(pci_restore_state);
+EXPORT_SYMBOL(pci_enable_wake);
+
+/* Obsolete functions */
+
+EXPORT_SYMBOL(pcibios_present);
+EXPORT_SYMBOL(pcibios_read_config_byte);
+EXPORT_SYMBOL(pcibios_read_config_word);
+EXPORT_SYMBOL(pcibios_read_config_dword);
+EXPORT_SYMBOL(pcibios_write_config_byte);
+EXPORT_SYMBOL(pcibios_write_config_word);
+EXPORT_SYMBOL(pcibios_write_config_dword);
+EXPORT_SYMBOL(pcibios_find_class);
+EXPORT_SYMBOL(pcibios_find_device);
+
+/* Quirk info */
+
+EXPORT_SYMBOL(isa_dma_bridge_buggy);
+EXPORT_SYMBOL(pci_pci_problems);
+
+#if 0
+/* Pool allocator */
+
+EXPORT_SYMBOL (pci_pool_create);
+EXPORT_SYMBOL (pci_pool_destroy);
+EXPORT_SYMBOL (pci_pool_alloc);
+EXPORT_SYMBOL (pci_pool_free);
+
+#endif
diff --git a/xen/drivers/pci/pci.ids b/xen/drivers/pci/pci.ids
new file mode 100644
index 0000000000..c4e4283cc5
--- /dev/null
+++ b/xen/drivers/pci/pci.ids
@@ -0,0 +1,6778 @@
+#
+# List of PCI IDs
+#
+# Maintained by Martin Mares <mj@ucw.cz> and other volunteers from the
+# Linux PCI ID's Project at http://pciids.sf.net/. New data are always
+# welcome (if they are accurate); we're eagerly expecting new entries,
+# so if you have anything to contribute, please visit the home page or
+# send a diff -u against the most recent pci.ids to pci-ids@ucw.cz.
+#
+# $Id: pci.ids,v 1.46 2002/08/14 17:38:51 mares Exp $
+#
+
+# Vendors, devices and subsystems. Please keep sorted.
+
+# Syntax:
+# vendor vendor_name
+# device device_name <-- single tab
+# subvendor subdevice subsystem_name <-- two tabs
+
+0000 Gammagraphx, Inc.
+001a Ascend Communications, Inc.
+0033 Paradyne corp.
+003d Lockheed Martin-Marietta Corp
+0070 Hauppauge computer works Inc.
+0100 Ncipher Corp Ltd
+0675 Dynalink
+ 1700 IS64PH ISDN Adapter
+ 1702 IS64PH ISDN Adapter
+# Wrong ID used in subsystem ID of VIA USB controllers.
+0925 VIA Technologies, Inc. (Wrong ID)
+09c1 Arris
+ 0704 CM 200E Cable Modem
+0a89 BREA Technologies Inc
+0e11 Compaq Computer Corporation
+ 0001 PCI to EISA Bridge
+ 0002 PCI to ISA Bridge
+ 0049 NC7132 Gigabit Upgrade Module
+ 004a NC6136 Gigabit Server Adapter
+ 0508 Netelligent 4/16 Token Ring
+ 1000 Triflex/Pentium Bridge, Model 1000
+ 2000 Triflex/Pentium Bridge, Model 2000
+ 3032 QVision 1280/p
+ 3033 QVision 1280/p
+ 3034 QVision 1280/p
+ 4000 4000 [Triflex]
+ 6010 HotPlug PCI Bridge 6010
+ 7020 USB Controller
+ a0ec Fibre Channel Host Controller
+ a0f0 Advanced System Management Controller
+ a0f3 Triflex PCI to ISA Bridge
+ a0f7 PCI Hotplug Controller
+ 8086 002a PCI Hotplug Controller A
+ 8086 002b PCI Hotplug Controller B
+ a0f8 ZFMicro Chipset USB
+ a0fc Fibre Channel Host Controller
+ ae10 Smart-2/P RAID Controller
+ 0e11 4030 Smart-2/P Array Controller
+ 0e11 4031 Smart-2SL Array Controller
+ 0e11 4032 Smart Array Controller
+ 0e11 4033 Smart 3100ES Array Controller
+ ae29 MIS-L
+ ae2a MPC
+ ae2b MIS-E
+ ae31 System Management Controller
+ ae32 Netelligent 10/100
+ ae33 Triflex Dual EIDE Controller
+ ae34 Netelligent 10
+ ae35 Integrated NetFlex-3/P
+ ae40 Netelligent 10/100 Dual
+ ae43 ProLiant Integrated Netelligent 10/100
+ ae69 CETUS-L
+ ae6c Northstar
+ ae6d NorthStar CPU to PCI Bridge
+ b011 Integrated Netelligent 10/100
+ b012 Netelligent 10 T/2
+ b01e NC3120 Fast Ethernet NIC
+ b01f NC3122 Fast Ethernet NIC
+ b02f NC1120 Ethernet NIC
+ b030 Netelligent WS 5100
+ b04a 10/100 TX PCI Intel WOL UTP Controller
+ b060 Smart Array 5300 Controller
+ b0c6 NC3161 Fast Ethernet NIC
+ b0c7 NC3160 Fast Ethernet NIC
+ b0d7 NC3121 Fast Ethernet NIC
+ b0dd NC3131 Fast Ethernet NIC
+ b0de NC3132 Fast Ethernet Module
+ b0df NC6132 Gigabit Module
+ b0e0 NC6133 Gigabit Module
+ b0e1 NC3133 Fast Ethernet Module
+ b123 NC6134 Gigabit NIC
+ b134 NC3163 Fast Ethernet NIC
+ b13c NC3162 Fast Ethernet NIC
+ b144 NC3123 Fast Ethernet NIC
+ b163 NC3134 Fast Ethernet NIC
+ b164 NC3165 Fast Ethernet Upgrade Module
+ b178 Smart Array 5i/532
+ b1a4 NC7131 Gigabit Server Adapter
+ f130 NetFlex-3/P ThunderLAN 1.0
+ f150 NetFlex-3/P ThunderLAN 2.3
+0e55 HaSoTec GmbH
+1000 LSI Logic / Symbios Logic (formerly NCR)
+ 0001 53c810
+ 1000 1000 8100S
+ 0002 53c820
+ 0003 53c825
+ 0004 53c815
+ 0005 53c810AP
+ 0006 53c860
+ 000a 53c1510
+ 000b 53c896
+ 000c 53c895
+ 1de1 3907 DC-390U2W
+ 000d 53c885
+ 000f 53c875
+ 0e11 7004 Embedded Ultra Wide SCSI Controller
+ 1092 8760 FirePort 40 Dual SCSI Controller
+ 1de1 3904 DC390F Ultra Wide SCSI Controller
+ 0010 53c895
+ 0e11 4040 Integrated Array Controller
+ 0e11 4048 Integrated Array Controller
+ 0012 53c895a
+ 0013 53c875a
+ 0020 53c1010 Ultra3 SCSI Adapter
+ 1de1 1020 DC-390U3W
+ 0021 53c1010 66MHz Ultra3 SCSI Adapter
+ 0030 53c1030
+ 1028 1010 LSI U320 SCSI Controller
+ 0040 53c1035
+ 008f 53c875J
+ 1092 8000 FirePort 40 SCSI Controller
+ 1092 8760 FirePort 40 Dual SCSI Host Adapter
+ 0621 FC909
+ 0622 FC929
+ 0623 FC929 LAN
+ 0624 FC919
+ 0625 FC919 LAN
+ 0626 FC929X
+ 0627 FC929X LAN
+ 0628 FC919X
+ 0629 FC919X LAN
+ 0701 83C885 NT50 DigitalScape Fast Ethernet
+ 0702 Yellowfin G-NIC gigabit ethernet
+ 1318 0000 PEI100X
+ 0901 61C102
+ 1000 63C815
+ 1960 PowerEdge Expandable RAID Controller 4
+ 1028 0518 PowerEdge Expandable RAID Controller 4/DC
+ 1028 0520 PowerEdge Expandable RAID Controller 4/SC
+ 1028 0531 PowerEdge Expandable RAID Controller 4/QC
+1001 Kolter Electronic
+ 0010 PCI 1616 Measurement card with 32 digital I/O lines
+ 0011 OPTO-PCI Opto-Isolated digital I/O board
+ 0012 PCI-AD/DA Analogue I/O board
+ 0013 PCI-OPTO-RELAIS Digital I/O board with relay outputs
+ 0014 PCI-Counter/Timer Counter Timer board
+ 0015 PCI-DAC416 Analogue output board
+ 0016 PCI-MFB Analogue I/O board
+ 0017 PROTO-3 PCI Prototyping board
+ 9100 INI-9100/9100W SCSI Host
+1002 ATI Technologies Inc
+ 4158 68800AX [Mach32]
+ 4242 Radeon 8500 DV
+ 1002 02aa Radeon 8500 AIW DV Edition
+ 4354 215CT [Mach64 CT]
+ 4358 210888CX [Mach64 CX]
+ 4554 210888ET [Mach64 ET]
+ 4654 Mach64 VT
+ 4742 3D Rage Pro AGP 1X/2X
+ 1002 0040 Rage Pro Turbo AGP 2X
+ 1002 0044 Rage Pro Turbo AGP 2X
+ 1002 0061 Rage Pro AIW AGP 2X
+ 1002 0062 Rage Pro AIW AGP 2X
+ 1002 0063 Rage Pro AIW AGP 2X
+ 1002 0080 Rage Pro Turbo AGP 2X
+ 1002 0084 Rage Pro Turbo AGP 2X
+ 1002 4742 Rage Pro Turbo AGP 2X
+ 1002 8001 Rage Pro Turbo AGP 2X
+ 1028 0082 Rage Pro Turbo AGP 2X
+ 1028 4082 Optiplex GX1 Onboard Display Adapter
+ 1028 8082 Rage Pro Turbo AGP 2X
+ 1028 c082 Rage Pro Turbo AGP 2X
+ 8086 4152 Xpert 98D AGP 2X
+ 8086 464a Rage Pro Turbo AGP 2X
+ 4744 3D Rage Pro AGP 1X
+ 1002 4744 Rage Pro Turbo AGP
+ 4747 3D Rage Pro
+ 4749 3D Rage Pro
+ 1002 0061 Rage Pro AIW
+ 1002 0062 Rage Pro AIW
+ 474c Rage XC
+ 474d Rage XL AGP 2X
+ 1002 0004 Xpert 98 RXL AGP 2X
+ 1002 0008 Xpert 98 RXL AGP 2X
+ 1002 0080 Rage XL AGP 2X
+ 1002 0084 Xpert 98 AGP 2X
+ 1002 474d Rage XL AGP
+ 1033 806a Rage XL AGP
+ 474e Rage XC AGP
+ 1002 474e Rage XC AGP
+ 474f Rage XL
+ 1002 0008 Rage XL
+ 1002 474f Rage XL
+ 4750 3D Rage Pro 215GP
+ 1002 0040 Rage Pro Turbo
+ 1002 0044 Rage Pro Turbo
+ 1002 0080 Rage Pro Turbo
+ 1002 0084 Rage Pro Turbo
+ 1002 4750 Rage Pro Turbo
+ 4751 3D Rage Pro 215GQ
+ 4752 Rage XL
+ 1002 0008 Rage XL
+ 1002 4752 Rage XL
+ 4753 Rage XC
+ 1002 4753 Rage XC
+ 4754 3D Rage I/II 215GT [Mach64 GT]
+ 4755 3D Rage II+ 215GTB [Mach64 GTB]
+ 4756 3D Rage IIC 215IIC [Mach64 GT IIC]
+ 1002 4756 Rage IIC
+ 4757 3D Rage IIC AGP
+ 1002 4757 Rage IIC AGP
+ 1028 0089 Rage 3D IIC
+ 1028 4082 Rage 3D IIC
+ 1028 8082 Rage 3D IIC
+ 1028 c082 Rage 3D IIC
+ 4758 210888GX [Mach64 GX]
+ 4759 3D Rage IIC
+ 475a 3D Rage IIC AGP
+ 1002 0087 Rage 3D IIC
+ 1002 475a Rage IIC AGP
+ 4c42 3D Rage LT Pro AGP-133
+ 0e11 b0e8 Rage 3D LT Pro
+ 0e11 b10e 3D Rage LT Pro (Compaq Armada 1750)
+ 1002 0040 Rage LT Pro AGP 2X
+ 1002 0044 Rage LT Pro AGP 2X
+ 1002 4c42 Rage LT Pro AGP 2X
+ 1002 8001 Rage LT Pro AGP 2X
+ 1028 0085 Rage 3D LT Pro
+ 4c44 3D Rage LT Pro AGP-66
+ 4c45 Rage Mobility M3 AGP
+ 4c46 Rage Mobility M3 AGP 2x
+ 4c47 3D Rage LT-G 215LG
+ 4c49 3D Rage LT Pro
+ 1002 0004 Rage LT Pro
+ 1002 0040 Rage LT Pro
+ 1002 0044 Rage LT Pro
+ 1002 4c49 Rage LT Pro
+ 4c4d Rage Mobility P/M AGP 2x
+ 1002 0084 Xpert 98 AGP 2X (Mobility)
+ 4c4e Rage Mobility L AGP 2x
+ 4c50 3D Rage LT Pro
+ 1002 4c50 Rage LT Pro
+ 4c51 3D Rage LT Pro
+ 4c52 Rage Mobility P/M
+ 4c53 Rage Mobility L
+ 4c54 264LT [Mach64 LT]
+ 4c57 Radeon Mobility M7 LW
+ 1028 00e6 Radeon Mobility M7 LW (Dell Inspiron 8100)
+ 4c58 Radeon Mobility M7 LX [Radeon Mobility FireGL 7800]
+ 4c59 Radeon Mobility M6 LY
+ 1014 0235 ThinkPad A30p (2653-64G)
+ 1014 0239 ThinkPad X22/X23/X24
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 4c5a Radeon Mobility M6 LZ
+ 4d46 Rage Mobility M4 AGP
+ 4d4c Rage Mobility M4 AGP
+ 5041 Rage 128 PA/PRO
+ 5042 Rage 128 PB/PRO AGP 2x
+ 5043 Rage 128 PC/PRO AGP 4x
+ 5044 Rage 128 PD/PRO TMDS
+ 1002 0028 Rage 128 AIW
+ 1002 0029 Rage 128 AIW
+ 5045 Rage 128 PE/PRO AGP 2x TMDS
+ 5046 Rage 128 PF/PRO AGP 4x TMDS
+ 1002 0004 Rage Fury Pro
+ 1002 0008 Rage Fury Pro/Xpert 2000 Pro
+ 1002 0014 Rage Fury Pro
+ 1002 0018 Rage Fury Pro/Xpert 2000 Pro
+ 1002 0028 Rage 128 Pro AIW AGP
+ 1002 002a Rage 128 Pro AIW AGP
+ 1002 0048 Rage Fury Pro
+ 1002 2000 Rage Fury MAXX AGP 4x (TMDS) (VGA device)
+ 1002 2001 Rage Fury MAXX AGP 4x (TMDS) (Extra device?!)
+ 5047 Rage 128 PG/PRO
+ 5048 Rage 128 PH/PRO AGP 2x
+ 5049 Rage 128 PI/PRO AGP 4x
+ 504a Rage 128 PJ/PRO TMDS
+ 504b Rage 128 PK/PRO AGP 2x TMDS
+ 504c Rage 128 PL/PRO AGP 4x TMDS
+ 504d Rage 128 PM/PRO
+ 504e Rage 128 PN/PRO AGP 2x
+ 504f Rage 128 PO/PRO AGP 4x
+ 5050 Rage 128 PP/PRO TMDS
+ 1002 0008 Xpert 128
+ 5051 Rage 128 PQ/PRO AGP 2x TMDS
+ 5052 Rage 128 PR/PRO AGP 4x TMDS
+ 5053 Rage 128 PS/PRO
+ 5054 Rage 128 PT/PRO AGP 2x
+ 5055 Rage 128 PU/PRO AGP 4x
+ 5056 Rage 128 PV/PRO TMDS
+ 5057 Rage 128 PW/PRO AGP 2x TMDS
+ 5058 Rage 128 PX/PRO AGP 4x TMDS
+ 5144 Radeon QD
+ 1002 0008 Radeon 7000/Radeon VE
+ 1002 0009 Radeon 7000/Radeon
+ 1002 000a Radeon 7000/Radeon
+ 1002 001a Radeon 7000/Radeon
+ 1002 0029 Radeon AIW
+ 1002 0038 Radeon 7000/Radeon
+ 1002 0039 Radeon 7000/Radeon
+ 1002 008a Radeon 7000/Radeon
+ 1002 00ba Radeon 7000/Radeon
+ 1002 0139 Radeon 7000/Radeon
+ 1002 028a Radeon 7000/Radeon
+ 1002 02aa Radeon AIW
+ 1002 053a Radeon 7000/Radeon
+ 5145 Radeon QE
+ 5146 Radeon QF
+ 5147 Radeon QG
+ 5148 Radeon R200 QH [Radeon 8500]
+ 1002 0152 FireGL 8800
+ 1002 0172 FireGL 8700
+ 5149 Radeon R200 QI
+ 514a Radeon R200 QJ
+ 514b Radeon R200 QK
+ 514c Radeon R200 QL [Radeon 8500 LE]
+ 1002 003a Radeon R200 QL [Radeon 8500 LE]
+ 1002 013a Radeon 8500
+ 5157 Radeon 7500 QW
+ 1002 013a Radeon 7500
+ 174b 7161 Radeon RV200 QW [Radeon 7500 LE]
+ 5158 Radeon 7500 QX
+ 5159 Radeon VE QY
+ 1002 000a Radeon 7000/Radeon VE
+ 1002 0038 Radeon 7000/Radeon VE
+ 1002 003a Radeon 7000/Radeon VE
+ 1002 00ba Radeon 7000/Radeon VE
+ 1002 013a Radeon 7000/Radeon VE
+ 174b 7112 Radeon 7000 64M TVO
+ 515a Radeon VE QZ
+ 5168 Radeon R200 Qh
+ 5169 Radeon R200 Qi
+ 516a Radeon R200 Qj
+ 516b Radeon R200 Qk
+ 5245 Rage 128 RE/SG
+ 1002 0008 Xpert 128
+ 1002 0028 Rage 128 AIW
+ 1002 0029 Rage 128 AIW
+ 1002 0068 Rage 128 AIW
+ 5246 Rage 128 RF/SG AGP
+ 1002 0004 Magnum/Xpert 128/Xpert 99
+ 1002 0008 Magnum/Xpert128/X99/Xpert2000
+ 1002 0028 Rage 128 AIW AGP
+ 1002 0044 Rage Fury/Xpert 128/Xpert 2000
+ 1002 0068 Rage 128 AIW AGP
+ 1002 0448 Rage Fury
+ 5247 Rage 128 RG
+ 524b Rage 128 RK/VR
+ 524c Rage 128 RL/VR AGP
+ 1002 0008 Xpert 99/Xpert 2000
+ 1002 0088 Xpert 99
+ 5345 Rage 128 SE/4x
+ 5346 Rage 128 SF/4x AGP 2x
+ 5347 Rage 128 SG/4x AGP 4x
+ 5348 Rage 128 SH
+ 534b Rage 128 SK/4x
+ 534c Rage 128 SL/4x AGP 2x
+ 534d Rage 128 SM/4x AGP 4x
+ 1002 0008 Xpert 99/Xpert 2000
+ 1002 0018 Xpert 2000
+ 534e Rage 128 4x
+ 5354 Mach 64 VT
+ 1002 5654 Mach 64 reference
+ 5446 Rage 128 Pro Ultra TF
+ 1002 0004 Rage Fury Pro
+ 1002 0008 Rage Fury Pro/Xpert 2000 Pro
+ 1002 0018 Rage Fury Pro/Xpert 2000 Pro
+ 1002 0028 Rage 128 AIW Pro AGP
+ 1002 0029 Rage 128 AIW
+ 1002 002a Rage 128 AIW Pro AGP
+ 1002 002b Rage 128 AIW
+ 1002 0048 Xpert 2000 Pro
+ 544c Rage 128 Pro Ultra TL
+ 5452 Rage 128 Pro Ultra TR
+ 1002 001c Rage 128 Pro 4XL
+ 103c 1279 Rage 128 Pro 4XL
+ 5453 Rage 128 Pro Ultra TS
+ 5454 Rage 128 Pro Ultra TT
+ 5455 Rage 128 Pro Ultra TU
+ 5654 264VT [Mach64 VT]
+ 1002 5654 Mach64VT Reference
+ 5655 264VT3 [Mach64 VT3]
+ 5656 264VT4 [Mach64 VT4]
+1003 ULSI Systems
+ 0201 US201
+1004 VLSI Technology Inc
+ 0005 82C592-FC1
+ 0006 82C593-FC1
+ 0007 82C594-AFC2
+ 0008 82C596/7 [Wildcat]
+ 0009 82C597-AFC2
+ 000c 82C541 [Lynx]
+ 000d 82C543 [Lynx]
+ 0101 82C532
+ 0102 82C534 [Eagle]
+ 0103 82C538
+ 0104 82C535
+ 0105 82C147
+ 0200 82C975
+ 0280 82C925
+ 0304 QSound ThunderBird PCI Audio
+ 1004 0304 QSound ThunderBird PCI Audio
+ 122d 1206 DSP368 Audio
+ 1483 5020 XWave Thunder 3D Audio
+ 0305 QSound ThunderBird PCI Audio Gameport
+ 1004 0305 QSound ThunderBird PCI Audio Gameport
+ 122d 1207 DSP368 Audio Gameport
+ 1483 5021 XWave Thunder 3D Audio Gameport
+ 0306 QSound ThunderBird PCI Audio Support Registers
+ 1004 0306 QSound ThunderBird PCI Audio Support Registers
+ 122d 1208 DSP368 Audio Support Registers
+ 1483 5022 XWave Thunder 3D Audio Support Registers
+ 0702 VAS96011 [Golden Gate II]
+1005 Avance Logic Inc. [ALI]
+ 2064 ALG2032/2064
+ 2128 ALG2364A
+ 2301 ALG2301
+ 2302 ALG2302
+ 2364 ALG2364
+ 2464 ALG2364A
+ 2501 ALG2564A/25128A
+1006 Reply Group
+1007 NetFrame Systems Inc
+1008 Epson
+100a Phoenix Technologies
+100b National Semiconductor Corporation
+ 0001 DP83810
+ 0002 87415/87560 IDE
+ 000e 87560 Legacy I/O
+ 000f FireWire Controller
+ 0011 NS87560 National PCI System I/O
+ 0012 USB Controller
+ 0020 DP83815 (MacPhyter) Ethernet Controller
+ 0022 DP83820 10/100/1000 Ethernet Controller
+ 0500 SCx200 Bridge
+ 0501 SCx200 SMI
+ 0502 SCx200 IDE
+ 0503 SCx200 Audio
+ 0504 SCx200 Video
+ 0505 SCx200 XBus
+ d001 87410 IDE
+100c Tseng Labs Inc
+ 3202 ET4000/W32p rev A
+ 3205 ET4000/W32p rev B
+ 3206 ET4000/W32p rev C
+ 3207 ET4000/W32p rev D
+ 3208 ET6000
+ 4702 ET6300
+100d AST Research Inc
+100e Weitek
+ 9000 P9000 Viper
+ 9001 P9000 Viper
+ 9002 P9000 Viper
+ 9100 P9100 Viper Pro/SE
+1010 Video Logic, Ltd.
+1011 Digital Equipment Corporation
+ 0001 DECchip 21050
+ 0002 DECchip 21040 [Tulip]
+ 0004 DECchip 21030 [TGA]
+ 0007 NVRAM [Zephyr NVRAM]
+ 0008 KZPSA [KZPSA]
+ 0009 DECchip 21140 [FasterNet]
+ 1025 0310 21140 Fast Ethernet
+ 10b8 2001 SMC9332BDT EtherPower 10/100
+ 10b8 2002 SMC9332BVT EtherPower T4 10/100
+ 10b8 2003 SMC9334BDT EtherPower 10/100 (1-port)
+ 1109 2400 ANA-6944A/TX Fast Ethernet
+ 1112 2300 RNS2300 Fast Ethernet
+ 1112 2320 RNS2320 Fast Ethernet
+ 1112 2340 RNS2340 Fast Ethernet
+ 1113 1207 EN-1207-TX Fast Ethernet
+ 1186 1100 DFE-500TX Fast Ethernet
+ 1186 1112 DFE-570TX Fast Ethernet
+ 1186 1140 DFE-660 Cardbus Ethernet 10/100
+ 1186 1142 DFE-660 Cardbus Ethernet 10/100
+ 11f6 0503 Freedomline Fast Ethernet
+ 1282 9100 AEF-380TXD Fast Ethernet
+ 1385 1100 FA310TX Fast Ethernet
+ 2646 0001 KNE100TX Fast Ethernet
+ 000a 21230 Video Codec
+ 000d PBXGB [TGA2]
+ 000f DEFPA
+ 0014 DECchip 21041 [Tulip Pass 3]
+ 1186 0100 DE-530+
+ 0016 DGLPB [OPPO]
+ 0019 DECchip 21142/43
+ 1011 500a DE500A Fast Ethernet
+ 1011 500b DE500B Fast Ethernet
+ 1014 0001 10/100 EtherJet Cardbus
+ 1025 0315 ALN315 Fast Ethernet
+ 1033 800c PC-9821-CS01
+ 1033 800d PC-9821NR-B06
+ 108d 0016 Rapidfire 2327 10/100 Ethernet
+ 108d 0017 GoCard 2250 Ethernet 10/100 Cardbus
+ 10b8 2005 SMC8032DT Extreme Ethernet 10/100
+ 10b8 8034 SMC8034 Extreme Ethernet 10/100
+ 10ef 8169 Cardbus Fast Ethernet
+ 1109 2a00 ANA-6911A/TX Fast Ethernet
+ 1109 2b00 ANA-6911A/TXC Fast Ethernet
+ 1109 3000 ANA-6922/TX Fast Ethernet
+ 1113 1207 Cheetah Fast Ethernet
+ 1113 2220 Cardbus Fast Ethernet
+ 115d 0002 Cardbus Ethernet 10/100
+ 1179 0203 Fast Ethernet
+ 1179 0204 Cardbus Fast Ethernet
+ 1186 1100 DFE-500TX Fast Ethernet
+ 1186 1101 DFE-500TX Fast Ethernet
+ 1186 1102 DFE-500TX Fast Ethernet
+ 1259 2800 AT-2800Tx Fast Ethernet
+ 1266 0004 Eagle Fast EtherMAX
+ 12af 0019 NetFlyer Cardbus Fast Ethernet
+ 1374 0001 Cardbus Ethernet Card 10/100
+ 1374 0002 Cardbus Ethernet Card 10/100
+ 1374 0007 Cardbus Ethernet Card 10/100
+ 1374 0008 Cardbus Ethernet Card 10/100
+ 1395 0001 10/100 Ethernet CardBus PC Card
+ 13d1 ab01 EtherFast 10/100 Cardbus (PCMPC200)
+ 8086 0001 EtherExpress PRO/100 Mobile CardBus 32
+ 001a Farallon PN9000SX
+ 0021 DECchip 21052
+ 0022 DECchip 21150
+ 0023 DECchip 21150
+ 0024 DECchip 21152
+ 0025 DECchip 21153
+ 0026 DECchip 21154
+ 0034 56k Modem Cardbus
+ 1374 0003 56k Modem Cardbus
+ 0045 DECchip 21553
+ 0046 DECchip 21554
+ 0e11 4050 Integrated Smart Array
+ 0e11 4051 Integrated Smart Array
+ 0e11 4058 Integrated Smart Array
+ 103c 10c2 Hewlett-Packard NetRAID-4M
+ 12d9 000a VoIP PCI Gateway
+ 9005 0365 Adaptec 5400S
+ 9005 1364 Dell PowerEdge RAID Controller 2
+ 9005 1365 Dell PowerEdge RAID Controller 2
+ e4bf 1000 CC8-1-BLUES
+ 1065 StrongARM DC21285
+ 1069 0020 DAC960P / DAC1164P
+1012 Micronics Computers Inc
+1013 Cirrus Logic
+ 0038 GD 7548
+ 0040 GD 7555 Flat Panel GUI Accelerator
+ 004c GD 7556 Video/Graphics LCD/CRT Ctrlr
+ 00a0 GD 5430/40 [Alpine]
+ 00a2 GD 5432 [Alpine]
+ 00a4 GD 5434-4 [Alpine]
+ 00a8 GD 5434-8 [Alpine]
+ 00ac GD 5436 [Alpine]
+ 00b0 GD 5440
+ 00b8 GD 5446
+ 00bc GD 5480
+ 1013 00bc CL-GD5480
+ 00d0 GD 5462
+ 00d2 GD 5462 [Laguna I]
+ 00d4 GD 5464 [Laguna]
+ 00d5 GD 5464 BD [Laguna]
+ 00d6 GD 5465 [Laguna]
+ 13ce 8031 Barco Metheus 2 Megapixel, Dual Head
+ 13cf 8031 Barco Metheus 2 Megapixel, Dual Head
+ 00e8 GD 5436U
+ 1100 CL 6729
+ 1110 PD 6832 PCMCIA/CardBus Ctrlr
+ 1112 PD 6834 PCMCIA/CardBus Ctrlr
+ 1113 PD 6833 PCMCIA/CardBus Ctrlr
+ 1200 GD 7542 [Nordic]
+ 1202 GD 7543 [Viking]
+ 1204 GD 7541 [Nordic Light]
+ 4400 CD 4400
+ 6001 CS 4610/11 [CrystalClear SoundFusion Audio Accelerator]
+ 1014 1010 CS4610 SoundFusion Audio Accelerator
+ 6003 CS 4614/22/24 [CrystalClear SoundFusion Audio Accelerator]
+ 1013 4280 Crystal SoundFusion PCI Audio Accelerator
+ 1681 0050 Hercules Game Theater XP
+ 1681 a011 Hercules Fortissimo III 7.1
+ 6004 CS 4614/22/24 [CrystalClear SoundFusion Audio Accelerator]
+ 6005 Crystal CS4281 PCI Audio
+ 1013 4281 Crystal CS4281 PCI Audio
+ 10cf 10a8 Crystal CS4281 PCI Audio
+ 10cf 10a9 Crystal CS4281 PCI Audio
+ 10cf 10aa Crystal CS4281 PCI Audio
+ 10cf 10ab Crystal CS4281 PCI Audio
+ 10cf 10ac Crystal CS4281 PCI Audio
+ 10cf 10ad Crystal CS4281 PCI Audio
+ 10cf 10b4 Crystal CS4281 PCI Audio
+ 1179 0001 Crystal CS4281 PCI Audio
+ 14c0 000c Crystal CS4281 PCI Audio
+1014 IBM
+ 0002 PCI to MCA Bridge
+ 0005 Alta Lite
+ 0007 Alta MP
+ 000a Fire Coral
+ 0017 CPU to PCI Bridge
+ 0018 TR Auto LANstreamer
+ 001b GXT-150P
+ 001c Carrera
+ 001d 82G2675
+ 0020 MCA
+ 0022 IBM27-82351
+ 002d Python
+ 002e ServeRAID-3x
+ 0036 Miami
+ 003a CPU to PCI Bridge
+ 003e 16/4 Token ring UTP/STP controller
+ 1014 003e Token-Ring Adapter
+ 1014 00cd Token-Ring Adapter + Wake-On-LAN
+ 1014 00ce 16/4 Token-Ring Adapter 2
+ 1014 00cf 16/4 Token-Ring Adapter Special
+ 1014 00e4 High-Speed 100/16/4 Token-Ring Adapter
+ 1014 00e5 16/4 Token-Ring Adapter 2 + Wake-On-LAN
+ 1014 016d iSeries 2744 Card
+ 0045 SSA Adapter
+ 0046 MPIC interrupt controller
+ 0047 PCI to PCI Bridge
+ 0048 PCI to PCI Bridge
+ 0049 Warhead SCSI Controller
+ 004e ATM Controller (14104e00)
+ 004f ATM Controller (14104f00)
+ 0050 ATM Controller (14105000)
+ 0053 25 MBit ATM Controller
+ 0057 MPEG PCI Bridge
+ 005c i82557B 10/100
+ 007c ATM Controller (14107c00)
+ 007d 3780IDSP [MWave]
+ 0090 GXT 3000P
+ 1014 008e GXT-3000P
+ 0095 20H2999 PCI Docking Bridge
+ 0096 Chukar chipset SCSI controller
+ 1014 0097 iSeries 2778 DASD IOA
+ 1014 0098 iSeries 2763 DASD IOA
+ 1014 0099 iSeries 2748 DASD IOA
+ 00a5 ATM Controller (1410a500)
+ 00a6 ATM 155MBPS MM Controller (1410a600)
+ 00b7 256-bit Graphics Rasterizer [Fire GL1]
+ 1902 00b8 Fire GL1
+ 00be ATM 622MBPS Controller (1410be00)
+ 00fc CPC710 Dual Bridge and Memory Controller (PCI-64)
+ 0105 CPC710 Dual Bridge and Memory Controller (PCI-32)
+ 0142 Yotta Video Compositor Input
+ 1014 0143 Yotta Input Controller (ytin)
+ 0144 Yotta Video Compositor Output
+ 1014 0145 Yotta Output Controller (ytout)
+ 0156 405GP PLB to PCI Bridge
+ 01a7 PCI-X to PCI-X Bridge
+ 01bd Netfinity ServeRAID controller
+ 01be ServeRAID-4M
+ 01bf ServeRAID-4L
+ 022e ServeRAID-4H
+ ffff MPIC-2 interrupt controller
+1015 LSI Logic Corp of Canada
+1016 ICL Personal Systems
+1017 SPEA Software AG
+ 5343 SPEA 3D Accelerator
+1018 Unisys Systems
+1019 Elitegroup Computer Systems
+101a AT&T GIS (NCR)
+ 0005 100VG ethernet
+101b Vitesse Semiconductor
+101c Western Digital
+ 0193 33C193A
+ 0196 33C196A
+ 0197 33C197A
+ 0296 33C296A
+ 3193 7193
+ 3197 7197
+ 3296 33C296A
+ 4296 34C296
+ 9710 Pipeline 9710
+ 9712 Pipeline 9712
+ c24a 90C
+101e American Megatrends Inc.
+ 1960 MegaRAID
+ 101e 0471 MegaRAID 471 Enterprise 1600 RAID Controller
+ 101e 0475 MegaRAID 475 Express 500 RAID Controller
+ 101e 0493 MegaRAID 493 Elite 1600 RAID Controller
+ 1028 0471 PowerEdge RAID Controller 3/QC
+ 1028 0475 PowerEdge RAID Controller 3/SC
+ 1028 0493 PowerEdge RAID Controller 3/DC
+ 1028 0511 PowerEdge Cost Effective RAID Controller ATA100/4Ch
+ 9010 MegaRAID 428 Ultra RAID Controller
+ 9030 EIDE Controller
+ 9031 EIDE Controller
+ 9032 EIDE & SCSI Controller
+ 9033 SCSI Controller
+ 9040 Multimedia card
+ 9060 MegaRAID 434 Ultra GT RAID Controller
+ 9063 MegaRAC
+ 101e 0767 Dell Remote Assistant Card 2
+101f PictureTel
+1020 Hitachi Computer Products
+1021 OKI Electric Industry Co. Ltd.
+1022 Advanced Micro Devices [AMD]
+ 2000 79c970 [PCnet LANCE]
+ 1014 2000 NetFinity 10/100 Fast Ethernet
+ 103c 104c Ethernet with LAN remote power Adapter
+ 103c 1064 Ethernet with LAN remote power Adapter
+ 103c 1065 Ethernet with LAN remote power Adapter
+ 103c 106c Ethernet with LAN remote power Adapter
+ 103c 106e Ethernet with LAN remote power Adapter
+ 103c 10ea Ethernet with LAN remote power Adapter
+ 1113 1220 EN1220 10/100 Fast Ethernet
+ 1259 2450 AT-2450 10/100 Fast Ethernet
+ 1259 2454 AT-2450v4 10Mb Ethernet Adapter
+ 1259 2700 AT-2700TX 10/100 Fast Ethernet
+ 1259 2701 AT-2700FX 100Mb Ethernet
+ 2001 79c978 [HomePNA]
+ 1092 0a78 Multimedia Home Network Adapter
+ 1668 0299 ActionLink Home Network Adapter
+ 2020 53c974 [PCscsi]
+ 2040 79c974
+ 3000 ELanSC520 Microcontroller
+ 7006 AMD-751 [Irongate] System Controller
+ 7007 AMD-751 [Irongate] AGP Bridge
+ 700c AMD-760 MP [IGD4-2P] System Controller
+ 700d AMD-760 MP [IGD4-2P] AGP Bridge
+ 700e AMD-760 [IGD4-1P] System Controller
+ 700f AMD-760 [IGD4-1P] AGP Bridge
+ 7400 AMD-755 [Cobra] ISA
+ 7401 AMD-755 [Cobra] IDE
+ 7403 AMD-755 [Cobra] ACPI
+ 7404 AMD-755 [Cobra] USB
+ 7408 AMD-756 [Viper] ISA
+ 7409 AMD-756 [Viper] IDE
+ 740b AMD-756 [Viper] ACPI
+ 740c AMD-756 [Viper] USB
+ 7410 AMD-766 [ViperPlus] ISA
+ 7411 AMD-766 [ViperPlus] IDE
+ 7413 AMD-766 [ViperPlus] ACPI
+ 7414 AMD-766 [ViperPlus] USB
+ 7440 AMD-768 [Opus] ISA
+ 1043 8044 A7M-D Mainboard
+ 7441 AMD-768 [Opus] IDE
+ 7443 AMD-768 [Opus] ACPI
+ 1043 8044 A7M-D Mainboard
+ 7445 AMD-768 [Opus] Audio
+ 7448 AMD-768 [Opus] PCI
+ 7449 AMD-768 [Opus] USB
+ 7454 AMD-8151 System Controller
+ 7455 AMD-8151 AGP Bridge
+ 7460 AMD-8111 PCI
+ 7461 AMD-8111 USB
+ 7462 AMD-8111 Ethernet
+ 7468 AMD-8111 LPC
+ 7469 AMD-8111 IDE
+ 746a AMD-8111 SMBus 2.0
+ 746b AMD-8111 ACPI
+ 746d AMD-8111 AC97 Audio
+ 756b AMD-8111 ACPI
+1023 Trident Microsystems
+ 0194 82C194
+ 2000 4DWave DX
+ 2001 4DWave NX
+ 8400 CyberBlade/i7
+ 1023 8400 CyberBlade i7 AGP
+ 8420 CyberBlade/i7d
+ 0e11 b15a CyberBlade i7 AGP
+ 8500 CyberBlade/i1
+ 8520 CyberBlade i1
+ 0e11 b16e CyberBlade i1 AGP
+ 1023 8520 CyberBlade i1 AGP
+ 8820 CyberBlade XPAi1
+ 9320 TGUI 9320
+ 9350 GUI Accelerator
+ 9360 Flat panel GUI Accelerator
+ 9382 Cyber 9382 [Reference design]
+ 9383 Cyber 9383 [Reference design]
+ 9385 Cyber 9385 [Reference design]
+ 9386 Cyber 9386
+ 9388 Cyber 9388
+ 9397 Cyber 9397
+ 939a Cyber 9397DVD
+ 9420 TGUI 9420
+ 9430 TGUI 9430
+ 9440 TGUI 9440
+ 9460 TGUI 9460
+ 9470 TGUI 9470
+ 9520 Cyber 9520
+ 9525 Cyber 9525
+ 10cf 1094 Lifebook C6155
+ 9540 Cyber 9540
+ 9660 TGUI 9660/938x/968x
+ 9680 TGUI 9680
+ 9682 TGUI 9682
+ 9683 TGUI 9683
+ 9685 ProVIDIA 9685
+ 9750 3DImage 9750
+ 1014 9750 3DImage 9750
+ 1023 9750 3DImage 9750
+ 9753 TGUI 9753
+ 9754 TGUI 9754
+ 9759 TGUI 975
+ 9783 TGUI 9783
+ 9785 TGUI 9785
+ 9850 3DImage 9850
+ 9880 Blade 3D PCI/AGP
+ 1023 9880 Blade 3D
+ 9910 CyberBlade/XP
+ 9930 CyberBlade/XPm
+1024 Zenith Data Systems
+1025 Acer Incorporated [ALI]
+ 1435 M1435
+ 1445 M1445
+ 1449 M1449
+ 1451 M1451
+ 1461 M1461
+ 1489 M1489
+ 1511 M1511
+ 1512 ALI M1512 Aladdin
+ 1513 M1513
+ 1521 ALI M1521 Aladdin III CPU Bridge
+ 10b9 1521 ALI M1521 Aladdin III CPU Bridge
+ 1523 ALI M1523 ISA Bridge
+ 10b9 1523 ALI M1523 ISA Bridge
+ 1531 M1531 Northbridge [Aladdin IV/IV+]
+ 1533 M1533 PCI-to-ISA Bridge
+ 10b9 1533 ALI M1533 Aladdin IV/V ISA South Bridge
+ 1535 M1535 PCI Bridge + Super I/O + FIR
+ 1541 M1541 Northbridge [Aladdin V]
+ 10b9 1541 ALI M1541 Aladdin V/V+ AGP+PCI North Bridge
+ 1542 M1542 Northbridge [Aladdin V]
+ 1543 M1543 PCI-to-ISA Bridge + Super I/O + FIR
+ 1561 M1561 Northbridge [Aladdin 7]
+ 1621 M1621 Northbridge [Aladdin-Pro II]
+ 1631 M1631 Northbridge+3D Graphics [Aladdin TNT2]
+ 1641 M1641 Northbridge [Aladdin-Pro IV]
+ 1647 M1647 [MaGiK1] PCI North Bridge
+ 3141 M3141
+ 3143 M3143
+ 3145 M3145
+ 3147 M3147
+ 3149 M3149
+ 3151 M3151
+ 3307 M3307 MPEG-I Video Controller
+ 3309 M3309 MPEG-II Video w/ Software Audio Decoder
+ 3321 M3321 MPEG-II Audio/Video Decoder
+ 5212 M4803
+ 5215 ALI PCI EIDE Controller
+ 5217 M5217H
+ 5219 M5219
+ 5225 M5225
+ 5229 M5229
+ 5235 M5235
+ 5237 M5237 PCI USB Host Controller
+ 5240 EIDE Controller
+ 5241 PCMCIA Bridge
+ 5242 General Purpose Controller
+ 5243 PCI to PCI Bridge Controller
+ 5244 Floppy Disk Controller
+ 5247 M1541 PCI to PCI Bridge
+ 5251 M5251 P1394 Controller
+ 5427 PCI to AGP Bridge
+ 5451 M5451 PCI AC-Link Controller Audio Device
+ 5453 M5453 PCI AC-Link Controller Modem Device
+ 7101 M7101 PCI PMU Power Management Controller
+ 10b9 7101 M7101 PCI PMU Power Management Controller
+1028 Dell Computer Corporation
+ 0001 PowerEdge Expandable RAID Controller 2/Si
+ 1028 0001 PowerEdge Expandable RAID Controller 2/Si
+ 0002 PowerEdge Expandable RAID Controller 3
+ 1028 0002 PowerEdge Expandable RAID Controller 3/Di
+ 1028 00d1 PowerEdge Expandable RAID Controller 3/Di
+ 1028 00d9 PowerEdge Expandable RAID Controller 3/Di
+ 0003 PowerEdge Expandable RAID Controller 3/Si
+ 1028 0003 PowerEdge Expandable RAID Controller 3/Si
+ 0004 PowerEdge Expandable RAID Controller 3/Si
+ 1028 00d0 PowerEdge Expandable RAID Controller 3/Si
+ 0005 PowerEdge Expandable RAID Controller 3/Di
+ 0006 PowerEdge Expandable RAID Controller 3/Di
+ 0007 Remote Assistant Card 3
+ 0008 PowerEdge Expandable RAID Controller 3/Di
+ 000a PowerEdge Expandable RAID Controller 3
+ 1027 0121 PowerEdge Expandable RAID Controller 3/Di
+ 1028 0106 PowerEdge Expandable RAID Controller 3/Di
+ 1028 011b PowerEdge Expandable RAID Controller 3/Di
+ 000c Embedded Systems Management Device 4
+ 000e PowerEdge Expandable RAID Controller
+ 000f PowerEdge Expandable RAID Controller 4/Di
+1029 Siemens Nixdorf IS
+102a LSI Logic
+ 0000 HYDRA
+ 0010 ASPEN
+102b Matrox Graphics, Inc.
+# DJ: I've a suspicion that 0010 is a duplicate of 0d10.
+ 0010 MGA-I [Impression?]
+ 0518 MGA-II [Athena]
+ 0519 MGA 2064W [Millennium]
+ 051a MGA 1064SG [Mystique]
+ 102b 1100 MGA-1084SG Mystique
+ 102b 1200 MGA-1084SG Mystique
+ 1100 102b MGA-1084SG Mystique
+ 110a 0018 Scenic Pro C5 (D1025)
+ 051b MGA 2164W [Millennium II]
+ 102b 051b MGA-2164W Millennium II
+ 102b 1100 MGA-2164W Millennium II
+ 102b 1200 MGA-2164W Millennium II
+ 051e MGA 1064SG [Mystique] AGP
+ 051f MGA 2164W [Millennium II] AGP
+ 0520 MGA G200
+ 102b dbc2 G200 Multi-Monitor
+ 102b dbc8 G200 Multi-Monitor
+ 102b dbe2 G200 Multi-Monitor
+ 102b dbe8 G200 Multi-Monitor
+ 102b ff03 Millennium G200 SD
+ 102b ff04 Marvel G200
+ 0521 MGA G200 AGP
+ 1014 ff03 Millennium G200 AGP
+ 102b 48e9 Mystique G200 AGP
+ 102b 48f8 Millennium G200 SD AGP
+ 102b 4a60 Millennium G200 LE AGP
+ 102b 4a64 Millennium G200 AGP
+ 102b c93c Millennium G200 AGP
+ 102b c9b0 Millennium G200 AGP
+ 102b c9bc Millennium G200 AGP
+ 102b ca60 Millennium G250 LE AGP
+ 102b ca6c Millennium G250 AGP
+ 102b dbbc Millennium G200 AGP
+ 102b dbc2 Millennium G200 MMS (Dual G200)
+ 102b dbc3 G200 Multi-Monitor
+ 102b dbc8 Millennium G200 MMS (Dual G200)
+ 102b dbd2 G200 Multi-Monitor
+ 102b dbd3 G200 Multi-Monitor
+ 102b dbd4 G200 Multi-Monitor
+ 102b dbd5 G200 Multi-Monitor
+ 102b dbd8 G200 Multi-Monitor
+ 102b dbd9 G200 Multi-Monitor
+ 102b dbe2 Millennium G200 MMS (Quad G200)
+ 102b dbe3 G200 Multi-Monitor
+ 102b dbe8 Millennium G200 MMS (Quad G200)
+ 102b dbf2 G200 Multi-Monitor
+ 102b dbf3 G200 Multi-Monitor
+ 102b dbf4 G200 Multi-Monitor
+ 102b dbf5 G200 Multi-Monitor
+ 102b dbf8 G200 Multi-Monitor
+ 102b dbf9 G200 Multi-Monitor
+ 102b f806 Mystique G200 Video AGP
+ 102b ff00 MGA-G200 AGP
+ 102b ff02 Mystique G200 AGP
+ 102b ff03 Millennium G200 AGP
+ 102b ff04 Marvel G200 AGP
+ 110a 0032 MGA-G200 AGP
+ 0525 MGA G400 AGP
+ 0e11 b16f MGA-G400 AGP
+ 102b 0328 Millennium G400 16Mb SDRAM
+ 102b 0338 Millennium G400 16Mb SDRAM
+ 102b 0378 Millennium G400 32Mb SDRAM
+ 102b 0541 Millennium G450 Dual Head
+ 102b 0542 Millennium G450 Dual Head LX
+ 102b 0543 Millennium G450 Single Head LX
+ 102b 0641 Millennium G450 32Mb SDRAM Dual Head
+ 102b 0642 Millennium G450 32Mb SDRAM Dual Head LX
+ 102b 0643 Millennium G450 32Mb SDRAM Single Head LX
+ 102b 07c0 Millennium G450 Dual Head LE
+ 102b 07c1 Millennium G450 SDR Dual Head LE
+ 102b 0d41 Millennium G450 Dual Head PCI
+ 102b 0d42 Millennium G450 Dual Head LX PCI
+ 102b 0e00 Marvel G450 eTV
+ 102b 0e01 Marvel G450 eTV
+ 102b 0e02 Marvel G450 eTV
+ 102b 0e03 Marvel G450 eTV
+ 102b 0f80 Millennium G450 Low Profile
+ 102b 0f81 Millennium G450 Low Profile
+ 102b 0f82 Millennium G450 Low Profile DVI
+ 102b 0f83 Millennium G450 Low Profile DVI
+ 102b 19d8 Millennium G400 16Mb SGRAM
+ 102b 19f8 Millennium G400 32Mb SGRAM
+ 102b 2159 Millennium G400 Dual Head 16Mb
+ 102b 2179 Millennium G400 MAX/Dual Head 32Mb
+ 102b 217d Millennium G400 Dual Head Max
+ 102b 23c0 Millennium G450
+ 102b 23c1 Millennium G450
+ 102b 23c2 Millennium G450 DVI
+ 102b 23c3 Millennium G450 DVI
+ 102b 2f58 Millennium G400
+ 102b 2f78 Millennium G400
+ 102b 3693 Marvel G400 AGP
+ 102b 5dd0 4Sight II
+ 102b 5f50 4Sight II
+ 102b 5f51 4Sight II
+ 102b 5f52 4Sight II
+ 102b 9010 Millennium G400 Dual Head
+ 1458 0400 GA-G400
+ 1705 0001 Digital First Millennium G450 32MB SGRAM
+ 1705 0002 Digital First Millennium G450 16MB SGRAM
+ 1705 0003 Digital First Millennium G450 32MB
+ 1705 0004 Digital First Millennium G450 16MB
+ b16f 0e11 MGA-G400 AGP
+ 0527 MGA Parhelia AGP
+ 102b 0840 Parhelia 128Mb
+ 0d10 MGA Ultima/Impression
+ 1000 MGA G100 [Productiva]
+ 102b ff01 Productiva G100
+ 102b ff05 Productiva G100 Multi-Monitor
+ 1001 MGA G100 [Productiva] AGP
+ 102b 1001 MGA-G100 AGP
+ 102b ff00 MGA-G100 AGP
+ 102b ff01 MGA-G100 Productiva AGP
+ 102b ff03 Millennium G100 AGP
+ 102b ff04 MGA-G100 AGP
+ 102b ff05 MGA-G100 Productiva AGP Multi-Monitor
+ 110a 001e MGA-G100 AGP
+ 2007 MGA Mistral
+ 2527 MGA G550 AGP
+ 102b 0f83 Millennium G550
+ 102b 0f84 Millennium G550 Dual Head DDR 32Mb
+ 102b 1e41 Millennium G550
+ 4536 VIA Framegrabber
+ 6573 Shark 10/100 Multiport SwitchNIC
+102c Chips and Technologies
+ 00b8 F64310
+ 00c0 F69000 HiQVideo
+ 102c 00c0 F69000 HiQVideo
+ 00d0 F65545
+ 00d8 F65545
+ 00dc F65548
+ 00e0 F65550
+ 00e4 F65554
+ 00e5 F65555 HiQVPro
+ 0e11 b049 Armada 1700 Laptop Display Controller
+ 00f0 F68554
+ 00f4 F68554 HiQVision
+ 00f5 F68555
+ 0c30 F69030
+102d Wyse Technology Inc.
+ 50dc 3328 Audio
+102e Olivetti Advanced Technology
+102f Toshiba America
+ 0009 r4x00
+ 0020 ATM Meteor 155
+ 102f 00f8 ATM Meteor 155
+1030 TMC Research
+1031 Miro Computer Products AG
+ 5601 DC20 ASIC
+ 5607 Video I/O & motion JPEG compressor
+ 5631 Media 3D
+ 6057 MiroVideo DC10/DC30+
+1032 Compaq
+1033 NEC Corporation
+ 0001 PCI to 486-like bus Bridge
+ 0002 PCI to VL98 Bridge
+ 0003 ATM Controller
+ 0004 R4000 PCI Bridge
+ 0005 PCI to 486-like bus Bridge
+ 0006 GUI Accelerator
+ 0007 PCI to UX-Bus Bridge
+ 0008 GUI Accelerator
+ 0009 GUI Accelerator for W98
+ 001a [Nile II]
+ 0021 Vrc4373 [Nile I]
+ 0029 PowerVR PCX1
+ 002a PowerVR 3D
+ 0035 USB
+ 1179 0001 USB
+ 12ee 7000 Root Hub
+ 003e NAPCCARD Cardbus Controller
+ 0046 PowerVR PCX2 [midas]
+ 005a Vrc5074 [Nile 4]
+ 0063 Firewarden
+ 0067 PowerVR Neon 250 Chipset
+ 1010 0020 PowerVR Neon 250 AGP 32Mb
+ 1010 0080 PowerVR Neon 250 AGP 16Mb
+ 1010 0088 PowerVR Neon 250 16Mb
+ 1010 0090 PowerVR Neon 250 AGP 16Mb
+ 1010 0098 PowerVR Neon 250 16Mb
+ 1010 00a0 PowerVR Neon 250 AGP 32Mb
+ 1010 00a8 PowerVR Neon 250 32Mb
+ 1010 0120 PowerVR Neon 250 AGP 32Mb
+ 0074 56k Voice Modem
+ 1033 8014 RCV56ACF 56k Voice Modem
+ 009b Vrc5476
+ 00a6 VRC5477 AC97
+ 00cd IEEE 1394 [OrangeLink] Host Controller
+ 12ee 8011 Root hub
+ 00e0 USB 2.0
+ 12ee 7001 Root hub
+1034 Framatome Connectors USA Inc.
+1035 Comp. & Comm. Research Lab
+1036 Future Domain Corp.
+ 0000 TMC-18C30 [36C70]
+1037 Hitachi Micro Systems
+1038 AMP, Inc
+1039 Silicon Integrated Systems [SiS]
+ 0001 5591/5592 AGP
+ 0002 SG86C202
+ 0006 85C501/2/3
+ 0008 85C503/5513
+ 0009 ACPI
+ 0018 SiS85C503/5513 (LPC Bridge)
+ 0200 5597/5598/6326 VGA
+ 1039 0000 SiS5597 SVGA (Shared RAM)
+ 0204 82C204
+ 0205 SG86C205
+ 0300 300/200
+ 107d 2720 Leadtek WinFast VR300
+ 0406 85C501/2
+ 0496 85C496
+ 0530 530 Host
+ 0540 540 Host
+ 0597 5513C
+ 0601 85C601
+ 0620 620 Host
+ 0630 630 Host
+ 0633 633 Host
+ 0635 635 Host
+ 0645 645 Host
+ 0646 645DX Host
+ 0650 650 Host
+ 0730 730 Host
+ 0733 733 Host
+ 0735 735 Host
+ 0740 740 Host
+ 0745 745 Host
+ 0900 SiS900 10/100 Ethernet
+ 1039 0900 SiS900 10/100 Ethernet Adapter
+ 0961 SiS961 [MuTIOL Media IO]
+ 3602 83C602
+ 5107 5107
+ 5300 SiS540 PCI Display Adapter
+ 5401 486 PCI Chipset
+ 5511 5511/5512
+ 5513 5513 [IDE]
+ 1039 5513 SiS5513 EIDE Controller (A,B step)
+ 5517 5517
+ 5571 5571
+ 5581 5581 Pentium Chipset
+ 5582 5582
+ 5591 5591/5592 Host
+ 5596 5596 Pentium Chipset
+ 5597 5597 [SiS5582]
+ 5600 5600 Host
+ 6204 Video decoder & MPEG interface
+ 6205 VGA Controller
+ 6236 6236 3D-AGP
+ 6300 SiS630 GUI Accelerator+3D
+ 6306 6306 3D-AGP
+ 1039 6306 SiS530,620 GUI Accelerator+3D
+ 6326 86C326 5598/6326
+ 1039 6326 SiS6326 GUI Accelerator
+ 1092 0a50 SpeedStar A50
+ 1092 0a70 SpeedStar A70
+ 1092 4910 SpeedStar A70
+ 1092 4920 SpeedStar A70
+ 1569 6326 SiS6326 GUI Accelerator
+ 7001 7001
+ 7007 FireWire Controller
+ 7012 SiS7012 PCI Audio Accelerator
+ 7013 56k Winmodem (Smart Link HAMR5600 compatible)
+ 7016 SiS7016 10/100 Ethernet Adapter
+ 1039 7016 SiS7016 10/100 Ethernet Adapter
+ 7018 SiS PCI Audio Accelerator
+ 1014 01b6 SiS PCI Audio Accelerator
+ 1014 01b7 SiS PCI Audio Accelerator
+ 1019 7018 SiS PCI Audio Accelerator
+ 1025 000e SiS PCI Audio Accelerator
+ 1025 0018 SiS PCI Audio Accelerator
+ 1039 7018 SiS PCI Audio Accelerator
+ 1043 800b SiS PCI Audio Accelerator
+ 1054 7018 SiS PCI Audio Accelerator
+ 107d 5330 SiS PCI Audio Accelerator
+ 107d 5350 SiS PCI Audio Accelerator
+ 1170 3209 SiS PCI Audio Accelerator
+ 1462 400a SiS PCI Audio Accelerator
+ 14a4 2089 SiS PCI Audio Accelerator
+ 14cd 2194 SiS PCI Audio Accelerator
+ 14ff 1100 SiS PCI Audio Accelerator
+ 152d 8808 SiS PCI Audio Accelerator
+ 1558 1103 SiS PCI Audio Accelerator
+ 1558 2200 SiS PCI Audio Accelerator
+ 1563 7018 SiS PCI Audio Accelerator
+ 15c5 0111 SiS PCI Audio Accelerator
+ 270f a171 SiS PCI Audio Accelerator
+ a0a0 0022 SiS PCI Audio Accelerator
+103a Seiko Epson Corporation
+103b Tatung Co. of America
+103c Hewlett-Packard Company
+ 1005 A4977A Visualize EG
+ 1006 Visualize FX6
+ 1008 Visualize FX4
+ 100a Visualize FX2
+ 1028 Tach TL Fibre Channel Host Adapter
+ 1029 Tach XL2 Fibre Channel Host Adapter
+ 107e 000f Interphase 5560 Fibre Channel Adapter
+ 9004 9210 1Gb/2Gb Family Fibre Channel Controller
+ 9004 9211 1Gb/2Gb Family Fibre Channel Controller
+ 102a Tach TS Fibre Channel Host Adapter
+ 107e 000e Interphase 5540/5541 Fibre Channel Adapter
+ 9004 9110 1Gb/2Gb Family Fibre Channel Controller
+ 9004 9111 1Gb/2Gb Family Fibre Channel Controller
+ 1030 J2585A DeskDirect 10/100VG NIC
+ 1031 J2585B HP 10/100VG PCI LAN Adapter
+ 103c 1040 J2973A DeskDirect 10BaseT NIC
+ 103c 1041 J2585B DeskDirect 10/100VG NIC
+ 103c 1042 J2970A DeskDirect 10BaseT/2 NIC
+ 1040 J2973A DeskDirect 10BaseT NIC
+ 1041 J2585B DeskDirect 10/100 NIC
+ 1042 J2970A DeskDirect 10BaseT/2 NIC
+ 1048 Diva Serial [GSP] Multiport UART
+ 103c 1049 Tosca Console
+ 103c 104a Tosca Secondary
+ 103c 104b Maestro SP2
+ 103c 1223 Halfdome Console
+ 103c 1226 Keystone SP2
+ 103c 1227 Powerbar SP2
+ 103c 1282 Everest SP2
+ 1064 79C970 PCnet Ethernet Controller
+ 108b Visualize FXe
+ 10c1 NetServer Smart IRQ Router
+ 10ed TopTools Remote Control
+ 1200 82557B 10/100 NIC
+ 1219 NetServer PCI Hot-Plug Controller
+ 121a NetServer SMIC Controller
+ 121b NetServer Legacy COM Port Decoder
+ 121c NetServer PCI COM Port Decoder
+ 1229 zx1 System Bus Adapter
+ 122a zx1 I/O Controller
+ 122e zx1 Local Bus Adapter
+ 1290 Auxiliary Diva Serial Port
+ 2910 E2910A PCIBus Exerciser
+ 2925 E2925A 32 Bit, 33 MHzPCI Exerciser & Analyzer
+103e Solliday Engineering
+103f Synopsys/Logic Modeling Group
+1040 Accelgraphics Inc.
+1041 Computrend
+1042 Micron
+ 1000 FDC 37C665
+ 1001 37C922
+ 3000 Samurai_0
+ 3010 Samurai_1
+ 3020 Samurai_IDE
+1043 Asustek Computer, Inc.
+ 0675 ISDNLink P-IN100-ST-D
+ 4057 V8200 GeForce 3
+1044 Distributed Processing Technology
+ 1012 Domino RAID Engine
+ a400 SmartCache/Raid I-IV Controller
+ a500 PCI Bridge
+ a501 SmartRAID V Controller
+ 1044 c001 PM1554U2 Ultra2 Single Channel
+ 1044 c002 PM1654U2 Ultra2 Single Channel
+ 1044 c003 PM1564U3 Ultra3 Single Channel
+ 1044 c004 PM1564U3 Ultra3 Dual Channel
+ 1044 c005 PM1554U2 Ultra2 Single Channel (NON ACPI)
+ 1044 c00a PM2554U2 Ultra2 Single Channel
+ 1044 c00b PM2654U2 Ultra2 Single Channel
+ 1044 c00c PM2664U3 Ultra3 Single Channel
+ 1044 c00d PM2664U3 Ultra3 Dual Channel
+ 1044 c00e PM2554U2 Ultra2 Single Channel (NON ACPI)
+ 1044 c00f PM2654U2 Ultra2 Single Channel (NON ACPI)
+ 1044 c014 PM3754U2 Ultra2 Single Channel (NON ACPI)
+ 1044 c015 PM3755U2B Ultra2 Single Channel (NON ACPI)
+ 1044 c016 PM3755F Fibre Channel (NON ACPI)
+ 1044 c01e PM3757U2 Ultra2 Single Channel
+ 1044 c01f PM3757U2 Ultra2 Dual Channel
+ 1044 c020 PM3767U3 Ultra3 Dual Channel
+ 1044 c021 PM3767U3 Ultra3 Quad Channel
+ 1044 c028 PM2865U3 Ultra3 Single Channel
+ 1044 c029 PM2865U3 Ultra3 Dual Channel
+ 1044 c02a PM2865F Fibre Channel
+ 1044 c03c 2000S Ultra3 Single Channel
+ 1044 c03d 2000S Ultra3 Dual Channel
+ 1044 c03e 2000F Fibre Channel
+ 1044 c046 3000S Ultra3 Single Channel
+ 1044 c047 3000S Ultra3 Dual Channel
+ 1044 c048 3000F Fibre Channel
+ 1044 c050 5000S Ultra3 Single Channel
+ 1044 c051 5000S Ultra3 Dual Channel
+ 1044 c052 5000F Fibre Channel
+ 1044 c05a 2400A UDMA Four Channel
+ 1044 c05b 2400A UDMA Four Channel DAC
+ 1044 c064 3010S Ultra3 Dual Channel
+ 1044 c065 3010S Ultra3 Four Channel
+ 1044 c066 3010S Fibre Channel
+ a511 SmartRAID V Controller
+1045 OPTi Inc.
+ a0f8 82C750 [Vendetta] USB Controller
+ c101 92C264
+ c178 92C178
+ c556 82X556 [Viper]
+ c557 82C557 [Viper-M]
+ c558 82C558 [Viper-M ISA+IDE]
+ c567 82C750 [Vendetta], device 0
+ c568 82C750 [Vendetta], device 1
+ c569 82C579 [Viper XPress+ Chipset]
+ c621 82C621 [Viper-M/N+]
+ c700 82C700 [FireStar]
+ c701 82C701 [FireStar Plus]
+ c814 82C814 [Firebridge 1]
+ c822 82C822
+ c824 82C824
+ c825 82C825 [Firebridge 2]
+ c832 82C832
+ c861 82C861
+ c895 82C895
+ c935 EV1935 ECTIVA MachOne PCI Audio
+ d568 82C825 [Firebridge 2]
+ d721 IDE [FireStar]
+1046 IPC Corporation, Ltd.
+1047 Genoa Systems Corp
+1048 Elsa AG
+ 0d22 Quadro4 900XGL [ELSA GLoria4 900XGL]
+ 1000 QuickStep 1000
+ 3000 QuickStep 3000
+1049 Fountain Technologies, Inc.
+104a SGS Thomson Microelectronics
+ 0008 STG 2000X
+ 0009 STG 1764X
+ 0981 DEC-Tulip compatible 10/100 Ethernet
+ 1746 STG 1764X
+ 2774 DEC-Tulip compatible 10/100 Ethernet
+ 3520 MPEG-II decoder card
+104b BusLogic
+ 0140 BT-946C (old) [multimaster 01]
+ 1040 BT-946C (BA80C30) [MultiMaster 10]
+ 8130 Flashpoint LT
+104c Texas Instruments
+ 0500 100 MBit LAN Controller
+ 0508 TMS380C2X Compressor Interface
+ 1000 Eagle i/f AS
+ 3d04 TVP4010 [Permedia]
+ 3d07 TVP4020 [Permedia 2]
+ 1011 4d10 Comet
+ 1040 000f AccelStar II
+ 1040 0011 AccelStar II
+ 1048 0a31 WINNER 2000
+ 1048 0a32 GLoria Synergy
+ 1048 0a35 GLoria Synergy
+ 107d 2633 WinFast 3D L2300
+ 1092 0127 FIRE GL 1000 PRO
+ 1092 0136 FIRE GL 1000 PRO
+ 1092 0141 FIRE GL 1000 PRO
+ 1092 0146 FIRE GL 1000 PRO
+ 1092 0148 FIRE GL 1000 PRO
+ 1092 0149 FIRE GL 1000 PRO
+ 1092 0152 FIRE GL 1000 PRO
+ 1092 0154 FIRE GL 1000 PRO
+ 1092 0155 FIRE GL 1000 PRO
+ 1092 0156 FIRE GL 1000 PRO
+ 1092 0157 FIRE GL 1000 PRO
+ 1097 3d01 Jeronimo Pro
+ 1102 100f Graphics Blaster Extreme
+ 3d3d 0100 Reference Permedia 2 3D
+ 8000 PCILynx/PCILynx2 IEEE 1394 Link Layer Controller
+ e4bf 1010 CF1-1-SNARE
+ e4bf 1020 CF1-2-SNARE
+ 8009 FireWire Controller
+ 104d 8032 8032 OHCI i.LINK (IEEE 1394) Controller
+ 8017 PCI4410 FireWire Controller
+ 8019 TSB12LV23 IEEE-1394 Controller
+ 11bd 000a Studio DV500-1394
+ 11bd 000e Studio DV
+ e4bf 1010 CF2-1-CYMBAL
+ 8020 TSB12LV26 IEEE-1394 Controller (Link)
+ 8021 TSB43AA22 IEEE-1394 Controller (PHY/Link Integrated)
+ 104d 80df Vaio PCG-FX403
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 8022 TSB43AB22 IEEE-1394a-2000 Controller (PHY/Link)
+ 8023 TSB43AB22/A IEEE-1394a-2000 Controller (PHY/Link)
+ 8024 TSB43AB23 IEEE-1394a-2000 Controller (PHY/Link)
+ 8026 TSB43AB21 IEEE-1394a-2000 Controller (PHY/Link)
+ 8027 PCI4451 IEEE-1394 Controller
+ 1028 00e6 PCI4451 IEEE-1394 Controller (Dell Inspiron 8100)
+ a001 TDC1570
+ a100 TDC1561
+ a102 TNETA1575 HyperSAR Plus w/PCI Host i/f & UTOPIA i/f
+ ac10 PCI1050
+ ac11 PCI1053
+ ac12 PCI1130
+ ac13 PCI1031
+ ac15 PCI1131
+ ac16 PCI1250
+ ac17 PCI1220
+ ac18 PCI1260
+ ac19 PCI1221
+ ac1a PCI1210
+ ac1b PCI1450
+ ac1c PCI1225
+ ac1d PCI1251A
+ ac1e PCI1211
+ ac1f PCI1251B
+ ac20 TI 2030
+ ac21 PCI2031
+ ac22 PCI2032 PCI Docking Bridge
+ ac23 PCI2250 PCI-to-PCI Bridge
+ ac28 PCI2050 PCI-to-PCI Bridge
+ ac30 PCI1260 PC card Cardbus Controller
+ ac40 PCI4450 PC card Cardbus Controller
+ ac41 PCI4410 PC card Cardbus Controller
+ ac42 PCI4451 PC card Cardbus Controller
+ 1028 00e6 PCI4451 PC card CardBus Controller (Dell Inspiron 8100)
+ ac50 PCI1410 PC card Cardbus Controller
+ ac51 PCI1420
+ 1014 023b ThinkPad T23 (2647-4MG)
+ 10cf 1095 Lifebook C6155
+ e4bf 1000 CP2-2-HIPHOP
+ ac52 PCI1451 PC card Cardbus Controller
+ ac53 PCI1421 PC card Cardbus Controller
+ ac55 PCI1250 PC card Cardbus Controller
+ ac60 PCI2040 PCI to DSP Bridge Controller
+ fe00 FireWire Host Controller
+ fe03 12C01A FireWire Host Controller
+104d Sony Corporation
+ 8009 CXD1947Q i.LINK Controller
+ 8039 CXD3222 i.LINK Controller
+ 8056 Rockwell HCF 56K modem
+ 808a Memory Stick Controller
+104e Oak Technology, Inc
+ 0017 OTI-64017
+ 0107 OTI-107 [Spitfire]
+ 0109 Video Adapter
+ 0111 OTI-64111 [Spitfire]
+ 0217 OTI-64217
+ 0317 OTI-64317
+104f Co-time Computer Ltd
+1050 Winbond Electronics Corp
+ 0000 NE2000
+ 0001 W83769F
+ 0105 W82C105
+ 0840 W89C840
+ 1050 0001 W89C840 Ethernet Adapter
+ 1050 0840 W89C840 Ethernet Adapter
+ 0940 W89C940
+ 5a5a W89C940F
+ 9970 W9970CF
+1051 Anigma, Inc.
+1052 ?Young Micro Systems
+1053 Young Micro Systems
+1054 Hitachi, Ltd
+1055 Efar Microsystems
+ 9130 SLC90E66 [Victory66] IDE
+ 9460 SLC90E66 [Victory66] ISA
+ 9462 SLC90E66 [Victory66] USB
+ 9463 SLC90E66 [Victory66] ACPI
+1056 ICL
+# Motorola made a mistake and used 1507 instead of 1057 in some chips. Please look at the 1507 entry as well when updating this.
+1057 Motorola
+ 0001 MPC105 [Eagle]
+ 0002 MPC106 [Grackle]
+ 0003 MPC8240 [Kahlua]
+ 0100 MC145575 [HFC-PCI]
+ 0431 KTI829c 100VG
+ 1801 Audio I/O Controller (MIDI)
+ ecc0 0030 Layla
+ 18c0 MPC8265A/MPC8266
+ 4801 Raven
+ 4802 Falcon
+ 4803 Hawk
+ 4806 CPX8216
+ 4d68 20268
+ 5600 SM56 PCI Modem
+ 1057 0300 SM56 PCI Speakerphone Modem
+ 1057 0301 SM56 PCI Voice Modem
+ 1057 0302 SM56 PCI Fax Modem
+ 1057 5600 SM56 PCI Voice modem
+ 13d2 0300 SM56 PCI Speakerphone Modem
+ 13d2 0301 SM56 PCI Voice modem
+ 13d2 0302 SM56 PCI Fax Modem
+ 1436 0300 SM56 PCI Speakerphone Modem
+ 1436 0301 SM56 PCI Voice modem
+ 1436 0302 SM56 PCI Fax Modem
+ 144f 100c SM56 PCI Fax Modem
+ 1494 0300 SM56 PCI Speakerphone Modem
+ 1494 0301 SM56 PCI Voice modem
+ 14c8 0300 SM56 PCI Speakerphone Modem
+ 14c8 0302 SM56 PCI Fax Modem
+ 1668 0300 SM56 PCI Speakerphone Modem
+ 1668 0302 SM56 PCI Fax Modem
+ 6400 MPC190 Security Processor (S1 family, encryption)
+1058 Electronics & Telecommunications RSH
+1059 Teknor Industrial Computers Inc
+105a Promise Technology, Inc.
+ 0d30 20265
+ 105a 4d33 Ultra100
+ 0d38 20263
+ 105a 4d39 Fasttrak66
+ 1275 20275
+ 4d30 20267
+ 105a 4d33 Ultra100
+ 105a 4d39 Fasttrak100
+ 4d33 20246
+ 105a 4d33 20246 IDE Controller
+ 4d38 20262
+ 105a 4d30 Ultra Device on SuperTrak
+ 105a 4d33 Ultra66
+ 105a 4d39 Fasttrak66
+ 4d68 20268
+ 105a 4d68 Ultra100TX2
+ 4d69 20269
+ 5275 PDC20276 IDE
+ 105a 0275 SuperTrak SX6000 IDE
+ 5300 DC5300
+ 6268 20268R
+ 6269 PDC20271
+ 105a 6269 Fasttrak TX2
+ 7275 PDC20277
+105b Foxconn International, Inc.
+105c Wipro Infotech Limited
+105d Number 9 Computer Company
+ 2309 Imagine 128
+ 2339 Imagine 128-II
+ 105d 0000 Imagine 128 series 2 4Mb VRAM
+ 105d 0001 Imagine 128 series 2 4Mb VRAM
+ 105d 0002 Imagine 128 series 2 4Mb VRAM
+ 105d 0003 Imagine 128 series 2 4Mb VRAM
+ 105d 0004 Imagine 128 series 2 4Mb VRAM
+ 105d 0005 Imagine 128 series 2 4Mb VRAM
+ 105d 0006 Imagine 128 series 2 4Mb VRAM
+ 105d 0007 Imagine 128 series 2 4Mb VRAM
+ 105d 0008 Imagine 128 series 2e 4Mb DRAM
+ 105d 0009 Imagine 128 series 2e 4Mb DRAM
+ 105d 000a Imagine 128 series 2 8Mb VRAM
+ 105d 000b Imagine 128 series 2 8Mb H-VRAM
+ 11a4 000a Barco Metheus 5 Megapixel
+ 13cc 0000 Barco Metheus 5 Megapixel
+ 13cc 0004 Barco Metheus 5 Megapixel
+ 13cc 0005 Barco Metheus 5 Megapixel
+ 13cc 0006 Barco Metheus 5 Megapixel
+ 13cc 0008 Barco Metheus 5 Megapixel
+ 13cc 0009 Barco Metheus 5 Megapixel
+ 13cc 000a Barco Metheus 5 Megapixel
+ 13cc 000c Barco Metheus 5 Megapixel
+ 493d Imagine 128 T2R [Ticket to Ride]
+ 11a4 000a Barco Metheus 5 Megapixel, Dual Head
+ 11a4 000b Barco Metheus 5 Megapixel, Dual Head
+ 13cc 0002 Barco Metheus 4 Megapixel, Dual Head
+ 13cc 0003 Barco Metheus 5 Megapixel, Dual Head
+ 13cc 0007 Barco Metheus 5 Megapixel, Dual Head
+ 13cc 0008 Barco Metheus 5 Megapixel, Dual Head
+ 13cc 0009 Barco Metheus 5 Megapixel, Dual Head
+ 13cc 000a Barco Metheus 5 Megapixel, Dual Head
+ 5348 Revolution 4
+105e Vtech Computers Ltd
+105f Infotronic America Inc
+1060 United Microelectronics [UMC]
+ 0001 UM82C881
+ 0002 UM82C886
+ 0101 UM8673F
+ 0881 UM8881
+ 0886 UM8886F
+ 0891 UM8891A
+ 1001 UM886A
+ 673a UM8886BF
+ 673b EIDE Master/DMA
+ 8710 UM8710
+ 886a UM8886A
+ 8881 UM8881F
+ 8886 UM8886F
+ 888a UM8886A
+ 8891 UM8891A
+ 9017 UM9017F
+ 9018 UM9018
+ 9026 UM9026
+ e881 UM8881N
+ e886 UM8886N
+ e88a UM8886N
+ e891 UM8891N
+1061 I.I.T.
+ 0001 AGX016
+ 0002 IIT3204/3501
+1062 Maspar Computer Corp
+1063 Ocean Office Automation
+1064 Alcatel
+1065 Texas Microsystems
+1066 PicoPower Technology
+ 0000 PT80C826
+ 0001 PT86C521 [Vesuvius v1] Host Bridge
+ 0002 PT86C523 [Vesuvius v3] PCI-ISA Bridge Master
+ 0003 PT86C524 [Nile] PCI-to-PCI Bridge
+ 0004 PT86C525 [Nile-II] PCI-to-PCI Bridge
+ 0005 National PC87550 System Controller
+ 8002 PT86C523 [Vesuvius v3] PCI-ISA Bridge Slave
+1067 Mitsubishi Electric
+ 1002 VG500 [VolumePro Volume Rendering Accelerator]
+1068 Diversified Technology
+1069 Mylex Corporation
+ 0001 DAC960P
+ 0002 DAC960PD
+ 0010 DAC960PX
+ 0050 AcceleRAID 352/170/160 support Device
+ ba55 eXtremeRAID 1100 support Device
+ ba56 eXtremeRAID 2000/3000 support Device
+106a Aten Research Inc
+106b Apple Computer Inc.
+ 0001 Bandit PowerPC host bridge
+ 0002 Grand Central I/O
+ 0003 Control Video
+ 0004 PlanB Video-In
+ 0007 O'Hare I/O
+ 000e Hydra Mac I/O
+ 0010 Heathrow Mac I/O
+ 0017 Paddington Mac I/O
+ 0018 UniNorth FireWire
+ 0019 KeyLargo USB
+ 001e UniNorth Internal PCI
+ 001f UniNorth PCI
+ 0020 UniNorth AGP
+ 0021 UniNorth GMAC (Sun GEM)
+ 0022 KeyLargo Mac I/O
+ 0024 UniNorth/Pangea GMAC (Sun GEM)
+ 0025 KeyLargo/Pangea Mac I/O
+ 0026 KeyLargo/Pangea USB
+ 0027 UniNorth/Pangea AGP
+ 0028 UniNorth/Pangea PCI
+ 0029 UniNorth/Pangea Internal PCI
+ 002d UniNorth 1.5 AGP
+ 002e UniNorth 1.5 PCI
+ 002f UniNorth 1.5 Internal PCI
+ 0030 UniNorth/Pangea FireWire
+106c Hyundai Electronics America
+ 8801 Dual Pentium ISA/PCI Motherboard
+ 8802 PowerPC ISA/PCI Motherboard
+ 8803 Dual Window Graphics Accelerator
+ 8804 LAN Controller
+ 8805 100-BaseT LAN
+106d Sequent Computer Systems
+106e DFI, Inc
+106f City Gate Development Ltd
+1070 Daewoo Telecom Ltd
+1071 Mitac
+1072 GIT Co Ltd
+1073 Yamaha Corporation
+ 0001 3D GUI Accelerator
+ 0002 YGV615 [RPA3 3D-Graphics Controller]
+ 0003 YMF-740
+ 0004 YMF-724
+ 1073 0004 YMF724-Based PCI Audio Adapter
+ 0005 DS1 Audio
+ 1073 0005 DS-XG PCI Audio CODEC
+ 0006 DS1 Audio
+ 0008 DS1 Audio
+ 1073 0008 DS-XG PCI Audio CODEC
+ 000a DS1L Audio
+ 1073 0004 DS-XG PCI Audio CODEC
+ 1073 000a DS-XG PCI Audio CODEC
+ 000c YMF-740C [DS-1L Audio Controller]
+ 107a 000c DS-XG PCI Audio CODEC
+ 000d YMF-724F [DS-1 Audio Controller]
+ 1073 000d DS-XG PCI Audio CODEC
+ 0010 YMF-744B [DS-1S Audio Controller]
+ 1073 0006 DS-XG PCI Audio CODEC
+ 1073 0010 DS-XG PCI Audio CODEC
+ 0012 YMF-754 [DS-1E Audio Controller]
+ 1073 0012 DS-XG PCI Audio Codec
+ 0020 DS-1 Audio
+ 2000 DS2416 Digital Mixing Card
+ 1073 2000 DS2416 Digital Mixing Card
+1074 NexGen Microsystems
+ 4e78 82c500/1
+1075 Advanced Integrations Research
+1076 Chaintech Computer Co. Ltd
+1077 QLogic Corp.
+ 1016 ISP10160 Single Channel Ultra3 SCSI Processor
+ 1020 ISP1020 Fast-wide SCSI
+ 1022 ISP1022 Fast-wide SCSI
+ 1080 ISP1080 SCSI Host Adapter
+ 1216 ISP12160 Dual Channel Ultra3 SCSI Processor
+ 101e 8471 QLA12160 on AMI MegaRAID
+ 101e 8493 QLA12160 on AMI MegaRAID
+ 1240 ISP1240 SCSI Host Adapter
+ 1280 ISP1280
+ 2020 ISP2020A Fast!SCSI Basic Adapter
+ 2100 QLA2100 64-bit Fibre Channel Adapter
+ 1077 0001 QLA2100 64-bit Fibre Channel Adapter
+ 2200 QLA2200
+ 2300 QLA2300 64-bit FC-AL Adapter
+ 2312 QLA2312 Fibre Channel Adapter
+1078 Cyrix Corporation
+ 0000 5510 [Grappa]
+ 0001 PCI Master
+ 0002 5520 [Cognac]
+ 0100 5530 Legacy [Kahlua]
+ 0101 5530 SMI [Kahlua]
+ 0102 5530 IDE [Kahlua]
+ 0103 5530 Audio [Kahlua]
+ 0104 5530 Video [Kahlua]
+ 0400 ZFMicro PCI Bridge
+ 0401 ZFMicro Chipset SMI
+ 0402 ZFMicro Chipset IDE
+ 0403 ZFMicro Expansion Bus
+1079 I-Bus
+107a NetWorth
+107b Gateway 2000
+107c LG Electronics [Lucky Goldstar Co. Ltd]
+107d LeadTek Research Inc.
+ 0000 P86C850
+107e Interphase Corporation
+ 0001 5515 ATM Adapter [Flipper]
+ 0002 100 VG AnyLan Controller
+ 0004 5526 Fibre Channel Host Adapter
+ 0005 x526 Fibre Channel Host Adapter
+ 0008 5525/5575 ATM Adapter (155 Mbit) [Atlantic]
+ 9003 5535-4P-BRI-ST
+ 9007 5535-4P-BRI-U
+ 9008 5535-1P-SR
+ 900c 5535-1P-SR-ST
+ 900e 5535-1P-SR-U
+ 9011 5535-1P-PRI
+ 9013 5535-2P-PRI
+ 9023 5536-4P-BRI-ST
+ 9027 5536-4P-BRI-U
+ 9031 5536-1P-PRI
+ 9033 5536-2P-PRI
+107f Data Technology Corporation
+ 0802 SL82C105
+1080 Contaq Microsystems
+ 0600 82C599
+ c691 Cypress CY82C691
+ c693 82c693
+1081 Supermac Technology
+ 0d47 Radius PCI to NuBus Bridge
+1082 EFA Corporation of America
+1083 Forex Computer Corporation
+ 0001 FR710
+1084 Parador
+1085 Tulip Computers Int.B.V.
+1086 J. Bond Computer Systems
+1087 Cache Computer
+1088 Microcomputer Systems (M) Son
+1089 Data General Corporation
+108a SBS Technologies (formerly Bit3 Computer Corp.)
+ 0001 VME Bridge Model 617
+ 0010 VME Bridge Model 618
+ 0040 dataBLIZZARD
+ 3000 VME Bridge Model 2706
+108c Oakleigh Systems Inc.
+108d Olicom
+ 0001 Token-Ring 16/4 PCI Adapter (3136/3137)
+ 0002 16/4 Token Ring
+ 0004 RapidFire 3139 Token-Ring 16/4 PCI Adapter
+ 108d 0004 OC-3139/3140 RapidFire Token-Ring 16/4 Adapter
+ 0005 GoCard 3250 Token-Ring 16/4 CardBus PC Card
+ 0006 OC-3530 RapidFire Token-Ring 100
+ 0007 RapidFire 3141 Token-Ring 16/4 PCI Fiber Adapter
+ 108d 0007 OC-3141 RapidFire Token-Ring 16/4 Adapter
+ 0008 RapidFire 3540 HSTR 100/16/4 PCI Adapter
+ 108d 0008 OC-3540 RapidFire HSTR 100/16/4 Adapter
+ 0011 OC-2315
+ 0012 OC-2325
+ 0013 OC-2183/2185
+ 0014 OC-2326
+ 0019 OC-2327/2250 10/100 Ethernet Adapter
+ 108d 0016 OC-2327 Rapidfire 10/100 Ethernet Adapter
+ 108d 0017 OC-2250 GoCard 10/100 Ethernet Adapter
+ 0021 OC-6151/6152 [RapidFire ATM 155]
+ 0022 ATM Adapter
+108e Sun Microsystems Computer Corp.
+ 0001 EBUS
+ 1000 EBUS
+ 1001 Happy Meal
+ 1100 RIO EBUS
+ 1101 RIO GEM
+ 1102 RIO 1394
+ 1103 RIO USB
+ 2bad GEM
+ 5000 Simba Advanced PCI Bridge
+ 5043 SunPCI Co-processor
+ 8000 Psycho PCI Bus Module
+ 8001 Schizo PCI Bus Module
+ a000 Ultra IIi
+ a001 Ultra IIe
+108f Systemsoft
+1090 Encore Computer Corporation
+1091 Intergraph Corporation
+ 0020 3D graphics processor
+ 0021 3D graphics processor w/Texturing
+ 0040 3D graphics frame buffer
+ 0041 3D graphics frame buffer
+ 0060 Proprietary bus bridge
+ 00e4 Powerstorm 4D50T
+ 0720 Motion JPEG codec
+1092 Diamond Multimedia Systems
+ 00a0 Speedstar Pro SE
+ 00a8 Speedstar 64
+ 0550 Viper V550
+ 08d4 Supra 2260 Modem
+ 094c SupraExpress 56i Pro
+ 1092 Viper V330
+ 6120 Maximum DVD
+ 8810 Stealth SE
+ 8811 Stealth 64/SE
+ 8880 Stealth
+ 8881 Stealth
+ 88b0 Stealth 64
+ 88b1 Stealth 64
+ 88c0 Stealth 64
+ 88c1 Stealth 64
+ 88d0 Stealth 64
+ 88d1 Stealth 64
+ 88f0 Stealth 64
+ 88f1 Stealth 64
+ 9999 DMD-I0928-1 "Monster sound" sound chip
+1093 National Instruments
+ 0160 PCI-DIO-96
+ 0162 PCI-MIO-16XE-50
+ 1170 PCI-MIO-16XE-10
+ 1180 PCI-MIO-16E-1
+ 1190 PCI-MIO-16E-4
+ 1330 PCI-6031E
+ 1350 PCI-6071E
+ 2a60 PCI-6023E
+ b001 IMAQ-PCI-1408
+ b011 IMAQ-PXI-1408
+ b021 IMAQ-PCI-1424
+ b031 IMAQ-PCI-1413
+ b041 IMAQ-PCI-1407
+ b051 IMAQ-PXI-1407
+ b061 IMAQ-PCI-1411
+ b071 IMAQ-PCI-1422
+ b081 IMAQ-PXI-1422
+ b091 IMAQ-PXI-1411
+ c801 PCI-GPIB
+1094 First International Computers [FIC]
+1095 CMD Technology Inc
+ 0640 PCI0640
+ 0643 PCI0643
+ 0646 PCI0646
+ 0647 PCI0647
+ 0648 PCI0648
+ 0649 PCI0649
+ 0e11 005d Integrated Ultra ATA-100 Dual Channel Controller
+ 0e11 007e Integrated Ultra ATA-100 IDE RAID Controller
+ 101e 0649 AMI MegaRAID IDE 100 Controller
+ 0650 PBC0650A
+ 0670 USB0670
+ 1095 0670 USB0670
+ 0673 USB0673
+ 0680 PCI0680
+1096 Alacron
+1097 Appian Technology
+1098 Quantum Designs (H.K.) Ltd
+ 0001 QD-8500
+ 0002 QD-8580
+1099 Samsung Electronics Co., Ltd
+109a Packard Bell
+109b Gemlight Computer Ltd.
+109c Megachips Corporation
+109d Zida Technologies Ltd.
+109e Brooktree Corporation
+ 0350 Bt848 Video Capture
+ 0351 Bt849A Video capture
+ 0369 Bt878 Video Capture
+ 1002 0001 TV-Wonder
+ 1002 0003 TV-Wonder/VE
+ 036c Bt879(??) Video Capture
+ 13e9 0070 Win/TV (Video Section)
+ 036e Bt878 Video Capture
+ 0070 13eb WinTV/GO
+ 0070 ff01 Viewcast Osprey 200
+ 11bd 001c PCTV Sat (DBC receiver)
+ 127a 0001 Bt878 Mediastream Controller NTSC
+ 127a 0002 Bt878 Mediastream Controller PAL BG
+ 127a 0003 Bt878a Mediastream Controller PAL BG
+ 127a 0048 Bt878/832 Mediastream Controller
+ 144f 3000 MagicTView CPH060 - Video
+ 1461 0004 AVerTV WDM Video Capture
+ 14f1 0001 Bt878 Mediastream Controller NTSC
+ 14f1 0002 Bt878 Mediastream Controller PAL BG
+ 14f1 0003 Bt878a Mediastream Controller PAL BG
+ 14f1 0048 Bt878/832 Mediastream Controller
+ 1851 1850 FlyVideo'98 - Video
+ 1851 1851 FlyVideo II
+ 1852 1852 FlyVideo'98 - Video (with FM Tuner)
+ 036f Bt879 Video Capture
+ 127a 0044 Bt879 Video Capture NTSC
+ 127a 0122 Bt879 Video Capture PAL I
+ 127a 0144 Bt879 Video Capture NTSC
+ 127a 0222 Bt879 Video Capture PAL BG
+ 127a 0244 Bt879a Video Capture NTSC
+ 127a 0322 Bt879 Video Capture NTSC
+ 127a 0422 Bt879 Video Capture NTSC
+ 127a 1122 Bt879 Video Capture PAL I
+ 127a 1222 Bt879 Video Capture PAL BG
+ 127a 1322 Bt879 Video Capture NTSC
+ 127a 1522 Bt879a Video Capture PAL I
+ 127a 1622 Bt879a Video Capture PAL BG
+ 127a 1722 Bt879a Video Capture NTSC
+ 14f1 0044 Bt879 Video Capture NTSC
+ 14f1 0122 Bt879 Video Capture PAL I
+ 14f1 0144 Bt879 Video Capture NTSC
+ 14f1 0222 Bt879 Video Capture PAL BG
+ 14f1 0244 Bt879a Video Capture NTSC
+ 14f1 0322 Bt879 Video Capture NTSC
+ 14f1 0422 Bt879 Video Capture NTSC
+ 14f1 1122 Bt879 Video Capture PAL I
+ 14f1 1222 Bt879 Video Capture PAL BG
+ 14f1 1322 Bt879 Video Capture NTSC
+ 14f1 1522 Bt879a Video Capture PAL I
+ 14f1 1622 Bt879a Video Capture PAL BG
+ 14f1 1722 Bt879a Video Capture NTSC
+ 1851 1850 FlyVideo'98 - Video
+ 1851 1851 FlyVideo II
+ 1852 1852 FlyVideo'98 - Video (with FM Tuner)
+ 0370 Bt880 Video Capture
+ 1851 1850 FlyVideo'98
+ 1851 1851 FlyVideo'98 EZ - video
+ 1852 1852 FlyVideo'98 (with FM Tuner)
+ 0878 Bt878 Audio Capture
+ 0070 13eb WinTV/GO
+ 0070 ff01 Viewcast Osprey 200
+ 1002 0001 TV-Wonder
+ 1002 0003 TV-Wonder/VE
+ 11bd 001c PCTV Sat (DBC receiver)
+ 127a 0001 Bt878 Video Capture (Audio Section)
+ 127a 0002 Bt878 Video Capture (Audio Section)
+ 127a 0003 Bt878 Video Capture (Audio Section)
+ 127a 0048 Bt878 Video Capture (Audio Section)
+ 13e9 0070 Win/TV (Audio Section)
+ 144f 3000 MagicTView CPH060 - Audio
+ 1461 0004 AVerTV WDM Audio Capture
+ 14f1 0001 Bt878 Video Capture (Audio Section)
+ 14f1 0002 Bt878 Video Capture (Audio Section)
+ 14f1 0003 Bt878 Video Capture (Audio Section)
+ 14f1 0048 Bt878 Video Capture (Audio Section)
+ 0879 Bt879 Audio Capture
+ 127a 0044 Bt879 Video Capture (Audio Section)
+ 127a 0122 Bt879 Video Capture (Audio Section)
+ 127a 0144 Bt879 Video Capture (Audio Section)
+ 127a 0222 Bt879 Video Capture (Audio Section)
+ 127a 0244 Bt879 Video Capture (Audio Section)
+ 127a 0322 Bt879 Video Capture (Audio Section)
+ 127a 0422 Bt879 Video Capture (Audio Section)
+ 127a 1122 Bt879 Video Capture (Audio Section)
+ 127a 1222 Bt879 Video Capture (Audio Section)
+ 127a 1322 Bt879 Video Capture (Audio Section)
+ 127a 1522 Bt879 Video Capture (Audio Section)
+ 127a 1622 Bt879 Video Capture (Audio Section)
+ 127a 1722 Bt879 Video Capture (Audio Section)
+ 14f1 0044 Bt879 Video Capture (Audio Section)
+ 14f1 0122 Bt879 Video Capture (Audio Section)
+ 14f1 0144 Bt879 Video Capture (Audio Section)
+ 14f1 0222 Bt879 Video Capture (Audio Section)
+ 14f1 0244 Bt879 Video Capture (Audio Section)
+ 14f1 0322 Bt879 Video Capture (Audio Section)
+ 14f1 0422 Bt879 Video Capture (Audio Section)
+ 14f1 1122 Bt879 Video Capture (Audio Section)
+ 14f1 1222 Bt879 Video Capture (Audio Section)
+ 14f1 1322 Bt879 Video Capture (Audio Section)
+ 14f1 1522 Bt879 Video Capture (Audio Section)
+ 14f1 1622 Bt879 Video Capture (Audio Section)
+ 14f1 1722 Bt879 Video Capture (Audio Section)
+ 0880 Bt880 Audio Capture
+ 2115 BtV 2115 Mediastream controller
+ 2125 BtV 2125 Mediastream controller
+ 2164 BtV 2164
+ 2165 BtV 2165
+ 8230 Bt8230 ATM Segment/Reassembly Ctrlr (SRC)
+ 8472 Bt8472
+ 8474 Bt8474
+109f Trigem Computer Inc.
+10a0 Meidensha Corporation
+10a1 Juko Electronics Ind. Co. Ltd
+10a2 Quantum Corporation
+10a3 Everex Systems Inc
+10a4 Globe Manufacturing Sales
+10a5 Racal Interlan
+10a6 Informtech Industrial Ltd.
+10a7 Benchmarq Microelectronics
+10a8 Sierra Semiconductor
+ 0000 STB Horizon 64
+10a9 Silicon Graphics, Inc.
+ 0001 Crosstalk to PCI Bridge
+ 0002 Linc I/O controller
+ 0003 IOC3 I/O controller
+ 0004 O2 MACE
+ 0005 RAD Audio
+ 0006 HPCEX
+ 0007 RPCEX
+ 0008 DiVO VIP
+ 0009 Alteon Gigabit Ethernet
+ 0010 AMP Video I/O
+ 0011 GRIP
+ 0012 SGH PSHAC GSN
+ 1001 Magic Carpet
+ 1002 Lithium
+ 1003 Dual JPEG 1
+ 1004 Dual JPEG 2
+ 1005 Dual JPEG 3
+ 1006 Dual JPEG 4
+ 1007 Dual JPEG 5
+ 1008 Cesium
+ 2001 Fibre Channel
+ 2002 ASDE
+ 8001 O2 1394
+ 8002 G-net NT
+10aa ACC Microelectronics
+ 0000 ACCM 2188
+10ab Digicom
+10ac Honeywell IAC
+10ad Symphony Labs
+ 0001 W83769F
+ 0003 SL82C103
+ 0005 SL82C105
+ 0103 SL82c103
+ 0105 SL82c105
+ 0565 W83C553
+10ae Cornerstone Technology
+10af Micro Computer Systems Inc
+10b0 CardExpert Technology
+10b1 Cabletron Systems Inc
+10b2 Raytheon Company
+10b3 Databook Inc
+ 3106 DB87144
+ b106 DB87144
+10b4 STB Systems Inc
+ 1b1d Velocity 128 3D
+ 10b4 237e Velocity 4400
+10b5 PLX Technology, Inc.
+ 0001 i960 PCI bus interface
+ 1076 VScom 800 8 port serial adaptor
+ 1077 VScom 400 4 port serial adaptor
+ 1078 VScom 210 2 port serial and 1 port parallel adaptor
+ 1103 VScom 200 2 port serial adaptor
+ 1146 VScom 010 1 port parallel adaptor
+ 1147 VScom 020 2 port parallel adaptor
+ 2724 Thales PCSM Security Card
+ 9030 PCI <-> IOBus Bridge Hot Swap
+ 15ed 1002 MCCS 8-port Serial Hot Swap
+ 15ed 1003 MCCS 16-port Serial Hot Swap
+ 9036 9036
+ 9050 PCI <-> IOBus Bridge
+ 10b5 2273 SH-ARC SoHard ARCnet card
+ 1522 0001 RockForce 4 Port V.90 Data/Fax/Voice Modem
+ 1522 0002 RockForce 2 Port V.90 Data/Fax/Voice Modem
+ 1522 0003 RockForce 6 Port V.90 Data/Fax/Voice Modem
+ 1522 0004 RockForce 8 Port V.90 Data/Fax/Voice Modem
+ 1522 0010 RockForce2000 4 Port V.90 Data/Fax/Voice Modem
+ 1522 0020 RockForce2000 2 Port V.90 Data/Fax/Voice Modem
+ 15ed 1000 Macrolink MCCS 8-port Serial
+ 15ed 1001 Macrolink MCCS 16-port Serial
+ 15ed 1002 Macrolink MCCS 8-port Serial Hot Swap
+ 15ed 1003 Macrolink MCCS 16-port Serial Hot Swap
+ d531 c002 PCIntelliCAN 2xSJA1000 CAN bus
+ d84d 4006 EX-4006 1P
+ d84d 4008 EX-4008 1P EPP/ECP
+ d84d 4014 EX-4014 2P
+ d84d 4018 EX-4018 3P EPP/ECP
+ d84d 4025 EX-4025 1S(16C550) RS-232
+ d84d 4027 EX-4027 1S(16C650) RS-232
+ d84d 4028 EX-4028 1S(16C850) RS-232
+ d84d 4036 EX-4036 2S(16C650) RS-232
+ d84d 4037 EX-4037 2S(16C650) RS-232
+ d84d 4038 EX-4038 2S(16C850) RS-232
+ d84d 4052 EX-4052 1S(16C550) RS-422/485
+ d84d 4053 EX-4053 2S(16C550) RS-422/485
+ d84d 4055 EX-4055 4S(16C550) RS-232
+ d84d 4058 EX-4058 4S(16C650) RS-232
+ d84d 4065 EX-4065 8S(16C550) RS-232
+ d84d 4068 EX-4068 8S(16C650) RS-232
+ d84d 4078 EX-4078 2S(16C552) RS-232+1P
+ 9054 PCI <-> IOBus Bridge
+ 10b5 2455 Wessex Technology PHIL-PCI
+ 9060 9060
+ 906d 9060SD
+ 125c 0640 Aries 16000P
+ 906e 9060ES
+ 9080 9080
+ 10b5 9080 9080 [real subsystem ID not set]
+ 129d 0002 Aculab PCI Prosody card
+ a001 GTEK Jetport II 2 port serial adaptor
+ c001 GTEK Cyclone 16/32 port serial adaptor
+10b6 Madge Networks
+ 0001 Smart 16/4 PCI Ringnode
+ 0002 Smart 16/4 PCI Ringnode Mk2
+ 10b6 0002 Smart 16/4 PCI Ringnode Mk2
+ 10b6 0006 16/4 CardBus Adapter
+ 0003 Smart 16/4 PCI Ringnode Mk3
+ 0e11 b0fd Compaq NC4621 PCI, 4/16, WOL
+ 10b6 0003 Smart 16/4 PCI Ringnode Mk3
+ 10b6 0007 Presto PCI Plus Adapter
+ 0004 Smart 16/4 PCI Ringnode Mk1
+ 0006 16/4 Cardbus Adapter
+ 10b6 0006 16/4 CardBus Adapter
+ 0007 Presto PCI Adapter
+ 10b6 0007 Presto PCI
+ 0009 Smart 100/16/4 PCI-HS Ringnode
+ 10b6 0009 Smart 100/16/4 PCI-HS Ringnode
+ 000a Smart 100/16/4 PCI Ringnode
+ 10b6 000a Smart 100/16/4 PCI Ringnode
+ 000b 16/4 CardBus Adapter Mk2
+ 10b6 0008 16/4 CardBus Adapter Mk2
+ 10b6 000b 16/4 Cardbus Adapter Mk2
+ 000c RapidFire 3140V2 16/4 TR Adapter
+ 10b6 000c RapidFire 3140V2 16/4 TR Adapter
+ 1000 Collage 25/155 ATM Client Adapter
+ 1001 Collage 155 ATM Server Adapter
+10b7 3Com Corporation
+ 0001 3c985 1000BaseSX (SX/TX)
+ 1006 MINI PCI type 3B Data Fax Modem
+ 1007 Mini PCI 56k Winmodem
+ 10b7 615c Mini PCI 56K Modem
+ 3390 3c339 TokenLink Velocity
+ 3590 3c359 TokenLink Velocity XL
+ 10b7 3590 TokenLink Velocity XL Adapter (3C359/359B)
+ 4500 3c450 Cyclone/unknown
+ 5055 3c555 Laptop Hurricane
+ 5057 3c575 [Megahertz] 10/100 LAN CardBus
+ 10b7 5a57 3C575 Megahertz 10/100 LAN Cardbus PC Card
+ 5157 3c575 [Megahertz] 10/100 LAN CardBus
+ 10b7 5b57 3C575 Megahertz 10/100 LAN Cardbus PC Card
+ 5257 3CCFE575CT Cyclone CardBus
+ 10b7 5c57 FE575C-3Com 10/100 LAN CardBus-Fast Ethernet
+ 5900 3c590 10BaseT [Vortex]
+ 5920 3c592 EISA 10mbps Demon/Vortex
+ 5950 3c595 100BaseTX [Vortex]
+ 5951 3c595 100BaseT4 [Vortex]
+ 5952 3c595 100Base-MII [Vortex]
+ 5970 3c597 EISA Fast Demon/Vortex
+ 5b57 3c595 [Megahertz] 10/100 LAN CardBus
+ 10b7 5b57 3C575 Megahertz 10/100 LAN Cardbus PC Card
+ 6055 3c556 Hurricane CardBus
+ 6056 3c556B Hurricane CardBus
+ 10b7 6556 10/100 Mini PCI Ethernet Adapter
+ 6560 3CCFE656 Cyclone CardBus
+ 10b7 656a 3CCFEM656 10/100 LAN+56K Modem CardBus
+ 6561 3CCFEM656 10/100 LAN+56K Modem CardBus
+ 10b7 656b 3CCFEM656 10/100 LAN+56K Modem CardBus
+ 6562 3CCFEM656 [id 6562] Cyclone CardBus
+ 10b7 656b 3CCFEM656B 10/100 LAN+56K Modem CardBus
+ 6563 3CCFEM656B 10/100 LAN+56K Modem CardBus
+ 10b7 656b 3CCFEM656 10/100 LAN+56K Modem CardBus
+ 6564 3CCFEM656 [id 6564] Cyclone CardBus
+ 7646 3cSOHO100-TX Hurricane
+ 7940 3c803 FDDILink UTP Controller
+ 7980 3c804 FDDILink SAS Controller
+ 7990 3c805 FDDILink DAS Controller
+ 8811 Token ring
+ 9000 3c900 10BaseT [Boomerang]
+ 9001 3c900 Combo [Boomerang]
+ 9004 3c900B-TPO [Etherlink XL TPO]
+ 10b7 9004 3C900B-TPO Etherlink XL TPO 10Mb
+ 9005 3c900B-Combo [Etherlink XL Combo]
+ 10b7 9005 3C900B-Combo Etherlink XL Combo
+ 9006 3c900B-TPC [Etherlink XL TPC]
+ 900a 3c900B-FL [Etherlink XL FL]
+ 9050 3c905 100BaseTX [Boomerang]
+ 9051 3c905 100BaseT4 [Boomerang]
+ 9055 3c905B 100BaseTX [Cyclone]
+ 1028 0080 3C905B Fast Etherlink XL 10/100
+ 1028 0081 3C905B Fast Etherlink XL 10/100
+ 1028 0082 3C905B Fast Etherlink XL 10/100
+ 1028 0083 3C905B Fast Etherlink XL 10/100
+ 1028 0084 3C905B Fast Etherlink XL 10/100
+ 1028 0085 3C905B Fast Etherlink XL 10/100
+ 1028 0086 3C905B Fast Etherlink XL 10/100
+ 1028 0087 3C905B Fast Etherlink XL 10/100
+ 1028 0088 3C905B Fast Etherlink XL 10/100
+ 1028 0089 3C905B Fast Etherlink XL 10/100
+ 1028 0090 3C905B Fast Etherlink XL 10/100
+ 1028 0091 3C905B Fast Etherlink XL 10/100
+ 1028 0092 3C905B Fast Etherlink XL 10/100
+ 1028 0093 3C905B Fast Etherlink XL 10/100
+ 1028 0094 3C905B Fast Etherlink XL 10/100
+ 1028 0095 3C905B Fast Etherlink XL 10/100
+ 1028 0096 3C905B Fast Etherlink XL 10/100
+ 1028 0097 3C905B Fast Etherlink XL 10/100
+ 1028 0098 3C905B Fast Etherlink XL 10/100
+ 1028 0099 3C905B Fast Etherlink XL 10/100
+ 10b7 9055 3C905B Fast Etherlink XL 10/100
+ 9056 3c905B-T4 [Fast EtherLink XL 10/100]
+ 9058 3c905B-Combo [Deluxe Etherlink XL 10/100]
+ 905a 3c905B-FX [Fast Etherlink XL FX 10/100]
+ 9200 3c905C-TX/TX-M [Tornado]
+ 1028 0095 Integrated 3C905C-TX Fast Etherlink for PC Management NIC
+ 10b7 1000 3C905C-TX Fast Etherlink for PC Management NIC
+ 10b7 7000 10/100 Mini PCI Ethernet Adapter
+ 9800 3c980-TX [Fast Etherlink XL Server Adapter]
+ 10b7 9800 3c980-TX Fast Etherlink XL Server Adapter
+ 9805 3c980-TX 10/100baseTX NIC [Python-T]
+ 10b7 1201 3c982-TXM 10/100baseTX Dual Port A [Hydra]
+ 10b7 1202 3c982-TXM 10/100baseTX Dual Port B [Hydra]
+ 10b7 9805 3c980 10/100baseTX NIC [Python-T]
+ 9900 3C990-TX Typhoon
+ 9902 3CR990-TX-95 56-bit Typhoon Client
+ 9903 3CR990-TX-97 168-bit Typhoon Client
+ 9904 3C990B-TX-M/3C990BSVR [Typhoon2]
+ 9905 3CR990-FX-95/97/95 [Typhoon Fiber]
+ 9908 3CR990SVR95 56-bit Typhoon Server
+ 9909 3CR990SVR97 Typhoon Server
+ 990b 3C990SVR [Typhoon Server]
+10b8 Standard Microsystems Corp [SMC]
+ 0005 83C170QF
+ 1055 e000 LANEPIC 10/100 [EVB171Q-PCI]
+ 1055 e002 LANEPIC 10/100 [EVB171G-PCI]
+ 10b8 a011 EtherPower II 10/100
+ 10b8 a014 EtherPower II 10/100
+ 10b8 a015 EtherPower II 10/100
+ 10b8 a016 EtherPower II 10/100
+ 10b8 a017 EtherPower II 10/100
+ 0006 LANEPIC
+ 1055 e100 LANEPIC Cardbus Fast Ethernet Adapter
+ 1055 e102 LANEPIC Cardbus Fast Ethernet Adapter
+ 1055 e300 LANEPIC Cardbus Fast Ethernet Adapter
+ 1055 e302 LANEPIC Cardbus Fast Ethernet Adapter
+ 10b8 a012 LANEPIC Cardbus Fast Ethernet Adapter
+ 13a2 8002 LANEPIC Cardbus Fast Ethernet Adapter
+ 13a2 8006 LANEPIC Cardbus Fast Ethernet Adapter
+ 1000 FDC 37c665
+ 1001 FDC 37C922
+ a011 83C170QF
+ b106 SMC34C90
+10b9 Acer Laboratories Inc. [ALi]
+ 0111 C-Media CMI8738/C3DX Audio Device (OEM)
+ 10b9 0111 C-Media CMI8738/C3DX Audio Device (OEM)
+ 1435 M1435
+ 1445 M1445
+ 1449 M1449
+ 1451 M1451
+ 1461 M1461
+ 1489 M1489
+ 1511 M1511 [Aladdin]
+ 1512 M1512 [Aladdin]
+ 1513 M1513 [Aladdin]
+ 1521 M1521 [Aladdin III]
+ 10b9 1521 ALI M1521 Aladdin III CPU Bridge
+ 1523 M1523
+ 10b9 1523 ALI M1523 ISA Bridge
+ 1531 M1531 [Aladdin IV]
+ 1533 M1533 PCI to ISA Bridge [Aladdin IV]
+ 10b9 1533 ALI M1533 Aladdin IV ISA Bridge
+ 1541 M1541
+ 10b9 1541 ALI M1541 Aladdin V/V+ AGP System Controller
+ 1543 M1543
+ 1621 M1621
+ 1631 ALI M1631 PCI North Bridge Aladdin Pro III
+ 1632 M1632M Northbridge+Trident
+ 1641 ALI M1641 PCI North Bridge Aladdin Pro IV
+ 1644 M1644/M1644T Northbridge+Trident
+ 1646 M1646 Northbridge+Trident
+ 1647 M1647 Northbridge [MAGiK 1 / MobileMAGiK 1]
+ 1651 M1651/M1651T Northbridge [Aladdin-Pro 5/5M,Aladdin-Pro 5T/5TM]
+ 1671 M1671 Northbridge [Aladdin-P4]
+ 3141 M3141
+ 3143 M3143
+ 3145 M3145
+ 3147 M3147
+ 3149 M3149
+ 3151 M3151
+ 3307 M3307
+ 3309 M3309
+ 5212 M4803
+ 5215 MS4803
+ 5217 M5217H
+ 5219 M5219
+ 5225 M5225
+ 5229 M5229 IDE
+ 1043 8053 A7A266 Motherboard IDE
+ 5235 M5225
+ 5237 USB 1.1 Controller
+ 5239 USB 2.0 Controller
+ 5243 M1541 PCI to AGP Controller
+ 5247 PCI to AGP Controller
+ 5251 M5251 P1394 OHCI 1.0 Controller
+ 5253 M5253 P1394 OHCI 1.1 Controller
+ 5261 M5261 Ethernet Controller
+ 5451 M5451 PCI AC-Link Controller Audio Device
+ 5453 M5453 PCI AC-Link Controller Modem Device
+ 5455 M5455 PCI AC-Link Controller Audio Device
+ 5457 M5457 AC-Link Modem Interface Controller
+ 5471 M5471 Memory Stick Controller
+ 5473 M5473 SD-MMC Controller
+ 7101 M7101 PMU
+ 10b9 7101 ALI M7101 Power Management Controller
+10ba Mitsubishi Electric Corp.
+ 0301 AccelGraphics AccelECLIPSE
+10bb Dapha Electronics Corporation
+10bc Advanced Logic Research
+10bd Surecom Technology
+ 0e34 NE-34
+10be Tseng Labs International Co.
+10bf Most Inc
+10c0 Boca Research Inc.
+10c1 ICM Co., Ltd.
+10c2 Auspex Systems Inc.
+10c3 Samsung Semiconductors, Inc.
+ 1100 Smartether100 SC1100 LAN Adapter (i82557B)
+10c4 Award Software International Inc.
+10c5 Xerox Corporation
+10c6 Rambus Inc.
+10c7 Media Vision
+10c8 Neomagic Corporation
+ 0001 NM2070 [MagicGraph NM2070]
+ 0002 NM2090 [MagicGraph 128V]
+ 0003 NM2093 [MagicGraph 128ZV]
+ 0004 NM2160 [MagicGraph 128XD]
+ 1014 00ba MagicGraph 128XD
+ 1025 1007 MagicGraph 128XD
+ 1028 0074 MagicGraph 128XD
+ 1028 0075 MagicGraph 128XD
+ 1028 007d MagicGraph 128XD
+ 1028 007e MagicGraph 128XD
+ 1033 802f MagicGraph 128XD
+ 104d 801b MagicGraph 128XD
+ 104d 802f MagicGraph 128XD
+ 104d 830b MagicGraph 128XD
+ 10ba 0e00 MagicGraph 128XD
+ 10c8 0004 MagicGraph 128XD
+ 10cf 1029 MagicGraph 128XD
+ 10f7 8308 MagicGraph 128XD
+ 10f7 8309 MagicGraph 128XD
+ 10f7 830b MagicGraph 128XD
+ 10f7 830d MagicGraph 128XD
+ 10f7 8312 MagicGraph 128XD
+ 0005 [MagicMedia 256AV]
+ 0006 NM2360 [MagicMedia 256ZX]
+ 0016 NM2380 [MagicMedia 256XL+]
+ 10c8 0016 MagicMedia 256XL+
+ 0025 [MagicMedia 256AV+]
+ 0083 [MagicGraph 128ZV Plus]
+ 8005 [MagicMedia 256AV Audio]
+ 0e11 b0d1 MagicMedia 256AV Audio Device on Discovery
+ 0e11 b126 MagicMedia 256AV Audio Device on Durango
+ 1014 00dd MagicMedia 256AV Audio Device on BlackTip Thinkpad
+ 1025 1003 MagicMedia 256AV Audio Device on TravelMate 720
+ 1028 008f MagicMedia 256AV Audio Device on Colorado Inspiron
+ 103c 0007 MagicMedia 256AV Audio Device on Voyager II
+ 103c 0008 MagicMedia 256AV Audio Device on Voyager III
+ 103c 000d MagicMedia 256AV Audio Device on Omnibook 900
+ 10c8 8005 MagicMedia 256AV Audio Device on FireAnt
+ 110a 8005 MagicMedia 256AV Audio Device
+ 14c0 0004 MagicMedia 256AV Audio Device
+ 8006 NM2360 [MagicMedia 256ZX Audio]
+ 8016 NM2360 [MagicMedia 256ZX Audio]
+10c9 Dataexpert Corporation
+10ca Fujitsu Microelectr., Inc.
+10cb Omron Corporation
+10cc Mentor ARC Inc
+10cd Advanced System Products, Inc
+ 1100 ASC1100
+ 1200 ASC1200 [(abp940) Fast SCSI-II]
+ 1300 ABP940-U / ABP960-U
+ 10cd 1310 ASC1300 SCSI Adapter
+ 2300 ABP940-UW
+ 2500 ABP940-U2W
+10ce Radius
+10cf Citicorp TTI
+ 2001 mb86605
+10d0 Fujitsu Limited
+10d1 FuturePlus Systems Corp.
+10d2 Molex Incorporated
+10d3 Jabil Circuit Inc
+10d4 Hualon Microelectronics
+10d5 Autologic Inc.
+10d6 Cetia
+10d7 BCM Advanced Research
+10d8 Advanced Peripherals Labs
+10d9 Macronix, Inc. [MXIC]
+ 0512 MX98713
+ 0531 MX987x5
+ 1186 1200 DFE-540TX ProFAST 10/100 Adapter
+ 8625 MX86250
+ 8888 MX86200
+10da Compaq IPG-Austin
+ 0508 TC4048 Token Ring 4/16
+ 3390 Tl3c3x9
+10db Rohm LSI Systems, Inc.
+10dc CERN/ECP/EDU
+ 0001 STAR/RD24 SCI-PCI (PMC)
+ 0002 TAR/RD24 SCI-PCI (PMC)
+ 0021 HIPPI destination
+ 0022 HIPPI source
+ 10dc ATT2C15-3 FPGA
+10dd Evans & Sutherland
+10de nVidia Corporation
+ 0008 NV1 [EDGE 3D]
+ 0009 NV1 [EDGE 3D]
+ 0010 NV2 [Mutara V08]
+ 0020 NV4 [Riva TnT]
+ 1043 0200 V3400 TNT
+ 1048 0c18 Erazor II SGRAM
+ 1048 0c1b Erazor II
+ 1092 0550 Viper V550
+ 1092 0552 Viper V550
+ 1092 4804 Viper V550
+ 1092 4808 Viper V550
+ 1092 4810 Viper V550
+ 1092 4812 Viper V550
+ 1092 4815 Viper V550
+ 1092 4820 Viper V550 with TV out
+ 1092 4822 Viper V550
+ 1092 4904 Viper V550
+ 1092 4914 Viper V550
+ 1092 8225 Viper V550
+ 10b4 273d Velocity 4400
+ 10b4 2740 Velocity 4400
+ 10de 0020 Riva TNT
+ 1102 1015 Graphics Blaster CT6710
+ 1102 1016 Graphics Blaster RIVA TNT
+ 0028 NV5 [Riva TnT2]
+ 1043 0200 AGP-V3800 SGRAM
+ 1043 0201 AGP-V3800 SDRAM
+ 1043 0205 PCI-V3800
+ 1043 4000 AGP-V3800PRO
+ 1092 4804 Viper V770
+ 1092 4a00 Viper V770
+ 1092 4a02 Viper V770 Ultra
+ 1092 5a00 RIVA TNT2/TNT2 Pro
+ 1092 6a02 Viper V770 Ultra
+ 1092 7a02 Viper V770 Ultra
+ 10de 0005 RIVA TNT2 Pro
+ 10de 000f Compaq NVIDIA TNT2 Pro
+ 1102 1020 3D Blaster RIVA TNT2
+ 1102 1026 3D Blaster RIVA TNT2 Digital
+ 14af 5810 Maxi Gamer Xentor
+ 0029 NV5 [Riva TnT2 Ultra]
+ 1043 0200 AGP-V3800 Deluxe
+ 1043 0201 AGP-V3800 Ultra SDRAM
+ 1043 0205 PCI-V3800 Ultra
+ 1102 1021 3D Blaster RIVA TNT2 Ultra
+ 1102 1029 3D Blaster RIVA TNT2 Ultra
+ 1102 102f 3D Blaster RIVA TNT2 Ultra
+ 14af 5820 Maxi Gamer Xentor 32
+ 002a NV5 [Riva TnT2]
+ 002b NV5 [Riva TnT2]
+ 002c NV6 [Vanta]
+ 1043 0200 AGP-V3800 Combat SDRAM
+ 1043 0201 AGP-V3800 Combat
+ 1092 6820 Viper V730
+ 1102 1031 CT6938 VANTA 8MB
+ 1102 1034 CT6894 VANTA 16MB
+ 14af 5008 Maxi Gamer Phoenix 2
+ 002d RIVA TNT2 Model 64
+ 1043 0200 AGP-V3800M
+ 1043 0201 AGP-V3800M
+ 1102 1023 CT6892 RIVA TNT2 Value
+ 1102 1024 CT6932 RIVA TNT2 Value 32Mb
+ 1102 102c CT6931 RIVA TNT2 Value [Jumper]
+ 1462 8808 MSI-8808
+ 1554 1041 PixelView RIVA TNT2 M64 32MB
+ 002e NV6 [Vanta]
+ 002f NV6 [Vanta]
+ 00a0 NV5 [Riva TNT2]
+ 14af 5810 Maxi Gamer Xentor
+ 0100 NV10 [GeForce 256 SDR]
+ 1043 0200 AGP-V6600 SGRAM
+ 1043 0201 AGP-V6600 SDRAM
+ 1043 4008 AGP-V6600 SGRAM
+ 1043 4009 AGP-V6600 SDRAM
+ 1102 102d CT6941 GeForce 256
+ 14af 5022 3D Prophet SE
+ 0101 NV10 [GeForce 256 DDR]
+ 1043 0202 AGP-V6800 DDR
+ 1043 400a AGP-V6800 DDR SGRAM
+ 1043 400b AGP-V6800 DDR SDRAM
+ 1102 102e CT6971 GeForce 256 DDR
+ 14af 5021 3D Prophet DDR-DVI
+ 0103 NV10 [Quadro]
+ 0110 NV11 [GeForce2 MX]
+ 1043 4015 AGP-V7100 Pro
+ 1043 4031 V7100 Pro with TV output
+ 14af 7103 3D Prophet II MX Dual-Display
+ 0111 NV11 [GeForce2 MX DDR]
+ 0112 NV11 [GeForce2 Go]
+ 0113 NV11 [GeForce2 MXR]
+ 0150 NV15 [GeForce2 GTS]
+ 1043 4016 V7700 AGP Video Card
+ 107d 2840 WinFast GeForce2 GTS with TV output
+ 1462 8831 Creative GeForce2 Pro
+ 0151 NV15 [GeForce2 Ti]
+ 0152 NV15 [GeForce2 Ultra, Bladerunner]
+ 1048 0c56 GLADIAC Ultra
+ 0153 NV15 [Quadro2 Pro]
+ 0170 NV17 [GeForce4 MX460]
+ 0171 NV17 [GeForce4 MX440]
+ 0172 NV17 [GeForce4 MX420]
+ 0173 NV1x
+ 0174 NV17 [GeForce4 440 Go]
+ 0175 NV17 [GeForce4 420 Go]
+ 0176 NV17 [GeForce4 420 Go 32M]
+ 0178 Quadro4 500XGL
+ 0179 NV17 [GeForce4 440 Go 64M]
+ 017a Quadro4 200/400NVS
+ 017b Quadro4 550XGL
+ 017c Quadro4 550 GoGL
+ 01a0 NV15 [GeForce2 - nForce GPU]
+ 01a4 nForce CPU bridge
+ 01ab nForce 420 Memory Controller (DDR)
+ 01ac nForce 220/420 Memory Controller
+ 01ad nForce 220/420 Memory Controller
+ 01b1 nForce Audio
+ 01b2 nForce ISA Bridge
+ 01b4 nForce PCI System Management
+ 01b7 nForce AGP to PCI Bridge
+ 01b8 nForce PCI-to-PCI bridge
+ 01bc nForce IDE
+ 0200 NV20 [GeForce3]
+ 1043 402f AGP-V8200 DDR
+ 0201 NV20 [GeForce3 Ti200]
+ 0202 NV20 [GeForce3 Ti500]
+ 1043 405b V8200 T5
+ 0203 NV20 [Quadro DCC]
+ 0250 NV25 [GeForce4 Ti4600]
+ 0251 NV25 [GeForce4 Ti4400]
+ 0253 NV25 [GeForce4 Ti4200]
+ 0258 Quadro4 900XGL
+ 0259 Quadro4 750XGL
+ 025b Quadro4 700XGL
+10df Emulex Corporation
+ 10df Light Pulse Fibre Channel Adapter
+ 1ae5 LP6000 Fibre Channel Host Adapter
+ f700 LP7000 Fibre Channel Host Adapter
+ f800 LP8000 Fibre Channel Host Adapter
+ f900 LP9000 Fibre Channel Host Adapter
+10e0 Integrated Micro Solutions Inc.
+ 5026 IMS5026/27/28
+ 5027 IMS5027
+ 5028 IMS5028
+ 8849 IMS8849
+ 8853 IMS8853
+ 9128 IMS9129 [Twin turbo 128]
+10e1 Tekram Technology Co.,Ltd.
+ 0391 TRM-S1040
+ 10e1 0391 DC-315U SCSI-3 Host Adapter
+ 690c DC-690c
+ dc29 DC-290
+10e2 Aptix Corporation
+10e3 Tundra Semiconductor Corp.
+ 0000 CA91C042 [Universe]
+ 0860 CA91C860 [QSpan]
+10e4 Tandem Computers
+10e5 Micro Industries Corporation
+10e6 Gainbery Computer Products Inc.
+10e7 Vadem
+10e8 Applied Micro Circuits Corp.
+ 2011 Q-Motion Video Capture/Edit board
+ 4750 S5930 [Matchmaker]
+ 5920 S5920
+ 8043 LANai4.x [Myrinet LANai interface chip]
+ 8062 S5933_PARASTATION
+ 807d S5933 [Matchmaker]
+ 8088 Kongsberg Spacetec Format Synchronizer
+ 8089 Kongsberg Spacetec Serial Output Board
+ 809c S5933_HEPC3
+ 80d7 PCI-9112
+ 80d9 PCI-9118
+ 80da PCI-9812
+ 811a PCI-IEEE1355-DS-DE Interface
+ 8170 S5933 [Matchmaker] (Chipset Development Tool)
+ 82db AJA HDNTV HD SDI Framestore
+10e9 Alps Electric Co., Ltd.
+10ea Intergraphics Systems
+ 1680 IGA-1680
+ 1682 IGA-1682
+ 1683 IGA-1683
+ 2000 CyberPro 2000
+ 2010 CyberPro 2000A
+ 5000 CyberPro 5000
+ 5050 CyberPro 5050
+10eb Artists Graphics
+ 0101 3GA
+ 8111 Twist3 Frame Grabber
+10ec Realtek Semiconductor Co., Ltd.
+ 8029 RTL-8029(AS)
+ 10b8 2011 EZ-Card (SMC1208)
+ 10ec 8029 RTL-8029(AS)
+ 1113 1208 EN1208
+ 1186 0300 DE-528
+ 1259 2400 AT-2400
+ 8129 RTL-8129
+ 10ec 8129 RT8129 Fast Ethernet Adapter
+ 8138 RT8139 (B/C) Cardbus Fast Ethernet Adapter
+ 10ec 8138 RT8139 (B/C) Fast Ethernet Adapter
+ 8139 RTL-8139/8139C/8139C+
+ 1025 8920 ALN-325
+ 1025 8921 ALN-325
+ 10bd 0320 EP-320X-R
+ 10ec 8139 RT8139
+ 1186 1300 DFE-538TX
+ 1186 1320 SN5200
+ 1186 8139 DRN-32TX
+ 1259 2500 AT-2500TX
+ 1259 2503 AT-2500TX/ACPI
+ 1429 d010 ND010
+ 1432 9130 EN-9130TX
+ 1436 8139 RT8139
+ 146c 1439 FE-1439TX
+ 1489 6001 GF100TXRII
+ 1489 6002 GF100TXRA
+ 149c 139a LFE-8139ATX
+ 149c 8139 LFE-8139TX
+ 2646 0001 EtheRx
+ 8e2e 7000 KF-230TX
+ 8e2e 7100 KF-230TX/2
+ a0a0 0007 ALN-325C
+ 8169 RTL-8169
+10ed Ascii Corporation
+ 7310 V7310
+10ee Xilinx, Inc.
+ 3fc0 RME Digi96
+ 3fc1 RME Digi96/8
+ 3fc2 RME Digi96/8 Pro
+ 3fc3 RME Digi96/8 Pad
+ 3fc4 RME Digi9652 (Hammerfall)
+ 3fc5 RME Hammerfall DSP
+10ef Racore Computer Products, Inc.
+ 8154 M815x Token Ring Adapter
+10f0 Peritek Corporation
+10f1 Tyan Computer
+10f2 Achme Computer, Inc.
+10f3 Alaris, Inc.
+10f4 S-MOS Systems, Inc.
+10f5 NKK Corporation
+ a001 NDR4000 [NR4600 Bridge]
+10f6 Creative Electronic Systems SA
+10f7 Matsushita Electric Industrial Co., Ltd.
+10f8 Altos India Ltd
+10f9 PC Direct
+10fa Truevision
+ 000c TARGA 1000
+10fb Thesys Gesellschaft für Mikroelektronik mbH
+10fc I-O Data Device, Inc.
+# What's in the cardbus end of a Sony ACR-A01 card; comes with newer Vaio CD-RW drives
+ 0003 Cardbus IDE Controller
+ 0005 Cardbus SCSI CBSC II
+10fd Soyo Computer, Inc
+10fe Fast Multimedia AG
+10ff NCube
+1100 Jazz Multimedia
+1101 Initio Corporation
+ 1060 INI-A100U2W
+ 9100 INI-9100/9100W
+ 9400 INI-940
+ 9401 INI-950
+ 9500 360P
+1102 Creative Labs
+ 0002 SB Live! EMU10k1
+ 1102 0020 CT4850 SBLive! Value
+ 1102 0021 CT4620 SBLive!
+ 1102 002f SBLive! mainboard implementation
+ 1102 4001 E-mu APS
+ 1102 8022 CT4780 SBLive! Value
+ 1102 8023 CT4790 SoundBlaster PCI512
+ 1102 8024 CT4760 SBLive!
+ 1102 8025 SBLive! Mainboard Implementation
+ 1102 8026 CT4830 SBLive! Value
+ 1102 8027 CT4832 SBLive! Value
+ 1102 8028 CT4760 SBLive! OEM version
+ 1102 8031 CT4831 SBLive! Value
+ 1102 8040 CT4760 SBLive!
+ 1102 8051 CT4850 SBLive! Value
+ 1102 8061 SBLive! Player 5.1
+ 0004 SB Audigy
+ 1102 0051 SB0090 Audigy Player
+ 4001 SB Audigy FireWire Port
+ 7002 SB Live! MIDI/Game Port
+ 1102 0020 Gameport Joystick
+ 7003 SB Audigy MIDI/Game port
+ 1102 0040 SB Audigy MIDI/Gameport
+ 8938 ES1371
+1103 Triones Technologies, Inc.
+ 0003 HPT343
+# Revisions: 01=HPT366, 03=HPT370, 04=HPT370A, 05=HPT372
+ 0004 HPT366/368/370/370A/372
+ 1103 0001 HPT370A
+ 1103 0005 HPT370 UDMA100
+ 0005 HPT372A
+ 0006 HPT302
+ 0007 HPT371
+ 0008 HPT374
+1104 RasterOps Corp.
+1105 Sigma Designs, Inc.
+ 1105 REALmagic Xcard MPEG 1/2/3/4 DVD Decoder
+ 8300 REALmagic Hollywood Plus DVD Decoder
+ 8400 EM840x REALmagic DVD/MPEG-2 Audio/Video Decoder
+1106 VIA Technologies, Inc.
+ 0130 VT6305 1394.A Controller
+ 0305 VT8363/8365 [KT133/KM133]
+ 1043 8033 A7V Mainboard
+ 1043 8042 A7V133/A7V133-C Mainboard
+ 147b a401 KT7/KT7-RAID/KT7A/KT7A-RAID Mainboard
+ 0391 VT8371 [KX133]
+ 0501 VT8501 [Apollo MVP4]
+ 0505 VT82C505
+ 0561 VT82C561
+ 0571 VT82C586B PIPC Bus Master IDE
+ 1458 5002 GA-7VAX Mainboard
+ 0576 VT82C576 3V [Apollo Master]
+ 0585 VT82C585VP [Apollo VP1/VPX]
+ 0586 VT82C586/A/B PCI-to-ISA [Apollo VP]
+ 1106 0000 MVP3 ISA Bridge
+ 0595 VT82C595 [Apollo VP2]
+ 0596 VT82C596 ISA [Mobile South]
+ 1106 0000 VT82C596/A/B PCI to ISA Bridge
+ 1458 0596 VT82C596/A/B PCI to ISA Bridge
+ 0597 VT82C597 [Apollo VP3]
+ 0598 VT82C598 [Apollo MVP3]
+ 0601 VT8601 [Apollo ProMedia]
+ 0605 VT8605 [ProSavage PM133]
+ 0680 VT82C680 [Apollo P6]
+ 0686 VT82C686 [Apollo Super South]
+ 1043 8033 A7V Mainboard
+ 1043 8042 A7V133/A7V133-C Mainboard
+ 1106 0000 VT82C686/A PCI to ISA Bridge
+ 1106 0686 VT82C686/A PCI to ISA Bridge
+ 0691 VT82C693A/694x [Apollo PRO133x]
+ 1458 0691 VT82C691 Apollo Pro System Controller
+ 0693 VT82C693 [Apollo Pro Plus]
+ 0698 VT82C693A [Apollo Pro133 AGP]
+ 0926 VT82C926 [Amazon]
+ 1000 VT82C570MV
+ 1106 VT82C570MV
+ 1571 VT82C416MV
+ 1595 VT82C595/97 [Apollo VP2/97]
+ 3038 USB
+ 0925 1234 USB Controller
+ 1234 0925 MVP3 USB Controller
+ 3040 VT82C586B ACPI
+ 3043 VT86C100A [Rhine]
+ 10bd 0000 VT86C100A Fast Ethernet Adapter
+ 1106 0100 VT86C100A Fast Ethernet Adapter
+ 1186 1400 DFE-530TX rev A
+ 3044 IEEE 1394 Host Controller
+ 3050 VT82C596 Power Management
+ 3051 VT82C596 Power Management
+ 3057 VT82C686 [Apollo Super ACPI]
+ 1043 8033 A7V Mainboard
+ 1043 8042 A7V133/A7V133-C Mainboard
+ 3058 VT82C686 AC97 Audio Controller
+ 0e11 b194 Soundmax integrated digital audio
+ 1106 4511 Onboard Audio on EP7KXA
+ 1458 7600 Onboard Audio
+ 1462 3091 MS-6309 Onboard Audio
+ 15dd 7609 Onboard Audio
+ 3059 VT8233 AC97 Audio Controller
+ 1458 a002 GA-7VAX Onboard Audio (Realtek ALC650)
+ 3065 VT6102 [Rhine-II]
+ 1186 1400 DFE-530TX rev A
+ 1186 1401 DFE-530TX rev B
+ 3068 AC97 Modem Controller
+ 3074 VT8233 PCI to ISA Bridge
+ 3091 VT8633 [Apollo Pro266]
+ 3099 VT8367 [KT266]
+ 1043 8064 A7V266-E
+ 1043 807f A7V333
+ 3101 VT8653 Host Bridge
+ 3102 VT8662 Host Bridge
+ 3103 VT8615 Host Bridge
+ 3104 USB 2.0
+ 1458 5004 GA-7VAX Mainboard
+ 3109 VT8233C PCI to ISA Bridge
+ 3112 VT8361 [KLE133] Host Bridge
+ 3128 VT8753 [P4X266 AGP]
+ 3133 VT3133 Host Bridge
+ 3147 VT8233A ISA Bridge
+ 3148 P4M266 Host Bridge
+ 3156 P/KN266 Host Bridge
+ 3177 VT8233A ISA Bridge
+ 1458 5001 GA-7VAX Mainboard
+ 3189 VT8377 [KT400 AGP] Host Bridge
+ 1458 5000 GA-7VAX Mainboard
+ 5030 VT82C596 ACPI [Apollo PRO]
+ 6100 VT85C100A [Rhine II]
+ 8231 VT8231 [PCI-to-ISA Bridge]
+ 8235 VT8235 ACPI
+ 8305 VT8363/8365 [KT133/KM133 AGP]
+ 8391 VT8371 [KX133 AGP]
+ 8501 VT8501 [Apollo MVP4 AGP]
+ 8596 VT82C596 [Apollo PRO AGP]
+ 8597 VT82C597 [Apollo VP3 AGP]
+ 8598 VT82C598/694x [Apollo MVP3/Pro133x AGP]
+ 8601 VT8601 [Apollo ProMedia AGP]
+ 8605 VT8605 [PM133 AGP]
+ 8691 VT82C691 [Apollo Pro]
+ 8693 VT82C693 [Apollo Pro Plus] PCI Bridge
+ b091 VT8633 [Apollo Pro266 AGP]
+ b099 VT8367 [KT333 AGP]
+ b101 VT8653 AGP Bridge
+ b102 VT8362 AGP Bridge
+ b103 VT8615 AGP Bridge
+ b112 VT8361 [KLE133] AGP Bridge
+ b168 VT8235 PCI Bridge
+1107 Stratus Computers
+ 0576 VIA VT82C570MV [Apollo] (Wrong vendor ID!)
+1108 Proteon, Inc.
+ 0100 p1690plus_AA
+ 0101 p1690plus_AB
+ 0105 P1690Plus
+ 0108 P1690Plus
+ 0138 P1690Plus
+ 0139 P1690Plus
+ 013c P1690Plus
+ 013d P1690Plus
+1109 Cogent Data Technologies, Inc.
+ 1400 EM110TX [EX110TX]
+110a Siemens Nixdorf AG
+ 0002 Piranha 2-port
+ 0005 Tulip controller, power management, switch extender
+ 2102 DSCC4 WAN adapter
+ 4942 FPGA I-Bus Tracer for MBD
+ 6120 SZB6120
+110b Chromatic Research Inc.
+ 0001 Mpact Media Processor
+ 0004 Mpact 2
+110c Mini-Max Technology, Inc.
+110d Znyx Advanced Systems
+110e CPU Technology
+110f Ross Technology
+1110 Powerhouse Systems
+ 6037 Firepower Powerized SMP I/O ASIC
+ 6073 Firepower Powerized SMP I/O ASIC
+1111 Santa Cruz Operation
+# DJ: Some people say that 0x1112 is Rockwell International
+1112 RNS - Div. of Meret Communications Inc
+ 2200 FDDI Adapter
+ 2300 Fast Ethernet Adapter
+ 2340 4 Port Fast Ethernet Adapter
+ 2400 ATM Adapter
+1113 Accton Technology Corporation
+ 1211 SMC2-1211TX
+ 103c 1207 EN-1207D Fast Ethernet Adapter
+ 1113 1211 EN-1207D Fast Ethernet Adapter
+ 1216 EN-1216 Ethernet Adapter
+ 1217 EN-1217 Ethernet Adapter
+ 5105 10Mbps Network card
+ 9211 EN-1207D Fast Ethernet Adapter
+ 1113 9211 EN-1207D Fast Ethernet Adapter
+ 9511 Fast Ethernet Adapter
+1114 Atmel Corporation
+1115 3D Labs
+1116 Data Translation
+ 0022 DT3001
+ 0023 DT3002
+ 0024 DT3003
+ 0025 DT3004
+ 0026 DT3005
+ 0027 DT3001-PGL
+ 0028 DT3003-PGL
+1117 Datacube, Inc
+ 9500 Max-1C SVGA card
+ 9501 Max-1C image processing
+1118 Berg Electronics
+1119 ICP Vortex Computersysteme GmbH
+ 0000 GDT 6000/6020/6050
+ 0001 GDT 6000B/6010
+ 0002 GDT 6110/6510
+ 0003 GDT 6120/6520
+ 0004 GDT 6530
+ 0005 GDT 6550
+ 0006 GDT 6x17
+ 0007 GDT 6x27
+ 0008 GDT 6537
+ 0009 GDT 6557
+ 000a GDT 6115/6515
+ 000b GDT 6125/6525
+ 000c GDT 6535
+ 000d GDT 6555
+ 0100 GDT 6117RP/6517RP
+ 0101 GDT 6127RP/6527RP
+ 0102 GDT 6537RP
+ 0103 GDT 6557RP
+ 0104 GDT 6111RP/6511RP
+ 0105 GDT 6121RP/6521RP
+ 0110 GDT 6117RD/6517RD
+ 0111 GDT 6127RD/6527RD
+ 0112 GDT 6537RD
+ 0113 GDT 6557RD
+ 0114 GDT 6111RD/6511RD
+ 0115 GDT 6121RD/6521RD
+ 0118 GDT 6118RD/6518RD/6618RD
+ 0119 GDT 6128RD/6528RD/6628RD
+ 011a GDT 6538RD/6638RD
+ 011b GDT 6558RD/6658RD
+ 0120 GDT 6117RP2/6517RP2
+ 0121 GDT 6127RP2/6527RP2
+ 0122 GDT 6537RP2
+ 0123 GDT 6557RP2
+ 0124 GDT 6111RP2/6511RP2
+ 0125 GDT 6121RP2/6521RP2
+ 0136 GDT 6113RS/6513RS
+ 0137 GDT 6123RS/6523RS
+ 0138 GDT 6118RS/6518RS/6618RS
+ 0139 GDT 6128RS/6528RS/6628RS
+ 013a GDT 6538RS/6638RS
+ 013b GDT 6558RS/6658RS
+ 013c GDT 6533RS/6633RS
+ 013d GDT 6543RS/6643RS
+ 013e GDT 6553RS/6653RS
+ 013f GDT 6563RS/6663RS
+ 0166 GDT 7113RN/7513RN/7613RN
+ 0167 GDT 7123RN/7523RN/7623RN
+ 0168 GDT 7118RN/7518RN/7618RN
+ 0169 GDT 7128RN/7528RN/7628RN
+ 016a GDT 7538RN/7638RN
+ 016b GDT 7558RN/7658RN
+ 016c GDT 7533RN/7633RN
+ 016d GDT 7543RN/7643RN
+ 016e GDT 7553RN/7653RN
+ 016f GDT 7563RN/7663RN
+ 01d6 GDT 4x13RZ
+ 01d7 GDT 4x23RZ
+ 01f6 GDT 8x13RZ
+ 01f7 GDT 8x23RZ
+ 01fc GDT 8x33RZ
+ 01fd GDT 8x43RZ
+ 01fe GDT 8x53RZ
+ 01ff GDT 8x63RZ
+ 0210 GDT 6519RD/6619RD
+ 0211 GDT 6529RD/6629RD
+ 0260 GDT 7519RN/7619RN
+ 0261 GDT 7529RN/7629RN
+ 0300 GDT Raid Controller
+111a Efficient Networks, Inc
+ 0000 155P-MF1 (FPGA)
+ 0002 155P-MF1 (ASIC)
+ 0003 ENI-25P ATM
+ 111a 0000 ENI-25p Miniport ATM Adapter
+ 0005 SpeedStream (LANAI)
+ 111a 0001 ENI-3010 ATM
+ 111a 0009 ENI-3060 ADSL (VPI=0)
+ 111a 0101 ENI-3010 ATM
+ 111a 0109 ENI-3060CO ADSL (VPI=0)
+ 111a 0809 ENI-3060 ADSL (VPI=0 or 8)
+ 111a 0909 ENI-3060CO ADSL (VPI=0 or 8)
+ 111a 0a09 ENI-3060 ADSL (VPI=<0..15>)
+ 0007 SpeedStream ADSL
+ 111a 1001 ENI-3061 ADSL [ASIC]
+111b Teledyne Electronic Systems
+111c Tricord Systems Inc.
+ 0001 Powerbis Bridge
+111d Integrated Device Tech
+ 0001 IDT77211 ATM Adapter
+ 0003 IDT77252 ATM network controller
+111e Eldec
+111f Precision Digital Images
+ 4a47 Precision MX Video engine interface
+ 5243 Frame capture bus interface
+1120 EMC Corporation
+1121 Zilog
+1122 Multi-tech Systems, Inc.
+1123 Excellent Design, Inc.
+1124 Leutron Vision AG
+1125 Eurocore
+1126 Vigra
+1127 FORE Systems Inc
+ 0200 ForeRunner PCA-200 ATM
+ 0210 PCA-200PC
+ 0250 ATM
+ 0300 ForeRunner PCA-200EPC ATM
+ 0310 ATM
+ 0400 ForeRunnerHE ATM Adapter
+ 1127 0400 ForeRunnerHE ATM
+1129 Firmworks
+112a Hermes Electronics Company, Ltd.
+112b Linotype - Hell AG
+112c Zenith Data Systems
+112d Ravicad
+112e Infomedia Microelectronics Inc.
+112f Imaging Technology Inc
+ 0000 MVC IC-PCI
+ 0001 MVC IM-PCI Video frame grabber/processor
+1130 Computervision
+1131 Philips Semiconductors
+ 7130 SAA7130 Video Broadcast Decoder
+# PCI audio and video broadcast decoder (http://www.semiconductors.philips.com/pip/saa7134hl)
+ 7134 SAA7134
+ 7145 SAA7145
+ 7146 SAA7146
+ 114b 2003 DVRaptor Video Edit/Capture Card
+ 11bd 0006 DV500 Overlay
+ 11bd 000a DV500 Overlay
+1132 Mitel Corp.
+1133 Eicon Technology Corporation
+ 7901 EiconCard S90
+ 7902 EiconCard S90
+ 7911 EiconCard S91
+ 7912 EiconCard S91
+ 7941 EiconCard S94
+ 7942 EiconCard S94
+ 7943 EiconCard S94
+ 7944 EiconCard S94
+ b921 EiconCard P92
+ b922 EiconCard P92
+ b923 EiconCard P92
+ e001 DIVA 20PRO
+ 1133 e001 DIVA Pro 2.0 S/T
+ e002 DIVA 20
+ 1133 e002 DIVA 2.0 S/T
+ e003 DIVA 20PRO_U
+ 1133 e003 DIVA Pro 2.0 U
+ e004 DIVA 20_U
+ 1133 e004 DIVA 2.0 U
+ e005 DIVA LOW
+ 1133 e005 DIVA 2.01 S/T
+ e010 DIVA Server BRI-2M
+ 1133 e010 DIVA Server BRI-2M
+ e012 DIVA Server BRI-8M
+ 1133 e012 DIVA Server BRI-8M
+ e014 DIVA Server PRI-30M
+ 1133 e014 DIVA Server PRI-30M
+ e018 DIVA Server BRI-2M/-2F
+1134 Mercury Computer Systems
+ 0001 Raceway Bridge
+1135 Fuji Xerox Co Ltd
+ 0001 Printer controller
+1136 Momentum Data Systems
+1137 Cisco Systems Inc
+1138 Ziatech Corporation
+ 8905 8905 [STD 32 Bridge]
+1139 Dynamic Pictures, Inc
+ 0001 VGA Compatible 3D Graphics
+113a FWB Inc
+113b Network Computing Devices
+113c Cyclone Microsystems, Inc.
+ 0000 PCI-9060 i960 Bridge
+ 0001 PCI-SDK [PCI i960 Evaluation Platform]
+ 0911 PCI-911 [i960Jx-based Intelligent I/O Controller]
+ 0912 PCI-912 [i960CF-based Intelligent I/O Controller]
+ 0913 PCI-913
+ 0914 PCI-914 [I/O Controller w/ secondary PCI bus]
+113d Leading Edge Products Inc
+113e Sanyo Electric Co - Computer Engineering Dept
+113f Equinox Systems, Inc.
+ 0808 SST-64P Adapter
+ 1010 SST-128P Adapter
+ 80c0 SST-16P DB Adapter
+ 80c4 SST-16P RJ Adapter
+ 80c8 SST-16P Adapter
+ 8888 SST-4P Adapter
+ 9090 SST-8P Adapter
+1140 Intervoice Inc
+1141 Crest Microsystem Inc
+1142 Alliance Semiconductor Corporation
+ 3210 AP6410
+ 6422 ProVideo 6422
+ 6424 ProVideo 6424
+ 6425 ProMotion AT25
+ 643d ProMotion AT3D
+1143 NetPower, Inc
+1144 Cincinnati Milacron
+ 0001 Noservo controller
+1145 Workbit Corporation
+ f007 NinjaSCSI-32 KME
+ 8007 NinjaSCSI-32 Workbit
+ f010 NinjaSCSI-32 Workbit
+ f012 NinjaSCSI-32 Logitec
+ f013 NinjaSCSI-32 Logitec
+ f015 NinjaSCSI-32 Melco
+1146 Force Computers
+1147 Interface Corp
+1148 Syskonnect (Schneider & Koch)
+ 4000 FDDI Adapter
+ 0e11 b03b Netelligent 100 FDDI DAS Fibre SC
+ 0e11 b03c Netelligent 100 FDDI SAS Fibre SC
+ 0e11 b03d Netelligent 100 FDDI DAS UTP
+ 0e11 b03e Netelligent 100 FDDI SAS UTP
+ 0e11 b03f Netelligent 100 FDDI SAS Fibre MIC
+ 1148 5521 FDDI SK-5521 (SK-NET FDDI-UP)
+ 1148 5522 FDDI SK-5522 (SK-NET FDDI-UP DAS)
+ 1148 5541 FDDI SK-5541 (SK-NET FDDI-FP)
+ 1148 5543 FDDI SK-5543 (SK-NET FDDI-LP)
+ 1148 5544 FDDI SK-5544 (SK-NET FDDI-LP DAS)
+ 1148 5821 FDDI SK-5821 (SK-NET FDDI-UP64)
+ 1148 5822 FDDI SK-5822 (SK-NET FDDI-UP64 DAS)
+ 1148 5841 FDDI SK-5841 (SK-NET FDDI-FP64)
+ 1148 5843 FDDI SK-5843 (SK-NET FDDI-LP64)
+ 1148 5844 FDDI SK-5844 (SK-NET FDDI-LP64 DAS)
+ 4200 Token Ring adapter
+ 4300 Gigabit Ethernet
+ 1148 9821 SK-9821 (1000Base-T single link)
+ 1148 9822 SK-9822 (1000Base-T dual link)
+ 1148 9841 SK-9841 (1000Base-LX single link)
+ 1148 9842 SK-9842 (1000Base-LX dual link)
+ 1148 9843 SK-9843 (1000Base-SX single link)
+ 1148 9844 SK-9844 (1000Base-SX dual link)
+ 1148 9861 SK-9861 (1000Base-SX VF45 single link)
+ 1148 9862 SK-9862 (1000Base-SX VF45 dual link)
+ 4400 Gigabit Ethernet
+1149 Win System Corporation
+114a VMIC
+ 5579 VMIPCI-5579 (Reflective Memory Card)
+ 7587 VMIVME-7587
+114b Canopus Co., Ltd
+114c Annabooks
+114d IC Corporation
+114e Nikon Systems Inc
+114f Digi International
+ 0002 AccelePort EPC
+ 0003 RightSwitch SE-6
+ 0004 AccelePort Xem
+ 0005 AccelePort Xr
+ 0006 AccelePort Xr,C/X
+ 0009 AccelePort Xr/J
+ 000a AccelePort EPC/J
+ 000c DataFirePRIme T1 (1-port)
+ 000d SyncPort 2-Port (x.25/FR)
+ 0011 AccelePort 8r EIA-232 (IBM)
+ 0012 AccelePort 8r EIA-422
+ 0013 AccelePort Xr
+ 0014 AccelePort 8r EIA-422
+ 0015 AccelePort Xem
+ 0016 AccelePort EPC/X
+ 0017 AccelePort C/X
+ 001a DataFirePRIme E1 (1-port)
+ 001b AccelePort C/X (IBM)
+ 001d DataFire RAS T1/E1/PRI
+ 114f 0050 DataFire RAS E1 Adapter
+ 114f 0051 DataFire RAS Dual E1 Adapter
+ 114f 0052 DataFire RAS T1 Adapter
+ 114f 0053 DataFire RAS Dual T1 Adapter
+ 0023 AccelePort RAS
+ 0024 DataFire RAS B4 ST/U
+ 114f 0030 DataFire RAS BRI U Adapter
+ 114f 0031 DataFire RAS BRI S/T Adapter
+ 0026 AccelePort 4r 920
+ 0027 AccelePort Xr 920
+ 0034 AccelePort 2r 920
+ 0035 DataFire DSP T1/E1/PRI cPCI
+ 0040 AccelePort Xp
+ 0042 AccelePort 2p PCI
+ 0070 Datafire Micro V IOM2 (Europe)
+ 0071 Datafire Micro V (Europe)
+ 0072 Datafire Micro V IOM2 (North America)
+ 0073 Datafire Micro V (North America)
+ 6001 Avanstar
+1150 Thinking Machines Corp
+1151 JAE Electronics Inc.
+1152 Megatek
+1153 Land Win Electronic Corp
+1154 Melco Inc
+1155 Pine Technology Ltd
+1156 Periscope Engineering
+1157 Avsys Corporation
+1158 Voarx R & D Inc
+ 3011 Tokenet/vg 1001/10m anylan
+ 9050 Lanfleet/Truevalue
+ 9051 Lanfleet/Truevalue
+1159 Mutech Corp
+ 0001 MV-1000
+115a Harlequin Ltd
+115b Parallax Graphics
+115c Photron Ltd.
+115d Xircom
+ 0003 Cardbus Ethernet 10/100
+ 1014 0181 10/100 EtherJet Cardbus Adapter
+ 1014 1181 10/100 EtherJet Cardbus Adapter
+ 1014 8181 10/100 EtherJet Cardbus Adapter
+ 1014 9181 10/100 EtherJet Cardbus Adapter
+ 115d 0181 Cardbus Ethernet 10/100
+ 115d 1181 Cardbus Ethernet 10/100
+ 1179 0181 Cardbus Ethernet 10/100
+ 8086 8181 EtherExpress PRO/100 Mobile CardBus 32 Adapter
+ 8086 9181 EtherExpress PRO/100 Mobile CardBus 32 Adapter
+ 0005 Cardbus Ethernet 10/100
+ 1014 0182 10/100 EtherJet Cardbus Adapter
+ 1014 1182 10/100 EtherJet Cardbus Adapter
+ 115d 0182 Cardbus Ethernet 10/100
+ 115d 1182 Cardbus Ethernet 10/100
+ 0007 Cardbus Ethernet 10/100
+ 1014 0182 10/100 EtherJet Cardbus Adapter
+ 1014 1182 10/100 EtherJet Cardbus Adapter
+ 115d 0182 Cardbus Ethernet 10/100
+ 115d 1182 Cardbus Ethernet 10/100
+ 000b Cardbus Ethernet 10/100
+ 1014 0183 10/100 EtherJet Cardbus Adapter
+ 115d 0183 Cardbus Ethernet 10/100
+ 000c Mini-PCI V.90 56k Modem
+ 000f Cardbus Ethernet 10/100
+ 1014 0183 10/100 EtherJet Cardbus Adapter
+ 115d 0183 Cardbus Ethernet 10/100
+ 0101 Cardbus 56k modem
+ 115d 1081 Cardbus 56k Modem
+ 0103 Cardbus Ethernet + 56k Modem
+ 1014 9181 Cardbus 56k Modem
+ 1115 1181 Cardbus Ethernet 100 + 56k Modem
+ 115d 1181 CBEM56G-100 Ethernet + 56k Modem
+ 8086 9181 PRO/100 LAN + Modem56 CardBus
+115e Peer Protocols Inc
+115f Maxtor Corporation
+1160 Megasoft Inc
+1161 PFU Limited
+1162 OA Laboratory Co Ltd
+1163 Rendition
+ 0001 Verite 1000
+ 2000 Verite V2000/V2100/V2200
+ 1092 2000 Stealth II S220
+1164 Advanced Peripherals Technologies
+1165 Imagraph Corporation
+ 0001 Motion TPEG Recorder/Player with audio
+1166 ServerWorks
+ 0005 CNB20-LE Host Bridge
+ 0007 CNB20-LE Host Bridge
+ 0008 CNB20HE Host Bridge
+ 0009 CNB20LE Host Bridge
+ 0010 CIOB30
+ 0011 CMIC-HE
+ 0013 CNB20-HE Host Bridge
+ 0014 CNB20-HE Host Bridge
+ 0015 CMIC-GC Host Bridge
+ 0016 CMIC-GC Host Bridge
+ 0017 GCNB-LE Host Bridge
+ 0200 OSB4 South Bridge
+ 0201 CSB5 South Bridge
+ 0203 CSB6 South Bridge
+ 0211 OSB4 IDE Controller
+ 0212 CSB5 IDE Controller
+ 0213 CSB6 RAID/IDE Controller
+ 0220 OSB4/CSB5 OHCI USB Controller
+ 0221 CSB6 OHCI USB Controller
+ 0225 GCLE Host Bridge
+ 0227 GCLE-2 Host Bridge
+1167 Mutoh Industries Inc
+1168 Thine Electronics Inc
+1169 Centre for Development of Advanced Computing
+116a Polaris Communications
+ 6100 Bus/Tag Channel
+ 6800 Escon Channel
+ 7100 Bus/Tag Channel
+ 7800 Escon Channel
+116b Connectware Inc
+116c Intelligent Resources Integrated Systems
+116d Martin-Marietta
+116e Electronics for Imaging
+116f Workstation Technology
+1170 Inventec Corporation
+1171 Loughborough Sound Images Plc
+1172 Altera Corporation
+1173 Adobe Systems, Inc
+1174 Bridgeport Machines
+1175 Mitron Computer Inc.
+1176 SBE Incorporated
+1177 Silicon Engineering
+1178 Alfa, Inc.
+ afa1 Fast Ethernet Adapter
+1179 Toshiba America Info Systems
+ 0103 EX-IDE Type-B
+ 0404 DVD Decoder card
+ 0406 Tecra Video Capture device
+ 0407 DVD Decoder card (Version 2)
+ 0601 601
+ 0603 ToPIC95 PCI to CardBus Bridge for Notebooks
+ 060a ToPIC95
+ 060f ToPIC97
+ 0617 ToPIC95 PCI to Cardbus Bridge with ZV Support
+ 0618 CPU to PCI and PCI to ISA bridge
+# Claimed to be Lucent DSP1645 [Mars], but that's apparently incorrect. Does anyone know the correct ID?
+ 0701 FIR Port
+ 0804 TC6371AF SmartMedia Controller
+ 0805 SD TypA Controller
+ 0d01 FIR Port Type-DO
+ 1179 0001 FIR Port Type-DO
+117a A-Trend Technology
+117b L G Electronics, Inc.
+117c Atto Technology
+117d Becton & Dickinson
+117e T/R Systems
+117f Integrated Circuit Systems
+1180 Ricoh Co Ltd
+ 0465 RL5c465
+ 0466 RL5c466
+ 0475 RL5c475
+ 0476 RL5c476 II
+ 104d 80df Vaio PCG-FX403
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 0477 RL5c477
+ 0478 RL5c478
+ 1014 0184 ThinkPad A30p (2653-64G)
+ 0522 R5C522 IEEE 1394 Controller
+ 1014 01cf ThinkPad A30p (2653-64G)
+ 0551 R5C551 IEEE 1394 Controller
+ 0552 R5C552 IEEE 1394 Controller
+1181 Telmatics International
+1183 Fujikura Ltd
+1184 Forks Inc
+1185 Dataworld International Ltd
+1186 D-Link System Inc
+ 0100 DC21041
+ 1002 DL10050 Sundance Ethernet
+ 1186 1002 DFE-550TX
+ 1186 1012 DFE-580TX
+ 1300 RTL8139 Ethernet
+ 1186 1300 DFE-538TX 10/100 Ethernet Adapter
+ 1186 1301 DFE-530TX+ 10/100 Ethernet Adapter
+ 1340 DFE-690TXD CardBus PC Card
+ 1561 DRP-32TXD Cardbus PC Card
+ 4000 DL2K Ethernet
+1187 Advanced Technology Laboratories, Inc.
+1188 Shima Seiki Manufacturing Ltd.
+1189 Matsushita Electronics Co Ltd
+118a Hilevel Technology
+118b Hypertec Pty Limited
+118c Corollary, Inc
+ 0014 PCIB [C-bus II to PCI bus host bridge chip]
+ 1117 Intel 8-way XEON Profusion Chipset [Cache Coherency Filter]
+118d BitFlow Inc
+ 0001 Raptor-PCI framegrabber
+ 0012 Model 12 Road Runner Frame Grabber
+ 0014 Model 14 Road Runner Frame Grabber
+ 0024 Model 24 Road Runner Frame Grabber
+ 0044 Model 44 Road Runner Frame Grabber
+ 0112 Model 12 Road Runner Frame Grabber
+ 0114 Model 14 Road Runner Frame Grabber
+ 0124 Model 24 Road Runner Frame Grabber
+ 0144 Model 44 Road Runner Frame Grabber
+ 0212 Model 12 Road Runner Frame Grabber
+ 0214 Model 14 Road Runner Frame Grabber
+ 0224 Model 24 Road Runner Frame Grabber
+ 0244 Model 44 Road Runner Frame Grabber
+ 0312 Model 12 Road Runner Frame Grabber
+ 0314 Model 14 Road Runner Frame Grabber
+ 0324 Model 24 Road Runner Frame Grabber
+ 0344 Model 44 Road Runner Frame Grabber
+118e Hermstedt GmbH
+118f Green Logic
+1190 Tripace
+ c731 TP-910/920/940 PCI Ultra(Wide) SCSI Adapter
+1191 Artop Electronic Corp
+ 0003 SCSI Cache Host Adapter
+ 0004 ATP8400
+ 0005 ATP850UF
+ 0006 ATP860 NO-BIOS
+ 0007 ATP860
+ 0008 ATP865 NO-ROM
+ 0009 ATP865
+ 8002 AEC6710 SCSI-2 Host Adapter
+ 8010 AEC6712UW SCSI
+ 8020 AEC6712U SCSI
+ 8030 AEC6712S SCSI
+ 8040 AEC6712D SCSI
+ 8050 AEC6712SUW SCSI
+1192 Densan Company Ltd
+1193 Zeitnet Inc.
+ 0001 1221
+ 0002 1225
+1194 Toucan Technology
+1195 Ratoc System Inc
+1196 Hytec Electronics Ltd
+1197 Gage Applied Sciences, Inc.
+1198 Lambda Systems Inc
+1199 Attachmate Corporation
+119a Mind Share, Inc.
+119b Omega Micro Inc.
+ 1221 82C092G
+119c Information Technology Inst.
+119d Bug, Inc. Sapporo Japan
+119e Fujitsu Microelectronics Ltd.
+ 0001 FireStream 155
+ 0003 FireStream 50
+119f Bull HN Information Systems
+11a0 Convex Computer Corporation
+11a1 Hamamatsu Photonics K.K.
+11a2 Sierra Research and Technology
+11a3 Deuretzbacher GmbH & Co. Eng. KG
+11a4 Barco Graphics NV
+11a5 Microunity Systems Eng. Inc
+11a6 Pure Data Ltd.
+11a7 Power Computing Corp.
+11a8 Systech Corp.
+11a9 InnoSys Inc.
+ 4240 AMCC S933Q Intelligent Serial Card
+11aa Actel
+11ab Galileo Technology Ltd.
+ 0146 GT-64010
+ 4801 GT-48001
+ f003 GT-64010 Primary Image Piranha Image Generator
+11ac Canon Information Systems Research Aust.
+11ad Lite-On Communications Inc
+ 0002 LNE100TX
+ 11ad 0002 LNE100TX
+ 11ad 0003 LNE100TX
+ 11ad f003 LNE100TX
+ 11ad ffff LNE100TX
+ 1385 f004 FA310TX
+ c115 LNE100TX [Linksys EtherFast 10/100]
+ 11ad c001 LNE100TX [ver 2.0]
+11ae Aztech System Ltd
+11af Avid Technology Inc.
+11b0 V3 Semiconductor Inc.
+ 0002 V300PSC
+ 0292 V292PBC [Am29030/40 Bridge]
+ 0960 V96xPBC
+ c960 V96DPC
+11b1 Apricot Computers
+11b2 Eastman Kodak
+11b3 Barr Systems Inc.
+11b4 Leitch Technology International
+11b5 Radstone Technology Plc
+11b6 United Video Corp
+11b7 Motorola
+11b8 XPoint Technologies, Inc
+ 0001 Quad PeerMaster
+11b9 Pathlight Technology Inc.
+ c0ed SSA Controller
+11ba Videotron Corp
+11bb Pyramid Technology
+11bc Network Peripherals Inc
+ 0001 NP-PCI
+11bd Pinnacle Systems Inc.
+11be International Microcircuits Inc
+11bf Astrodesign, Inc.
+11c0 Hewlett Packard
+11c1 Lucent Microelectronics
+ 0440 56k WinModem
+ 0001 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 1033 8015 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 1033 8047 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 1033 804f LT WinModem 56k Data+Fax+Voice+Dsvd
+ 10cf 102c LB LT Modem V.90 56k
+ 10cf 104a BIBLO LT Modem 56k
+ 10cf 105f LB2 LT Modem V.90 56k
+ 1179 0001 Internal V.90 Modem
+ 11c1 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 122d 4101 MDP7800-U Modem
+ 122d 4102 MDP7800SP-U Modem
+ 13e0 0040 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 13e0 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 13e0 0441 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 13e0 0450 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 13e0 f100 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 13e0 f101 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 144d 2101 LT56PV Modem
+ 149f 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 0441 56k WinModem
+ 1033 804d LT WinModem 56k Data+Fax
+ 1033 8065 LT WinModem 56k Data+Fax
+ 1092 0440 Supra 56i
+ 1179 0001 Internal V.90 Modem
+ 11c1 0440 LT WinModem 56k Data+Fax
+ 11c1 0441 LT WinModem 56k Data+Fax
+ 122d 4100 MDP7800-U Modem
+ 13e0 0040 LT WinModem 56k Data+Fax
+ 13e0 0100 LT WinModem 56k Data+Fax
+ 13e0 0410 LT WinModem 56k Data+Fax
+ 13e0 0420 TelePath Internet 56k WinModem
+ 13e0 0440 LT WinModem 56k Data+Fax
+ 13e0 0443 LT WinModem 56k Data+Fax
+ 13e0 f102 LT WinModem 56k Data+Fax
+ 1416 9804 CommWave 56k Modem
+ 141d 0440 LT WinModem 56k Data+Fax
+ 144f 0441 Lucent 56k V.90 DF Modem
+ 144f 0449 Lucent 56k V.90 DF Modem
+ 144f 110d Lucent Win Modem
+ 1468 0441 Presario 56k V.90 DF Modem
+ 1668 0440 Lucent Win Modem
+ 0442 56k WinModem
+ 0001 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 11c1 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 11c1 0442 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 13e0 0412 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 13e0 0442 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 13fc 2471 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 144d 2104 LT56PT Modem
+ 144f 1104 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 149f 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 1668 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 0443 LT WinModem
+ 0444 LT WinModem
+ 0445 LT WinModem
+ 0446 LT WinModem
+ 0447 LT WinModem
+ 0448 WinModem 56k
+ 1014 0131 Lucent Win Modem
+ 1033 8066 LT WinModem 56k Data+Fax+Voice+Dsvd
+ 13e0 0030 56k Voice Modem
+ 13e0 0040 LT WinModem 56k Data+Fax+Voice+Dsvd
+# Actiontech eth+modem card as used by Dell &c.
+ 1668 2400 LT WinModem 56k (MiniPCI Ethernet+Modem)
+ 0449 WinModem 56k
+ 0e11 b14d 56k V.90 Modem
+ 13e0 0020 LT WinModem 56k Data+Fax
+ 13e0 0041 TelePath Internet 56k WinModem
+ 1436 0440 Lucent Win Modem
+ 144f 0449 Lucent 56k V.90 DFi Modem
+ 1468 0410 IBM ThinkPad T23 (2647-4MG)
+ 1468 0440 Lucent Win Modem
+ 1468 0449 Presario 56k V.90 DFi Modem
+ 044a F-1156IV WinModem (V90, 56KFlex)
+ 10cf 1072 LB Global LT Modem
+ 13e0 0012 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 13e0 0042 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 144f 1005 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+ 044b LT WinModem
+ 044c LT WinModem
+ 044d LT WinModem
+ 044e LT WinModem
+ 044f V90 WildWire Modem
+ 0450 LT WinModem
+ 0451 LT WinModem
+ 0452 LT WinModem
+ 0453 LT WinModem
+ 0454 LT WinModem
+ 0455 LT WinModem
+ 0456 LT WinModem
+ 0457 LT WinModem
+ 0458 LT WinModem
+ 0459 LT WinModem
+ 045a LT WinModem
+ 0461 V90 WildWire Modem
+ 0462 V90 WildWire Modem
+ 0480 Venus Modem (V90, 56KFlex)
+ 5801 USB
+ 5802 USS-312 USB Controller
+ 5811 FW323
+ dead 0800 FireWire Host Bus Adapter
+11c2 Sand Microelectronics
+11c3 NEC Corp
+11c4 Document Technologies, Inc
+11c5 Shiva Corporation
+11c6 Dainippon Screen Mfg. Co. Ltd
+11c7 D.C.M. Data Systems
+11c8 Dolphin Interconnect Solutions AS
+ 0658 PSB32 SCI-Adapter D31x
+ d665 PSB64 SCI-Adapter D32x
+ d667 PSB66 SCI-Adapter D33x
+11c9 Magma
+ 0010 16-line serial port w/- DMA
+ 0011 4-line serial port w/- DMA
+11ca LSI Systems, Inc
+11cb Specialix Research Ltd.
+ 2000 PCI_9050
+ 11cb 0200 SX
+ 11cb b008 I/O8+
+ 4000 SUPI_1
+ 8000 T225
+11cc Michels & Kleberhoff Computer GmbH
+11cd HAL Computer Systems, Inc.
+11ce Netaccess
+11cf Pioneer Electronic Corporation
+11d0 Lockheed Martin Federal Systems-Manassas
+11d1 Auravision
+ 01f7 VxP524
+11d2 Intercom Inc.
+11d3 Trancell Systems Inc
+11d4 Analog Devices
+ 1805 SM56 PCI modem
+ 1889 AD1889 sound chip
+11d5 Ikon Corporation
+ 0115 10115
+ 0117 10117
+11d6 Tekelec Telecom
+11d7 Trenton Technology, Inc.
+11d8 Image Technologies Development
+11d9 TEC Corporation
+11da Novell
+11db Sega Enterprises Ltd
+11dc Questra Corporation
+11dd Crosfield Electronics Limited
+11de Zoran Corporation
+ 6057 ZR36057PQC Video cutting chipset
+ 1031 7efe DC10 Plus
+ 1031 fc00 MiroVIDEO DC50, Motion JPEG Capture/CODEC Board
+ 13ca 4231 JPEG/TV Card
+ 6120 ZR36120
+ 1328 f001 Cinemaster C DVD Decoder
+11df New Wave PDG
+11e0 Cray Communications A/S
+11e1 GEC Plessey Semi Inc.
+11e2 Samsung Information Systems America
+11e3 Quicklogic Corporation
+11e4 Second Wave Inc
+11e5 IIX Consulting
+11e6 Mitsui-Zosen System Research
+11e7 Toshiba America, Elec. Company
+11e8 Digital Processing Systems Inc.
+11e9 Highwater Designs Ltd.
+11ea Elsag Bailey
+11eb Formation Inc.
+11ec Coreco Inc
+11ed Mediamatics
+11ee Dome Imaging Systems Inc
+11ef Nicolet Technologies B.V.
+11f0 Compu-Shack
+ 4231 FDDI
+ 4232 FASTline UTP Quattro
+ 4233 FASTline FO
+ 4234 FASTline UTP
+ 4235 FASTline-II UTP
+ 4236 FASTline-II FO
+ 4731 GIGAline
+11f1 Symbios Logic Inc
+11f2 Picture Tel Japan K.K.
+11f3 Keithley Metrabyte
+11f4 Kinetic Systems Corporation
+ 2915 CAMAC controller
+11f5 Computing Devices International
+11f6 Compex
+ 0112 ENet100VG4
+ 0113 FreedomLine 100
+ 1401 ReadyLink 2000
+ 2011 RL100-ATX 10/100
+ 11f6 2011 RL100-ATX
+ 2201 ReadyLink 100TX (Winbond W89C840)
+ 11f6 2011 ReadyLink 100TX
+ 9881 RL100TX
+11f7 Scientific Atlanta
+11f8 PMC-Sierra Inc.
+ 7375 PM7375 [LASAR-155 ATM SAR]
+11f9 I-Cube Inc
+11fa Kasan Electronics Company, Ltd.
+11fb Datel Inc
+11fc Silicon Magic
+11fd High Street Consultants
+11fe Comtrol Corporation
+ 0001 RocketPort 8 Oct
+ 0002 RocketPort 8 Intf
+ 0003 RocketPort 16 Intf
+ 0004 RocketPort 32 Intf
+ 0005 RocketPort Octacable
+ 0006 RocketPort 8J
+ 0007 RocketPort 4-port
+ 0008 RocketPort 8-port
+ 0009 RocketPort 16-port
+ 000a RocketPort Plus Quadcable
+ 000b RocketPort Plus Octacable
+ 000c RocketPort 8-port Modem
+ 8015 RocketPort 4-port UART 16954
+11ff Scion Corporation
+1200 CSS Corporation
+1201 Vista Controls Corp
+1202 Network General Corp.
+1203 Bayer Corporation, Agfa Division
+1204 Lattice Semiconductor Corporation
+1205 Array Corporation
+1206 Amdahl Corporation
+1208 Parsytec GmbH
+ 4853 HS-Link Device
+1209 SCI Systems Inc
+120a Synaptel
+120b Adaptive Solutions
+120c Technical Corp.
+120d Compression Labs, Inc.
+120e Cyclades Corporation
+ 0100 Cyclom-Y below first megabyte
+ 0101 Cyclom-Y above first megabyte
+ 0102 Cyclom-4Y below first megabyte
+ 0103 Cyclom-4Y above first megabyte
+ 0104 Cyclom-8Y below first megabyte
+ 0105 Cyclom-8Y above first megabyte
+ 0200 Cyclades-Z below first megabyte
+ 0201 Cyclades-Z above first megabyte
+ 0300 PC300/RSV or /X21 (2 ports)
+ 0301 PC300/RSV or /X21 (1 port)
+ 0310 PC300/TE (2 ports)
+ 0311 PC300/TE (1 port)
+ 0320 PC300/TE-M (2 ports)
+ 0321 PC300/TE-M (1 port)
+ 0400 PC400
+120f Essential Communications
+ 0001 Roadrunner serial HIPPI
+1210 Hyperparallel Technologies
+1211 Braintech Inc
+1212 Kingston Technology Corp.
+1213 Applied Intelligent Systems, Inc.
+1214 Performance Technologies, Inc.
+1215 Interware Co., Ltd
+1216 Purup Prepress A/S
+1217 O2 Micro, Inc.
+ 6729 OZ6729
+ 673a OZ6730
+ 6832 OZ6832/6833 Cardbus Controller
+ 6836 OZ6836/6860 Cardbus Controller
+ 6872 OZ6812 Cardbus Controller
+ 6925 OZ6922 Cardbus Controller
+ 6933 OZ6933 Cardbus Controller
+ 1025 1016 Travelmate 612 TX
+ 6972 OZ6912 Cardbus Controller
+1218 Hybricon Corp.
+1219 First Virtual Corporation
+121a 3Dfx Interactive, Inc.
+ 0001 Voodoo
+ 0002 Voodoo 2
+ 0003 Voodoo Banshee
+ 1092 0003 Monster Fusion
+ 1092 4000 Monster Fusion
+ 1092 4002 Monster Fusion
+ 1092 4801 Monster Fusion AGP
+ 1092 4803 Monster Fusion AGP
+ 1092 8030 Monster Fusion
+ 1092 8035 Monster Fusion AGP
+ 10b0 0001 Dragon 4000
+ 1102 1018 3D Blaster Banshee VE
+ 121a 0001 Voodoo Banshee AGP
+ 121a 0003 Voodoo Banshee AGP SGRAM
+ 121a 0004 Voodoo Banshee
+ 139c 0016 Raven
+ 139c 0017 Raven
+ 14af 0002 Maxi Gamer Phoenix
+ 3030 3030 Skywell Magic TwinPower
+ 0004 Voodoo Banshee [Velocity 100]
+ 0005 Voodoo 3
+ 121a 0004 Voodoo3 AGP
+ 121a 0030 Voodoo3 AGP
+ 121a 0031 Voodoo3 AGP
+ 121a 0034 Voodoo3 AGP
+ 121a 0036 Voodoo3
+ 121a 0037 Voodoo3 AGP
+ 121a 0038 Voodoo3 AGP
+ 121a 003a Voodoo3 AGP
+ 121a 0044 Voodoo3
+ 121a 004b Velocity 100
+ 121a 004c Velocity 200
+ 121a 004d Voodoo3 AGP
+ 121a 004e Voodoo3 AGP
+ 121a 0051 Voodoo3 AGP
+ 121a 0052 Voodoo3 AGP
+ 121a 0060 Voodoo3 3500 TV (NTSC)
+ 121a 0061 Voodoo3 3500 TV (PAL)
+ 121a 0062 Voodoo3 3500 TV (SECAM)
+ 0009 Voodoo 4 / Voodoo 5
+ 121a 0009 Voodoo5 AGP 5500/6000
+ 0057 Voodoo 3/3000 [Avenger]
+121b Advanced Telecommunications Modules
+121c Nippon Texaco., Ltd
+121d Lippert Automationstechnik GmbH
+121e CSPI
+121f Arcus Technology, Inc.
+1220 Ariel Corporation
+ 1220 AMCC 5933 TMS320C80 DSP/Imaging board
+1221 Contec Co., Ltd
+1222 Ancor Communications, Inc.
+1223 Artesyn Communication Products
+ 0003 PM/Link
+ 0004 PM/T1
+ 0005 PM/E1
+ 0008 PM/SLS
+ 0009 BajaSpan Resource Target
+ 000a BajaSpan Section 0
+ 000b BajaSpan Section 1
+ 000c BajaSpan Section 2
+ 000d BajaSpan Section 3
+ 000e PM/PPC
+1224 Interactive Images
+1225 Power I/O, Inc.
+1227 Tech-Source
+1228 Norsk Elektro Optikk A/S
+1229 Data Kinesis Inc.
+122a Integrated Telecom
+122b LG Industrial Systems Co., Ltd
+122c Sican GmbH
+122d Aztech System Ltd
+ 1206 368DSP
+ 50dc 3328 Audio
+ 122d 0001 3328 Audio
+ 80da 3328 Audio
+ 122d 0001 3328 Audio
+122e Xyratex
+122f Andrew Corporation
+1230 Fishcamp Engineering
+1231 Woodward McCoach, Inc.
+1232 GPT Limited
+1233 Bus-Tech, Inc.
+1234 Technical Corp.
+1235 Risq Modular Systems, Inc.
+1236 Sigma Designs Corporation
+ 0000 RealMagic64/GX
+ 6401 REALmagic 64/GX (SD 6425)
+1237 Alta Technology Corporation
+1238 Adtran
+1239 3DO Company
+123a Visicom Laboratories, Inc.
+123b Seeq Technology, Inc.
+123c Century Systems, Inc.
+123d Engineering Design Team, Inc.
+ 0000 EasyConnect 8/32
+ 0002 EasyConnect 8/64
+ 0003 EasyIO
+123e Simutech, Inc.
+123f C-Cube Microsystems
+ 00e4 MPEG
+ 8120 E4?
+ 11bd 0006 DV500 E4
+ 11bd 000a DV500 E4
+ 8888 Cinemaster C 3.0 DVD Decoder
+ 1002 0001 Cinemaster C 3.0 DVD Decoder
+ 1002 0002 Cinemaster C 3.0 DVD Decoder
+ 1328 0001 Cinemaster C 3.0 DVD Decoder
+1240 Marathon Technologies Corp.
+1241 DSC Communications
+1242 Jaycor Networks, Inc.
+ 1242 JNI Corporation (formerly Jaycor Networks, Inc.)
+ 4643 FCI-1063 Fibre Channel Adapter
+ 6562 FCX2-6562 Dual Channel PCI-X Fibre Channel Adapter
+ 656a FCX-6562 PCI-X Fibre Channel Adapter
+1243 Delphax
+1244 AVM Audiovisuelles MKTG & Computer System GmbH
+ 0700 B1 ISDN
+ 0800 C4 ISDN
+ 0a00 A1 ISDN [Fritz]
+ 1244 0a00 FRITZ!Card ISDN Controller
+ 0e00 Fritz!PCI v2.0 ISDN
+ 1100 C2 ISDN
+ 1200 T1 ISDN
+1245 A.P.D., S.A.
+1246 Dipix Technologies, Inc.
+1247 Xylon Research, Inc.
+1248 Central Data Corporation
+1249 Samsung Electronics Co., Ltd.
+124a AEG Electrocom GmbH
+124b SBS/Greenspring Modular I/O
+ 0040 cPCI-200 Four Slot IndustryPack carrier
+ 124b 9080 PCI9080 Bridge
+124c Solitron Technologies, Inc.
+124d Stallion Technologies, Inc.
+ 0000 EasyConnection 8/32
+ 0002 EasyConnection 8/64
+ 0003 EasyIO
+ 0004 EasyConnection/RA
+124e Cylink
+124f Infotrend Technology, Inc.
+ 0041 IFT-2000 Series RAID Controller
+1250 Hitachi Microcomputer System Ltd
+1251 VLSI Solutions Oy
+1253 Guzik Technical Enterprises
+1254 Linear Systems Ltd.
+1255 Optibase Ltd
+ 1110 MPEG Forge
+ 1210 MPEG Fusion
+ 2110 VideoPlex
+ 2120 VideoPlex CC
+ 2130 VideoQuest
+1256 Perceptive Solutions, Inc.
+ 4201 PCI-2220I
+ 4401 PCI-2240I
+ 5201 PCI-2000
+1257 Vertex Networks, Inc.
+1258 Gilbarco, Inc.
+1259 Allied Telesyn International
+ 2560 AT-2560 Fast Ethernet Adapter (i82557B)
+125a ABB Power Systems
+125b Asix Electronics Corporation
+ 1400 ALFA GFC2204
+125c Aurora Technologies, Inc.
+ 0640 Aries 16000P
+125d ESS Technology
+ 0000 ES336H Fax Modem (Early Model)
+ 1948 Solo?
+ 1968 ES1968 Maestro 2
+ 1028 0085 ES1968 Maestro-2 PCI
+ 1033 8051 ES1968 Maestro-2 Audiodrive
+ 1969 ES1969 Solo-1 Audiodrive
+ 1014 0166 ES1969 SOLO-1 AudioDrive on IBM Aptiva Mainboard
+ 125d 8888 Solo-1 Audio Adapter
+ 525f c888 ES1969 SOLO-1 AudioDrive (+ES1938)
+ 1978 ES1978 Maestro 2E
+ 1033 803c ES1978 Maestro-2E Audiodrive
+ 1033 8058 ES1978 Maestro-2E Audiodrive
+ 1092 4000 Monster Sound MX400
+ 1179 0001 ES1978 Maestro-2E Audiodrive
+ 1988 ES1988 Allegro-1
+ 1092 4100 Sonic Impact S100
+ 125d 1988 ESS Allegro-1 Audiodrive
+ 1989 ESS Modem
+ 125d 1989 ESS Modem
+ 1998 ES1983S Maestro-3i PCI Audio Accelerator
+ 1028 00e6 ES1983S Maestro-3i (Dell Inspiron 8100)
+ 1999 ES1983S Maestro-3i PCI Modem Accelerator
+ 199a ES1983S Maestro-3i PCI Audio Accelerator
+ 199b ES1983S Maestro-3i PCI Modem Accelerator
+ 2808 ES336H Fax Modem (Later Model)
+ 2838 ES2838/2839 SuperLink Modem
+ 2898 ES2898 Modem
+ 125d 0424 ES56-PI Data Fax Modem
+ 125d 0425 ES56T-PI Data Fax Modem
+ 125d 0426 ES56V-PI Data Fax Modem
+ 125d 0427 VW-PI Data Fax Modem
+ 125d 0428 ES56ST-PI Data Fax Modem
+ 125d 0429 ES56SV-PI Data Fax Modem
+ 147a c001 ES56-PI Data Fax Modem
+ 14fe 0428 ES56-PI Data Fax Modem
+ 14fe 0429 ES56-PI Data Fax Modem
+125e Specialvideo Engineering SRL
+125f Concurrent Technologies, Inc.
+1260 Harris Semiconductor
+ 3873 Prism 2.5 Wavelan chipset
+ 1186 3501 DWL-520 Wireless PCI Adapter
+ 8130 HMP8130 NTSC/PAL Video Decoder
+ 8131 HMP8131 NTSC/PAL Video Decoder
+1261 Matsushita-Kotobuki Electronics Industries, Ltd.
+1262 ES Computer Company, Ltd.
+1263 Sonic Solutions
+1264 Aval Nagasaki Corporation
+1265 Casio Computer Co., Ltd.
+1266 Microdyne Corporation
+ 0001 NE10/100 Adapter (i82557B)
+ 1910 NE2000Plus (RT8029) Ethernet Adapter
+ 1266 1910 NE2000Plus Ethernet Adapter
+1267 S. A. Telecommunications
+ 5352 PCR2101
+ 5a4b Telsat Turbo
+1268 Tektronix
+1269 Thomson-CSF/TTM
+126a Lexmark International, Inc.
+126b Adax, Inc.
+126c Northern Telecom
+126d Splash Technology, Inc.
+126e Sumitomo Metal Industries, Ltd.
+126f Silicon Motion, Inc.
+ 0710 SM710 LynxEM
+ 0712 SM712 LynxEM+
+ 0720 SM720 Lynx3DM
+ 0810 SM810 LynxE
+ 0811 SM811 LynxE
+ 0820 SM820 Lynx3D
+ 0910 SM910
+1270 Olympus Optical Co., Ltd.
+1271 GW Instruments
+1272 Telematics International
+1273 Hughes Network Systems
+ 0002 DirecPC
+1274 Ensoniq
+ 1371 ES1371 [AudioPCI-97]
+ 0e11 0024 AudioPCI on Motherboard Compaq Deskpro
+ 0e11 b1a7 ES1371, ES1373 AudioPCI
+ 1033 80ac ES1371, ES1373 AudioPCI
+ 1042 1854 Tazer
+ 107b 8054 Tabor2
+ 1274 1371 Creative Sound Blaster AudioPCI64V, AudioPCI128
+ 1462 6470 ES1371, ES1373 AudioPCI On Motherboard MS-6147 1.1A
+ 1462 6560 ES1371, ES1373 AudioPCI On Motherboard MS-6156 1.10
+ 1462 6630 ES1371, ES1373 AudioPCI On Motherboard MS-6163BX 1.0A
+ 1462 6631 ES1371, ES1373 AudioPCI On Motherboard MS-6163VIA 1.0A
+ 1462 6632 ES1371, ES1373 AudioPCI On Motherboard MS-6163BX 2.0A
+ 1462 6633 ES1371, ES1373 AudioPCI On Motherboard MS-6163VIA 2.0A
+ 1462 6820 ES1371, ES1373 AudioPCI On Motherboard MS-6182 1.00
+ 1462 6822 ES1371, ES1373 AudioPCI On Motherboard MS-6182 1.00A
+ 1462 6830 ES1371, ES1373 AudioPCI On Motherboard MS-6183 1.00
+ 1462 6880 ES1371, ES1373 AudioPCI On Motherboard MS-6188 1.00
+ 1462 6900 ES1371, ES1373 AudioPCI On Motherboard MS-6190 1.00
+ 1462 6910 ES1371, ES1373 AudioPCI On Motherboard MS-6191
+ 1462 6930 ES1371, ES1373 AudioPCI On Motherboard MS-6193
+ 1462 6990 ES1371, ES1373 AudioPCI On Motherboard MS-6199BX 2.0A
+ 1462 6991 ES1371, ES1373 AudioPCI On Motherboard MS-6199VIA 2.0A
+ 14a4 2077 ES1371, ES1373 AudioPCI On Motherboard KR639
+ 14a4 2105 ES1371, ES1373 AudioPCI On Motherboard MR800
+ 14a4 2107 ES1371, ES1373 AudioPCI On Motherboard MR801
+ 14a4 2172 ES1371, ES1373 AudioPCI On Motherboard DR739
+ 1509 9902 ES1371, ES1373 AudioPCI On Motherboard KW11
+ 1509 9903 ES1371, ES1373 AudioPCI On Motherboard KW31
+ 1509 9904 ES1371, ES1373 AudioPCI On Motherboard KA11
+ 1509 9905 ES1371, ES1373 AudioPCI On Motherboard KC13
+ 152d 8801 ES1371, ES1373 AudioPCI On Motherboard CP810E
+ 152d 8802 ES1371, ES1373 AudioPCI On Motherboard CP810
+ 152d 8803 ES1371, ES1373 AudioPCI On Motherboard P3810E
+ 152d 8804 ES1371, ES1373 AudioPCI On Motherboard P3810-S
+ 152d 8805 ES1371, ES1373 AudioPCI On Motherboard P3820-S
+ 270f 2001 ES1371, ES1373 AudioPCI On Motherboard 6CTR
+ 270f 2200 ES1371, ES1373 AudioPCI On Motherboard 6WTX
+ 270f 3000 ES1371, ES1373 AudioPCI On Motherboard 6WSV
+ 270f 3100 ES1371, ES1373 AudioPCI On Motherboard 6WIV2
+ 270f 3102 ES1371, ES1373 AudioPCI On Motherboard 6WIV
+ 270f 7060 ES1371, ES1373 AudioPCI On Motherboard 6ASA2
+ 8086 4249 ES1371, ES1373 AudioPCI On Motherboard BI440ZX
+ 8086 424c ES1371, ES1373 AudioPCI On Motherboard BL440ZX
+ 8086 425a ES1371, ES1373 AudioPCI On Motherboard BZ440ZX
+ 8086 4341 ES1371, ES1373 AudioPCI On Motherboard Cayman
+ 8086 4343 ES1371, ES1373 AudioPCI On Motherboard Cape Cod
+ 8086 4649 ES1371, ES1373 AudioPCI On Motherboard Fire Island
+ 8086 464a ES1371, ES1373 AudioPCI On Motherboard FJ440ZX
+ 8086 4d4f ES1371, ES1373 AudioPCI On Motherboard Montreal
+ 8086 4f43 ES1371, ES1373 AudioPCI On Motherboard OC440LX
+ 8086 5243 ES1371, ES1373 AudioPCI On Motherboard RC440BX
+ 8086 5352 ES1371, ES1373 AudioPCI On Motherboard SunRiver
+ 8086 5643 ES1371, ES1373 AudioPCI On Motherboard Vancouver
+ 8086 5753 ES1371, ES1373 AudioPCI On Motherboard WS440BX
+ 5000 ES1370 [AudioPCI]
+ 4942 4c4c Creative Sound Blaster AudioPCI128
+ 5880 5880 AudioPCI
+ 1274 2000 Creative Sound Blaster AudioPCI128
+ 1274 2003 Creative SoundBlaster AudioPCI 128
+ 1274 5880 Creative Sound Blaster AudioPCI128
+ 1458 a000 5880 AudioPCI On Motherboard 6OXET
+ 1462 6880 5880 AudioPCI On Motherboard MS-6188 1.00
+ 270f 2001 5880 AudioPCI On Motherboard 6CTR
+ 270f 2200 5880 AudioPCI On Motherboard 6WTX
+ 270f 7040 5880 AudioPCI On Motherboard 6ATA4
+1275 Network Appliance Corporation
+1276 Switched Network Technologies, Inc.
+1277 Comstream
+1278 Transtech Parallel Systems Ltd.
+ 0701 TPE3/TM3 PowerPC Node
+1279 Transmeta Corporation
+ 0295 Northbridge
+ 0395 LongRun Northbridge
+ 0396 SDRAM controller
+ 0397 BIOS scratchpad
+127a Rockwell International
+ 1002 HCF 56k Data/Fax Modem
+ 1092 094c SupraExpress 56i PRO [Diamond SUP2380]
+ 122d 4002 HPG / MDP3858-U
+ 122d 4005 MDP3858-E
+ 122d 4007 MDP3858-A/-NZ
+ 122d 4012 MDP3858-SA
+ 122d 4017 MDP3858-W
+ 122d 4018 MDP3858-W
+ 127a 1002 Rockwell 56K D/F HCF Modem
+ 1003 HCF 56k Data/Fax Modem
+ 0e11 b0bc 229-DF Zephyr
+ 0e11 b114 229-DF Cheetah
+ 1033 802b 229-DF
+ 13df 1003 PCI56RX Modem
+ 13e0 0117 IBM
+ 13e0 0147 IBM F-1156IV+/R3 Spain V.90 Modem
+ 13e0 0197 IBM
+ 13e0 01c7 IBM F-1156IV+/R3 WW V.90 Modem
+ 13e0 01f7 IBM
+ 1436 1003 IBM
+ 1436 1103 IBM 5614PM3G V.90 Modem
+ 1436 1602 Compaq 229-DF Ducati
+ 1004 HCF 56k Data/Fax/Voice Modem
+ 1048 1500 MicroLink 56k Modem
+ 10cf 1059 Fujitsu 229-DFRT
+ 1005 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 1033 8029 229-DFSV
+ 1033 8054 Modem
+ 10cf 103c Fujitsu
+ 10cf 1055 Fujitsu 229-DFSV
+ 10cf 1056 Fujitsu 229-DFSV
+ 122d 4003 MDP3858SP-U
+ 122d 4006 Packard Bell MDP3858V-E
+ 122d 4008 MDP3858SP-A/SP-NZ
+ 122d 4009 MDP3858SP-E
+ 122d 4010 MDP3858V-U
+ 122d 4011 MDP3858SP-SA
+ 122d 4013 MDP3858V-A/V-NZ
+ 122d 4015 MDP3858SP-W
+ 122d 4016 MDP3858V-W
+ 122d 4019 MDP3858V-SA
+ 13df 1005 PCI56RVP Modem
+ 13e0 0187 IBM
+ 13e0 01a7 IBM
+ 13e0 01b7 IBM DF-1156IV+/R3 Spain V.90 Modem
+ 13e0 01d7 IBM DF-1156IV+/R3 WW V.90 Modem
+ 1436 1005 IBM
+ 1436 1105 IBM
+ 1437 1105 IBM 5614PS3G V.90 Modem
+ 1022 HCF 56k Modem
+ 1436 1303 M3-5614PM3G V.90 Modem
+ 1023 HCF 56k Data/Fax Modem
+ 122d 4020 Packard Bell MDP3858-WE
+ 122d 4023 MDP3858-UE
+ 13e0 0247 IBM F-1156IV+/R6 Spain V.90 Modem
+ 13e0 0297 IBM
+ 13e0 02c7 IBM F-1156IV+/R6 WW V.90 Modem
+ 1436 1203 IBM
+ 1436 1303 IBM
+ 1024 HCF 56k Data/Fax/Voice Modem
+ 1025 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 10cf 106a Fujitsu 235-DFSV
+ 122d 4021 Packard Bell MDP3858V-WE
+ 122d 4022 MDP3858SP-WE
+ 122d 4024 MDP3858V-UE
+ 122d 4025 MDP3858SP-UE
+ 1026 HCF 56k PCI Speakerphone Modem
+ 1032 HCF 56k Modem
+ 1033 HCF 56k Modem
+ 1034 HCF 56k Modem
+ 1035 HCF 56k PCI Speakerphone Modem
+ 1036 HCF 56k Modem
+ 1085 HCF 56k Volcano PCI Modem
+ 2005 HCF 56k Data/Fax Modem
+ 104d 8044 229-DFSV
+ 104d 8045 229-DFSV
+ 104d 8055 PBE/Aztech 235W-DFSV
+ 104d 8056 235-DFSV
+ 104d 805a Modem
+ 104d 805f Modem
+ 104d 8074 Modem
+ 2013 HSF 56k Data/Fax Modem
+ 1179 0001 Modem
+ 1179 ff00 Modem
+ 2014 HSF 56k Data/Fax/Voice Modem
+ 10cf 1057 Fujitsu Citicorp III
+ 122d 4050 MSP3880-U
+ 122d 4055 MSP3880-W
+ 2015 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 10cf 1063 Fujitsu
+ 10cf 1064 Fujitsu
+ 1468 2015 Fujitsu
+ 2016 HSF 56k Data/Fax/Voice/Spkp Modem
+ 122d 4051 MSP3880V-W
+ 122d 4052 MSP3880SP-W
+ 122d 4054 MSP3880V-U
+ 122d 4056 MSP3880SP-U
+ 122d 4057 MSP3880SP-A
+ 4311 Riptide HSF 56k PCI Modem
+ 127a 4311 Ring Modular? Riptide HSF RT HP Dom
+ 13e0 0210 HP-GVC
+ 4320 Riptide PCI Audio Controller
+ 1235 4320 Riptide PCI Audio Controller
+ 4321 Riptide HCF 56k PCI Modem
+ 1235 4321 Hewlett Packard DF
+ 1235 4324 Hewlett Packard DF
+ 13e0 0210 Hewlett Packard DF
+ 144d 2321 Riptide
+ 4322 Riptide PCI Game Controller
+ 1235 4322 Riptide PCI Game Controller
+ 8234 RapidFire 616X ATM155 Adapter
+ 108d 0022 RapidFire 616X ATM155 Adapter
+ 108d 0027 RapidFire 616X ATM155 Adapter
+127b Pixera Corporation
+127c Crosspoint Solutions, Inc.
+127d Vela Research
+127e Winnov, L.P.
+127f Fujifilm
+1280 Photoscript Group Ltd.
+1281 Yokogawa Electric Corporation
+1282 Davicom Semiconductor, Inc.
+ 9009 Ethernet 100/10 MBit
+ 9100 Ethernet 100/10 MBit
+ 9102 Ethernet 100/10 MBit
+ 9132 Ethernet 100/10 MBit
+1283 Integrated Technology Express, Inc.
+ 673a IT8330G
+ 8330 IT8330G
+ 8888 IT8888F PCI to ISA Bridge with SMB
+ 8889 IT8889F PCI to ISA Bridge
+ e886 IT8330G
+1284 Sahara Networks, Inc.
+1285 Platform Technologies, Inc.
+ 0100 AGOGO sound chip (aka ESS Maestro 1)
+1286 Mazet GmbH
+1287 M-Pact, Inc.
+ 001e LS220D DVD Decoder
+ 001f LS220C DVD Decoder
+1288 Timestep Corporation
+1289 AVC Technology, Inc.
+128a Asante Technologies, Inc.
+128b Transwitch Corporation
+128c Retix Corporation
+128d G2 Networks, Inc.
+ 0021 ATM155 Adapter
+128e Hoontech Corporation/Samho Multi Tech Ltd.
+ 0008 ST128 WSS/SB
+ 0009 ST128 SAM9407
+ 000a ST128 Game Port
+ 000b ST128 MPU Port
+ 000c ST128 Ctrl Port
+128f Tateno Dennou, Inc.
+1290 Sord Computer Corporation
+1291 NCS Computer Italia
+1292 Tritech Microelectronics Inc
+1293 Media Reality Technology
+1294 Rhetorex, Inc.
+1295 Imagenation Corporation
+1296 Kofax Image Products
+1297 Holco Enterprise Co, Ltd/Shuttle Computer
+1298 Spellcaster Telecommunications Inc.
+1299 Knowledge Technology Lab.
+129a VMetro, inc.
+ 0615 PBT-615 PCI-X Bus Analyzer
+129b Image Access
+129c Jaycor
+129d Compcore Multimedia, Inc.
+129e Victor Company of Japan, Ltd.
+129f OEC Medical Systems, Inc.
+12a0 Allen-Bradley Company
+12a1 Simpact Associates, Inc.
+12a2 Newgen Systems Corporation
+12a3 Lucent Technologies
+12a4 NTT Electronics Technology Company
+12a5 Vision Dynamics Ltd.
+12a6 Scalable Networks, Inc.
+12a7 AMO GmbH
+12a8 News Datacom
+12a9 Xiotech Corporation
+12aa SDL Communications, Inc.
+12ab Yuan Yuan Enterprise Co., Ltd.
+ 3000 MPG-200C PCI DVD Decoder Card
+12ac Measurex Corporation
+12ad Multidata GmbH
+12ae Alteon Networks Inc.
+ 0001 AceNIC Gigabit Ethernet
+ 12ae 0001 Gigabit Ethernet-SX (Universal)
+ 1410 0104 Gigabit Ethernet-SX PCI Adapter
+ 0002 AceNIC Gigabit Ethernet (Copper)
+ 12ae 0002 Gigabit Ethernet-T (3C986-T)
+12af TDK USA Corp
+12b0 Jorge Scientific Corp
+12b1 GammaLink
+12b2 General Signal Networks
+12b3 Inter-Face Co Ltd
+12b4 FutureTel Inc
+12b5 Granite Systems Inc.
+12b6 Natural Microsystems
+12b7 Cognex Modular Vision Systems Div. - Acumen Inc.
+12b8 Korg
+12b9 US Robotics/3Com
+ 1006 WinModem
+ 12b9 005c USR 56k Internal Voice WinModem (Model 3472)
+ 12b9 005e USR 56k Internal WinModem (Model 662975)
+ 12b9 0062 USR 56k Internal Voice WinModem (Model 662978)
+ 12b9 0068 USR 56k Internal Voice WinModem (Model 5690)
+ 12b9 007a USR 56k Internal Voice WinModem (Model 662974)
+ 12b9 007f USR 56k Internal WinModem (Models 5698, 5699)
+ 12b9 0080 USR 56k Internal WinModem (Models 2975, 3528)
+ 12b9 0081 USR 56k Internal Voice WinModem (Models 2974, 3529)
+ 12b9 0091 USR 56k Internal Voice WinModem (Model 2978)
+ 1007 USR 56k Internal WinModem
+ 12b9 00a3 USR 56k Internal WinModem (Model 3595)
+ 1008 56K FaxModem Model 5610
+ 12b9 00a2 USR 56k Internal FAX Modem (Model 2977)
+ 12b9 00aa USR 56k Internal Voice Modem (Model 2976)
+ 12b9 00ab USR 56k Internal Voice Modem (Model 5609)
+ 12b9 00ac USR 56k Internal Voice Modem (Model 3298)
+ 12b9 00ad USR 56k Internal FAX Modem (Model 5610)
+12ba PMC Sierra
+12bb Nippon Unisoft Corporation
+12bc Array Microsystems
+12bd Computerm Corp.
+12be Anchor Chips Inc.
+ 3041 AN3041Q CO-MEM
+ 3042 AN3042Q CO-MEM Lite
+ 12be 3042 Anchor Chips Lite Evaluation Board
+12bf Fujifilm Microdevices
+12c0 Infimed
+12c1 GMM Research Corp
+12c2 Mentec Limited
+12c3 Holtek Microelectronics Inc
+ 0058 PCI NE2K Ethernet
+ 5598 PCI NE2K Ethernet
+12c4 Connect Tech Inc
+12c5 Picture Elements Incorporated
+ 007e Imaging/Scanning Subsystem Engine
+ 007f Imaging/Scanning Subsystem Engine
+ 0081 PCIVST [Grayscale Thresholding Engine]
+ 0085 Video Simulator/Sender
+ 0086 THR2 Multi-scale Thresholder
+12c6 Mitani Corporation
+12c7 Dialogic Corp
+12c8 G Force Co, Ltd
+12c9 Gigi Operations
+12ca Integrated Computing Engines
+12cb Antex Electronics Corporation
+12cc Pluto Technologies International
+12cd Aims Lab
+12ce Netspeed Inc.
+12cf Prophet Systems, Inc.
+12d0 GDE Systems, Inc.
+12d1 PSITech
+12d2 NVidia / SGS Thomson (Joint Venture)
+ 0008 NV1
+ 0009 DAC64
+ 0018 Riva128
+ 1048 0c10 VICTORY Erazor
+ 107b 8030 STB Velocity 128
+ 1092 0350 Viper V330
+ 1092 1092 Viper V330
+ 10b4 1b1b STB Velocity 128
+ 10b4 1b1d STB Velocity 128
+ 10b4 1b1e STB Velocity 128, PAL TV-Out
+ 10b4 1b20 STB Velocity 128 Sapphire
+ 10b4 1b21 STB Velocity 128
+ 10b4 1b22 STB Velocity 128 AGP, NTSC TV-Out
+ 10b4 1b23 STB Velocity 128 AGP, PAL TV-Out
+ 10b4 1b27 STB Velocity 128 DVD
+ 10b4 1b88 MVP Pro 128
+ 10b4 222a STB Velocity 128 AGP
+ 10b4 2230 STB Velocity 128
+ 10b4 2232 STB Velocity 128
+ 10b4 2235 STB Velocity 128 AGP
+ 2a15 54a3 3DVision-SAGP / 3DexPlorer 3000
+ 0019 Riva128ZX
+ 0020 TNT
+ 0028 TNT2
+ 0029 UTNT2
+ 002c VTNT2
+ 00a0 ITNT2
+12d3 Vingmed Sound A/S
+12d4 Ulticom (Formerly DGM&S)
+12d5 Equator Technologies
+12d6 Analogic Corp
+12d7 Biotronic SRL
+12d8 Pericom Semiconductor
+12d9 Aculab PLC
+12da True Time Inc.
+12db Annapolis Micro Systems, Inc
+12dc Symicron Computer Communication Ltd.
+12dd Management Graphics
+12de Rainbow Technologies
+12df SBS Technologies Inc
+12e0 Chase Research
+ 0010 ST16C654 Quad UART
+ 0020 ST16C654 Quad UART
+ 0030 ST16C654 Quad UART
+12e1 Nintendo Co, Ltd
+12e2 Datum Inc. Bancomm-Timing Division
+12e3 Imation Corp - Medical Imaging Systems
+12e4 Brooktrout Technology Inc
+12e5 Apex Semiconductor Inc
+12e6 Cirel Systems
+12e7 Sunsgroup Corporation
+12e8 Crisc Corp
+12e9 GE Spacenet
+12ea Zuken
+12eb Aureal Semiconductor
+ 0001 Vortex 1
+ 104d 8036 AU8820 Vortex Digital Audio Processor
+ 1092 2000 Sonic Impact A3D
+ 1092 2100 Sonic Impact A3D
+ 1092 2110 Sonic Impact A3D
+ 1092 2200 Sonic Impact A3D
+ 122d 1002 AU8820 Vortex Digital Audio Processor
+ 12eb 0001 AU8820 Vortex Digital Audio Processor
+ 5053 3355 Montego
+ 0002 Vortex 2
+ 104d 8049 AU8830 Vortex 3D Digital Audio Processor
+ 104d 807b AU8830 Vortex 3D Digital Audio Processor
+ 1092 3000 Monster Sound II
+ 1092 3001 Monster Sound II
+ 1092 3002 Monster Sound II
+ 1092 3003 Monster Sound II
+ 1092 3004 Monster Sound II
+ 12eb 0001 AU8830 Vortex 3D Digital Audio Processor
+ 12eb 0002 AU8830 Vortex 3D Digital Audio Processor
+ 12eb 0088 AU8830 Vortex 3D Digital Audio Processor
+ 144d 3510 AU8830 Vortex 3D Digital Audio Processor
+ 5053 3356 Montego II
+ 0003 AU8810 Vortex Digital Audio Processor
+ 104d 8049 AU8810 Vortex Digital Audio Processor
+ 104d 8077 AU8810 Vortex Digital Audio Processor
+ 109f 1000 AU8810 Vortex Digital Audio Processor
+ 12eb 0003 AU8810 Vortex Digital Audio Processor
+ 1462 6780 AU8810 Vortex Digital Audio Processor
+ 14a4 2073 AU8810 Vortex Digital Audio Processor
+ 14a4 2091 AU8810 Vortex Digital Audio Processor
+ 14a4 2104 AU8810 Vortex Digital Audio Processor
+ 14a4 2106 AU8810 Vortex Digital Audio Processor
+ 8803 Vortex 56k Software Modem
+ 12eb 8803 Vortex 56k Software Modem
+12ec 3A International, Inc.
+12ed Optivision Inc.
+12ee Orange Micro
+12ef Vienna Systems
+12f0 Pentek
+12f1 Sorenson Vision Inc
+12f2 Gammagraphx, Inc.
+12f3 Radstone Technology
+12f4 Megatel
+12f5 Forks
+12f6 Dawson France
+12f7 Cognex
+12f8 Electronic Design GmbH
+ 0002 VideoMaker
+12f9 Four Fold Ltd
+12fb Spectrum Signal Processing
+12fc Capital Equipment Corp
+12fd I2S
+12fe ESD Electronic System Design GmbH
+12ff Lexicon
+1300 Harman International Industries Inc
+1302 Computer Sciences Corp
+1303 Innovative Integration
+1304 Juniper Networks
+1305 Netphone, Inc
+1306 Duet Technologies
+1307 Computer Boards
+ 0001 PCI-DAS1602/16
+ 000b PCI-DIO48H
+ 000c PCI-PDISO8
+ 000d PCI-PDISO16
+ 000f PCI-DAS1200
+ 0010 PCI-DAS1602/12
+ 0014 PCI-DIO24H
+ 0015 PCI-DIO24H/CTR3
+ 0016 PCI-DIO48H/CTR15
+ 0017 PCI-DIO96H
+ 0018 PCI-CTR05
+ 0019 PCI-DAS1200/JR
+ 001a PCI-DAS1001
+ 001b PCI-DAS1002
+ 001c PCI-DAS1602JR/16
+ 001d PCI-DAS6402/16
+ 001e PCI-DAS6402/12
+ 001f PCI-DAS16/M1
+ 0020 PCI-DDA02/12
+ 0021 PCI-DDA04/12
+ 0022 PCI-DDA08/12
+ 0023 PCI-DDA02/16
+ 0024 PCI-DDA04/16
+ 0025 PCI-DDA08/16
+ 0026 PCI-DAC04/12-HS
+ 0027 PCI-DAC04/16-HS
+ 0028 PCI-DIO24
+ 0029 PCI-DAS08
+ 002c PCI-INT32
+ 0033 PCI-DUAL-AC5
+ 0034 PCI-DAS-TC
+ 0035 PCI-DAS64/M1/16
+ 0036 PCI-DAS64/M2/16
+ 0037 PCI-DAS64/M3/16
+ 004c PCI-DAS1000
+1308 Jato Technologies Inc.
+ 0001 NetCelerator Adapter
+ 1308 0001 NetCelerator Adapter
+1309 AB Semiconductor Ltd
+130a Mitsubishi Electric Microcomputer
+130b Colorgraphic Communications Corp
+130c Ambex Technologies, Inc
+130d Accelerix Inc
+130e Yamatake-Honeywell Co. Ltd
+130f Advanet Inc
+1310 Gespac
+1311 Videoserver, Inc
+1312 Acuity Imaging, Inc
+1313 Yaskawa Electric Co.
+1316 Teradyne Inc
+1317 Linksys
+ 0981 Fast Ethernet 10/100
+ 0985 Network Everywhere Fast Ethernet 10/100 model NC100
+ 1985 Fast Ethernet 10/100
+1318 Packet Engines Inc.
+ 0911 PCI Ethernet Adapter
+1319 Fortemedia, Inc
+ 0801 Xwave QS3000A [FM801]
+ 0802 Xwave QS3000A [FM801 game port]
+ 1000 FM801 PCI Audio
+ 1001 FM801 PCI Joystick
+131a Finisar Corp.
+131c Nippon Electro-Sensory Devices Corp
+131d Sysmic, Inc.
+131e Xinex Networks Inc
+131f Siig Inc
+ 1000 CyberSerial (1-port) 16550
+ 1001 CyberSerial (1-port) 16650
+ 1002 CyberSerial (1-port) 16850
+ 1010 Duet 1S(16550)+1P
+ 1011 Duet 1S(16650)+1P
+ 1012 Duet 1S(16850)+1P
+ 1020 CyberParallel (1-port)
+ 1021 CyberParallel (2-port)
+ 1030 CyberSerial (2-port) 16550
+ 1031 CyberSerial (2-port) 16650
+ 1032 CyberSerial (2-port) 16850
+ 1034 Trio 2S(16550)+1P
+ 1035 Trio 2S(16650)+1P
+ 1036 Trio 2S(16850)+1P
+ 1050 CyberSerial (4-port) 16550
+ 1051 CyberSerial (4-port) 16650
+ 1052 CyberSerial (4-port) 16850
+ 2000 CyberSerial (1-port) 16550
+ 2001 CyberSerial (1-port) 16650
+ 2002 CyberSerial (1-port) 16850
+ 2010 Duet 1S(16550)+1P
+ 2011 Duet 1S(16650)+1P
+ 2012 Duet 1S(16850)+1P
+ 2020 CyberParallel (1-port)
+ 2021 CyberParallel (2-port)
+ 2030 CyberSerial (2-port) 16550
+ 131f 2030 PCI Serial Card
+ 2031 CyberSerial (2-port) 16650
+ 2032 CyberSerial (2-port) 16850
+ 2040 Trio 1S(16550)+2P
+ 2041 Trio 1S(16650)+2P
+ 2042 Trio 1S(16850)+2P
+ 2050 CyberSerial (4-port) 16550
+ 2051 CyberSerial (4-port) 16650
+ 2052 CyberSerial (4-port) 16850
+ 2060 Trio 2S(16550)+1P
+ 2061 Trio 2S(16650)+1P
+ 2062 Trio 2S(16850)+1P
+1320 Crypto AG
+1321 Arcobel Graphics BV
+1322 MTT Co., Ltd
+1323 Dome Inc
+1324 Sphere Communications
+1325 Salix Technologies, Inc
+1326 Seachange international
+1327 Voss scientific
+1328 quadrant international
+1329 Productivity Enhancement
+132a Microcom Inc.
+132b Broadband Technologies
+132c Micrel Inc
+132d Integrated Silicon Solution, Inc.
+1330 MMC Networks
+1331 Radisys Corp.
+1332 Micro Memory
+1334 Redcreek Communications, Inc
+1335 Videomail, Inc
+1337 Third Planet Publishing
+1338 BT Electronics
+133a Vtel Corp
+133b Softcom Microsystems
+133c Holontech Corp
+133d SS Technologies
+133e Virtual Computer Corp
+133f SCM Microsystems
+1340 Atalla Corp
+1341 Kyoto Microcomputer Co
+1342 Promax Systems Inc
+1343 Phylon Communications Inc
+1344 Crucial Technology
+1345 Arescom Inc
+1347 Odetics
+1349 Sumitomo Electric Industries, Ltd.
+134a DTC Technology Corp.
+ 0001 Domex 536
+ 0002 Domex DMX3194UP SCSI Adapter
+134b ARK Research Corp.
+134c Chori Joho System Co. Ltd
+134d PCTel Inc
+ 7890 HSP MicroModem 56
+ 7891 HSP MicroModem 56
+ 134d 0001 HSP MicroModem 56
+ 7892 HSP MicroModem 56
+ 7893 HSP MicroModem 56
+ 7894 HSP MicroModem 56
+ 7895 HSP MicroModem 56
+ 7896 HSP MicroModem 56
+ 7897 HSP MicroModem 56
+134e CSTI
+134f Algo System Co Ltd
+1350 Systec Co. Ltd
+1351 Sonix Inc
+1353 Dassault A.T.
+1354 Dwave System Inc
+1355 Kratos Analytical Ltd
+1356 The Logical Co
+1359 Prisa Networks
+135a Brain Boxes
+135b Giganet Inc
+135c Quatech Inc
+ 00f0 MPAC-100 Synchronous Serial Card (Zilog 85230)
+135d ABB Network Partner AB
+135e Sealevel Systems Inc
+ 7101 Single Port RS-232/422/485/530
+ 7201 Dual Port RS-232/422/485 Interface
+ 7202 Dual Port RS-232 Interface
+ 7401 Four Port RS-232 Interface
+ 7402 Four Port RS-422/485 Interface
+ 7801 Eight Port RS-232 Interface
+ 8001 8001 Digital I/O Adapter
+135f I-Data International A-S
+1360 Meinberg Funkuhren
+1361 Soliton Systems K.K.
+1362 Fujifacom Corporation
+1363 Phoenix Technology Ltd
+1364 ATM Communications Inc
+1365 Hypercope GmbH
+1366 Teijin Seiki Co. Ltd
+1367 Hitachi Zosen Corporation
+1368 Skyware Corporation
+1369 Digigram
+136a High Soft Tech
+136b Kawasaki Steel Corporation
+136c Adtek System Science Co Ltd
+136d Gigalabs Inc
+136f Applied Magic Inc
+1370 ATL Products
+1371 CNet Technology Inc
+1373 Silicon Vision Inc
+1374 Silicom Ltd
+1375 Argosystems Inc
+1376 LMC
+1377 Electronic Equipment Production & Distribution GmbH
+1378 Telemann Co. Ltd
+1379 Asahi Kasei Microsystems Co Ltd
+137a Mark of the Unicorn Inc
+137b PPT Vision
+137c Iwatsu Electric Co Ltd
+137d Dynachip Corporation
+137e Patriot Scientific Corporation
+137f Japan Satellite Systems Inc
+1380 Sanritz Automation Co Ltd
+1381 Brains Co. Ltd
+1382 Marian - Electronic & Software
+1383 Controlnet Inc
+1384 Reality Simulation Systems Inc
+1385 Netgear
+ 4100 802.11b Wireless Adapter (MA301)
+ 620a GA620
+ 622a GA622
+ 630a GA630
+ f311 FA311
+1386 Video Domain Technologies
+1387 Systran Corp
+1388 Hitachi Information Technology Co Ltd
+1389 Applicom International
+ 0001 PCI1500PFB [Intelligent fieldbus adaptor]
+138a Fusion Micromedia Corp
+138b Tokimec Inc
+138c Silicon Reality
+138d Future Techno Designs pte Ltd
+138e Basler GmbH
+138f Patapsco Designs Inc
+1390 Concept Development Inc
+1391 Development Concepts Inc
+1392 Medialight Inc
+1393 Moxa Technologies Co Ltd
+ 1040 Smartio C104H/PCI
+ 1680 Smartio C168H/PCI
+ 2040 Intellio CP-204J
+ 2180 Intellio C218 Turbo PCI
+ 3200 Intellio C320 Turbo PCI
+1394 Level One Communications
+ 0001 LXT1001 Gigabit Ethernet
+ 1394 0001 NetCelerator Adapter
+1395 Ambicom Inc
+1396 Cipher Systems Inc
+1397 Cologne Chip Designs GmbH
+ 2bd0 ISDN network controller [HFC-PCI]
+ 1397 2bd0 ISDN Board
+ e4bf 1000 CI1-1-Harp
+1398 Clarion co. Ltd
+1399 Rios systems Co Ltd
+139a Alacritech Inc
+ 0001 Quad Port 10/100 Server Accelerator
+ 0003 Single Port 10/100 Server Accelerator
+ 0005 Single Port Gigabit Server Accelerator
+139b Mediasonic Multimedia Systems Ltd
+139c Quantum 3d Inc
+139d EPL limited
+139e Media4
+139f Aethra s.r.l.
+13a0 Crystal Group Inc
+13a1 Kawasaki Heavy Industries Ltd
+13a2 Ositech Communications Inc
+13a3 Hifn Inc.
+ 0005 7751 Security Processor
+ 0006 6500 Public Key Processor
+ 0007 7811 Security Processor
+ 0012 7951 Security Processor
+13a4 Rascom Inc
+13a5 Audio Digital Imaging Inc
+13a6 Videonics Inc
+13a7 Teles AG
+13a8 Exar Corp.
+ 0158 XR17C158 Octal UART
+13a9 Siemens Medical Systems, Ultrasound Group
+13aa Broadband Networks Inc
+13ab Arcom Control Systems Ltd
+13ac Motion Media Technology Ltd
+13ad Nexus Inc
+13ae ALD Technology Ltd
+13af T.Sqware
+13b0 Maxspeed Corp
+13b1 Tamura corporation
+13b2 Techno Chips Co. Ltd
+13b3 Lanart Corporation
+13b4 Wellbean Co Inc
+13b5 ARM
+13b6 Dlog GmbH
+13b7 Logic Devices Inc
+13b8 Nokia Telecommunications oy
+13b9 Elecom Co Ltd
+13ba Oxford Instruments
+13bb Sanyo Technosound Co Ltd
+13bc Bitran Corporation
+13bd Sharp corporation
+13be Miroku Jyoho Service Co. Ltd
+13bf Sharewave Inc
+13c0 Microgate Corporation
+ 0010 SyncLink WAN Adapter
+13c1 3ware Inc
+ 1000 3ware ATA-RAID
+ 1001 3ware 7000-series ATA-RAID
+ 1002 3ware ATA-RAID
+13c2 Technotrend Systemtechnik GmbH
+13c3 Janz Computer AG
+13c4 Phase Metrics
+13c5 Alphi Technology Corp
+13c6 Condor Engineering Inc
+13c7 Blue Chip Technology Ltd
+13c8 Apptech Inc
+13c9 Eaton Corporation
+13ca Iomega Corporation
+13cb Yano Electric Co Ltd
+13cc Metheus Corporation
+13cd Compatible Systems Corporation
+13ce Cocom A/S
+13cf Studio Audio & Video Ltd
+13d0 Techsan Electronics Co Ltd
+13d1 Abocom Systems Inc
+ ab06 RTL8139 [FE2000VX] CardBus Fast Ethernet Attached Port Adapter
+13d2 Shark Multimedia Inc
+13d3 IMC Networks
+13d4 Graphics Microsystems Inc
+13d5 Media 100 Inc
+13d6 K.I. Technology Co Ltd
+13d7 Toshiba Engineering Corporation
+13d8 Phobos corporation
+13d9 Apex PC Solutions Inc
+13da Intresource Systems pte Ltd
+13db Janich & Klass Computertechnik GmbH
+13dc Netboost Corporation
+13dd Multimedia Bundle Inc
+13de ABB Robotics Products AB
+13df E-Tech Inc
+ 0001 PCI56RVP Modem
+ 13df 0001 PCI56RVP Modem
+13e0 GVC Corporation
+13e1 Silicom Multimedia Systems Inc
+13e2 Dynamics Research Corporation
+13e3 Nest Inc
+13e4 Calculex Inc
+13e5 Telesoft Design Ltd
+13e6 Argosy research Inc
+13e7 NAC Incorporated
+13e8 Chip Express Corporation
+13e9 Chip Express Corporation
+13ea Dallas Semiconductor
+13eb Hauppauge Computer Works Inc
+13ec Zydacron Inc
+13ed Raytheon E-Systems
+13ee Hayes Microcomputer Products Inc
+13ef Coppercom Inc
+13f0 Sundance Technology Inc
+ 0201 ST201 Sundance Ethernet
+13f1 Oce' - Technologies B.V.
+13f2 Ford Microelectronics Inc
+13f3 Mcdata Corporation
+13f4 Troika Networks, Inc.
+ 1401 Zentai Fibre Channel Adapter
+13f5 Kansai Electric Co. Ltd
+13f6 C-Media Electronics Inc
+ 0100 CM8338A
+ 13f6 ffff CMI8338/C3DX PCI Audio Device
+ 0101 CM8338B
+ 13f6 0101 CMI8338-031 PCI Audio Device
+ 0111 CM8738
+ 1043 8077 CMI8738 6-channel audio controller
+ 1043 80e2 CMI8738 6ch-MX
+ 13f6 0111 CMI8738/C3DX PCI Audio Device
+ 0211 CM8738
+13f7 Wildfire Communications
+13f8 Ad Lib Multimedia Inc
+13f9 NTT Advanced Technology Corp.
+13fa Pentland Systems Ltd
+13fb Aydin Corp
+13fc Computer Peripherals International
+13fd Micro Science Inc
+13fe Advantech Co. Ltd
+ 1756 PCI-1756
+13ff Silicon Spice Inc
+1400 Artx Inc
+ 1401 9432 TX
+1401 CR-Systems A/S
+1402 Meilhaus Electronic GmbH
+1403 Ascor Inc
+1404 Fundamental Software Inc
+1405 Excalibur Systems Inc
+1406 Oce' Printing Systems GmbH
+1407 Lava Computer mfg Inc
+ 0100 Lava Dual Serial
+ 0101 Lava Quatro A
+ 0102 Lava Quatro B
+ 0200 Lava Port Plus
+ 0201 Lava Quad A
+ 0202 Lava Quad B
+ 0500 Lava Single Serial
+ 0600 Lava Port 650
+ 8000 Lava Parallel
+ 8001 Dual parallel port controller A
+ 8002 Lava Dual Parallel port A
+ 8003 Lava Dual Parallel port B
+ 8800 BOCA Research IOPPAR
+1408 Aloka Co. Ltd
+1409 Timedia Technology Co Ltd
+ 7168 PCI2S550 (Dual 16550 UART)
+140a DSP Research Inc
+140b Ramix Inc
+140c Elmic Systems Inc
+140d Matsushita Electric Works Ltd
+140e Goepel Electronic GmbH
+140f Salient Systems Corp
+1410 Midas lab Inc
+1411 Ikos Systems Inc
+1412 IC Ensemble Inc
+ 1712 ICE1712 [Envy24]
+1413 Addonics
+1414 Microsoft Corporation
+1415 Oxford Semiconductor Ltd
+ 8403 VScom 011H-EP1 1 port parallel adaptor
+ 9501 OX16PCI954 (Quad 16950 UART) function 0
+ 15ed 2000 MCCR Serial p0-3 of 8
+ 15ed 2001 MCCR Serial p0-3 of 16
+ 950a EXSYS EX-41092 Dual 16950 Serial adapter
+ 950b OXCB950 Cardbus 16950 UART
+ 9511 OX16PCI954 (Quad 16950 UART) function 1
+ 15ed 2000 MCCR Serial p4-7 of 8
+ 15ed 2001 MCCR Serial p4-15 of 16
+ 9521 OX16PCI952 (Dual 16950 UART)
+1416 Multiwave Innovation pte Ltd
+1417 Convergenet Technologies Inc
+1418 Kyushu electronics systems Inc
+1419 Excel Switching Corp
+141a Apache Micro Peripherals Inc
+141b Zoom Telephonics Inc
+141d Digitan Systems Inc
+141e Fanuc Ltd
+141f Visiontech Ltd
+1420 Psion Dacom plc
+1421 Ads Technologies Inc
+1422 Ygrec Systems Co Ltd
+1423 Custom Technology Corp.
+1424 Videoserver Connections
+1425 ASIC Designers Inc
+1426 Storage Technology Corp.
+1427 Better On-Line Solutions
+1428 Edec Co Ltd
+1429 Unex Technology Corp.
+142a Kingmax Technology Inc
+142b Radiolan
+142c Minton Optic Industry Co Ltd
+142d Pix stream Inc
+142e Vitec Multimedia
+142f Radicom Research Inc
+1430 ITT Aerospace/Communications Division
+1431 Gilat Satellite Networks
+1432 Edimax Computer Co.
+1433 Eltec Elektronik GmbH
+1435 Real Time Devices US Inc.
+1436 CIS Technology Inc
+1437 Nissin Inc Co
+1438 Atmel-dream
+1439 Outsource Engineering & Mfg. Inc
+143a Stargate Solutions Inc
+143b Canon Research Center, America
+143c Amlogic Inc
+143d Tamarack Microelectronics Inc
+143e Jones Futurex Inc
+143f Lightwell Co Ltd - Zax Division
+1440 ALGOL Corp.
+1441 AGIE Ltd
+1442 Phoenix Contact GmbH & Co.
+1443 Unibrain S.A.
+1444 TRW
+1445 Logical DO Ltd
+1446 Graphin Co Ltd
+1447 AIM GmbH
+1448 Alesis Studio Electronics
+1449 TUT Systems Inc
+144a Adlink Technology
+ 7296 PCI-7296
+ 7432 PCI-7432
+ 7433 PCI-7433
+ 7434 PCI-7434
+ 7841 PCI-7841
+ 8133 PCI-8133
+ 8554 PCI-8554
+ 9111 PCI-9111
+ 9113 PCI-9113
+ 9114 PCI-9114
+144b Loronix Information Systems Inc
+144c Catalina Research Inc
+144d Samsung Electronics Co Ltd
+144e OLITEC
+144f Askey Computer Corp.
+1450 Octave Communications Ind.
+1451 SP3D Chip Design GmbH
+1453 MYCOM Inc
+1454 Altiga Networks
+1455 Logic Plus Plus Inc
+1456 Advanced Hardware Architectures
+1457 Nuera Communications Inc
+1458 Giga-byte Technology
+1459 DOOIN Electronics
+145a Escalate Networks Inc
+145b PRAIM SRL
+145c Cryptek
+145d Gallant Computer Inc
+145e Aashima Technology B.V.
+145f Baldor Electric Company
+ 0001 NextMove PCI
+1460 DYNARC INC
+1461 Avermedia Technologies Inc
+1462 Micro-star International Co Ltd
+1463 Fast Corporation
+1464 Interactive Circuits & Systems Ltd
+1465 GN NETTEST Telecom DIV.
+1466 Designpro Inc.
+1467 DIGICOM SPA
+1468 AMBIT Microsystem Corp.
+1469 Cleveland Motion Controls
+146a IFR
+146b Parascan Technologies Ltd
+146c Ruby Tech Corp.
+146d Tachyon, INC.
+146e Williams Electronics Games, Inc.
+146f Multi Dimensional Consulting Inc
+1470 Bay Networks
+1471 Integrated Telecom Express Inc
+1472 DAIKIN Industries, Ltd
+1473 ZAPEX Technologies Inc
+1474 Doug Carson & Associates
+1475 PICAZO Communications
+1476 MORTARA Instrument Inc
+1477 Net Insight
+1478 DIATREND Corporation
+1479 TORAY Industries Inc
+147a FORMOSA Industrial Computing
+147b ABIT Computer Corp.
+147c AWARE, Inc.
+147d Interworks Computer Products
+147e Matsushita Graphic Communication Systems, Inc.
+147f NIHON UNISYS, Ltd.
+1480 SCII Telecom
+1481 BIOPAC Systems Inc
+1482 ISYTEC - Integrierte Systemtechnik GmbH
+1483 LABWAY Corporation
+1484 Logic Corporation
+1485 ERMA - Electronic GmbH
+1486 L3 Communications Telemetry & Instrumentation
+1487 MARQUETTE Medical Systems
+1488 KONTRON Electronik GmbH
+1489 KYE Systems Corporation
+148a OPTO
+148b INNOMEDIALOGIC Inc.
+148c C.P. Technology Co. Ltd
+148d DIGICOM Systems, Inc.
+ 1003 HCF 56k Data/Fax Modem
+148e OSI Plus Corporation
+148f Plant Equipment, Inc.
+1490 Stone Microsystems PTY Ltd.
+1491 ZEAL Corporation
+1492 Time Logic Corporation
+1493 MAKER Communications
+1494 WINTOP Technology, Inc.
+1495 TOKAI Communications Industry Co. Ltd
+1496 JOYTECH Computer Co., Ltd.
+1497 SMA Regelsysteme GmbH
+1498 TEWS Datentechnik GmbH
+1499 EMTEC CO., Ltd
+149a ANDOR Technology Ltd
+149b SEIKO Instruments Inc
+149c OVISLINK Corp.
+149d NEWTEK Inc
+149e Mapletree Networks Inc.
+149f LECTRON Co Ltd
+14a0 SOFTING GmbH
+14a1 Systembase Co Ltd
+14a2 Millennium Engineering Inc
+14a3 Maverick Networks
+14a4 GVC/BCM Advanced Research
+14a5 XIONICS Document Technologies Inc
+14a6 INOVA Computers GmbH & Co KG
+14a7 MYTHOS Systems Inc
+14a8 FEATRON Technologies Corporation
+14a9 HIVERTEC Inc
+14aa Advanced MOS Technology Inc
+14ab Mentor Graphics Corp.
+14ac Novaweb Technologies Inc
+14ad Time Space Radio AB
+14ae CTI, Inc
+14af Guillemot Corporation
+14b0 BST Communication Technology Ltd
+14b1 Nextcom K.K.
+14b2 ENNOVATE Networks Inc
+14b3 XPEED Inc
+ 0000 DSL NIC
+14b4 PHILIPS Business Electronics B.V.
+14b5 Creamware GmbH
+14b6 Quantum Data Corp.
+14b7 PROXIM Inc
+ 0001 Symphony 4110
+14b8 Techsoft Technology Co Ltd
+14b9 AIRONET Wireless Communications
+ 0001 PC4800
+ 0340 PC4800
+ 0350 PC4800
+ 4500 PC4500
+ 4800 PC4800
+14ba INTERNIX Inc.
+14bb SEMTECH Corporation
+14bc Globespan Semiconductor Inc.
+14bd CARDIO Control N.V.
+14be L3 Communications
+14bf SPIDER Communications Inc.
+14c0 COMPAL Electronics Inc
+14c1 MYRICOM Inc.
+14c2 DTK Computer
+14c3 MEDIATEK Corp.
+14c4 IWASAKI Information Systems Co Ltd
+14c5 Automation Products AB
+14c6 Data Race Inc
+14c7 Modular Technology Holdings Ltd
+14c8 Turbocomm Tech. Inc.
+14c9 ODIN Telesystems Inc
+14ca PE Logic Corp.
+14cb Billionton Systems Inc
+14cc NAKAYO Telecommunications Inc
+14cd Universal Scientific Ind.
+14ce Whistle Communications
+14cf TEK Microsystems Inc.
+14d0 Ericsson Axe R & D
+14d1 Computer Hi-Tech Co Ltd
+14d2 Titan Electronics Inc
+ 8001 VScom 010L 1 port parallel adaptor
+ 8002 VScom 020L 2 port parallel adaptor
+ 8010 VScom 100L 1 port serial adaptor
+ 8011 VScom 110L 1 port serial and 1 port parallel adaptor
+ 8020 VScom 200L 1 port serial adaptor
+ 8021 VScom 210L 2 port serial and 1 port parallel adaptor
+ 8040 VScom 400L 4 port serial adaptor
+ 8080 VScom 800L 8 port serial adaptor
+ a000 VScom 010H 1 port parallel adaptor
+ a001 VScom 100H 1 port serial adaptor
+ a003 VScom 400H 4 port serial adaptor
+ a004 VScom 400HF1 4 port serial adaptor
+ a005 VScom 200H 2 port serial adaptor
+ e001 VScom 010HV2 1 port parallel adaptor
+ e010 VScom 100HV2 1 port serial adaptor
+ e020 VScom 200HV2 2 port serial adaptor
+14d3 CIRTECH (UK) Ltd
+14d4 Panacom Technology Corp
+14d5 Nitsuko Corporation
+14d6 Accusys Inc
+14d7 Hirakawa Hewtech Corp
+14d8 HOPF Elektronik GmbH
+14d9 Alpha Processor Inc
+14da National Aerospace Laboratories
+14db AFAVLAB Technology Inc
+ 2120 TK9902
+14dc Amplicon Liveline Ltd
+ 0000 PCI230
+ 0001 PCI242
+ 0002 PCI244
+ 0003 PCI247
+ 0004 PCI248
+ 0005 PCI249
+ 0006 PCI260
+ 0007 PCI224
+ 0008 PCI234
+ 0009 PCI236
+ 000a PCI272
+ 000b PCI215
+14dd Boulder Design Labs Inc
+14de Applied Integration Corporation
+14df ASIC Communications Corp
+14e1 INVERTEX
+14e2 INFOLIBRIA
+14e3 AMTELCO
+14e4 Broadcom Corporation
+ 1644 NetXtreme BCM5700 Gigabit Ethernet
+ 1014 0277 Broadcom Vigil B5700 1000BaseTX
+ 1028 00d1 Broadcom BCM5700
+ 1028 0106 Broadcom BCM5700
+ 1028 0109 Broadcom BCM5700 1000BaseTX
+ 1028 010a Broadcom BCM5700 1000BaseTX
+ 10b7 1000 3C996-T 1000BaseTX
+ 10b7 1001 3C996B-T 1000BaseTX
+ 10b7 1002 3C996C-T 1000BaseTX
+ 10b7 1003 3C997-T 1000BaseTX Dual Port
+ 10b7 1004 3C996-SX 1000BaseSX
+ 10b7 1005 3C997-SX 1000BaseSX Dual Port
+ 10b7 1008 3C942 Gigabit LOM (31X31)
+ 14e4 0002 NetXtreme 1000BaseSX
+ 14e4 0003 NetXtreme 1000BaseSX
+ 14e4 0004 NetXtreme 1000BaseTX
+ 14e4 1028 NetXtreme 1000BaseTX
+ 14e4 1644 BCM5700 1000BaseTX
+ 1645 NetXtreme BCM5701 Gigabit Ethernet
+ 0e11 007c NC7770 Gigabit Server Adapter (PCI-X, 10/100/1000-T)
+ 0e11 007d NC6770 Gigabit Server Adapter (PCI-X, 1000-SX)
+ 0e11 0085 NC7780 Gigabit Server Adapter (embedded, WOL)
+ 0e11 0099 NC7780 Gigabit Server Adapter (embedded, WOL)
+ 0e11 009a NC7770 Gigabit Server Adapter (PCI-X, 10/100/1000-T)
+ 1028 0121 Broadcom BCM5701 1000BaseTX
+ 10b7 1004 3C996-SX 1000BaseSX
+ 10b7 1006 3C996B-T 1000BaseTX
+ 10b7 1007 3C1000-T 1000BaseTX
+ 10b7 1008 3C940-BR01 1000BaseTX
+ 14e4 0001 BCM5701 1000BaseTX
+ 14e4 0005 BCM5701 1000BaseTX
+ 14e4 0006 BCM5701 1000BaseTX
+ 14e4 0007 BCM5701 1000BaseSX
+ 14e4 0008 BCM5701 1000BaseTX
+ 14e4 8008 BCM5701 1000BaseTX
+ 1646 NetXtreme BCM5702 Gigabit Ethernet
+ 0e11 00bb NC7760 1000BaseTX
+ 1028 0126 Broadcom BCM5702 1000BaseTX
+ 14e4 8009 BCM5702 1000BaseTX
+ 1647 NetXtreme BCM5703 Gigabit Ethernet
+ 0e11 0099 NC7780 1000BaseTX
+ 0e11 009a NC7770 1000BaseTX
+ 14e4 0009 BCM5703 1000BaseTX
+ 14e4 000a BCM5703 1000BaseSX
+ 14e4 000b BCM5703 1000BaseTX
+ 14e4 8009 BCM5703 1000BaseTX
+ 14e4 800a BCM5703 1000BaseTX
+ 1648 NetXtreme BCM5704 Gigabit Ethernet
+ 164d NetXtreme BCM5702FE Gigabit Ethernet
+ 16a6 NetXtreme BCM5702X Gigabit Ethernet
+ 16a7 NetXtreme BCM5703X Gigabit Ethernet
+ 4212 BCM v.90 56k modem
+ 5820 BCM5820 Crypto Accelerator
+ 5821 BCM5821 Crypto Accelerator
+14e5 Pixelfusion Ltd
+14e6 SHINING Technology Inc
+14e7 3CX
+14e8 RAYCER Inc
+14e9 GARNETS System CO Ltd
+14ea Planex Communications, Inc
+ ab06 FNW-3603-TX CardBus Fast Ethernet
+14eb SEIKO EPSON Corp
+14ec ACQIRIS
+14ed DATAKINETICS Ltd
+14ee MASPRO KENKOH Corp
+14ef CARRY Computer ENG. CO Ltd
+14f0 CANON RESEARCH CENTRE FRANCE
+14f1 Conexant
+ 1002 HCF 56k Modem
+ 1003 HCF 56k Modem
+ 1004 HCF 56k Modem
+ 1005 HCF 56k Modem
+ 1006 HCF 56k Modem
+ 1022 HCF 56k Modem
+ 1023 HCF 56k Modem
+ 1024 HCF 56k Modem
+ 1025 HCF 56k Modem
+ 1026 HCF 56k Modem
+ 1032 HCF 56k Modem
+ 1033 HCF 56k Data/Fax Modem
+ 1033 8077 NEC
+ 122d 4027 Dell Zeus - MDP3880-W(B) Data Fax Modem
+ 122d 4030 Dell Mercury - MDP3880-U(B) Data Fax Modem
+ 122d 4034 Dell Thor - MDP3880-W(U) Data Fax Modem
+ 13e0 020d Dell Copper
+ 13e0 020e Dell Silver
+ 13e0 0261 IBM
+ 13e0 0290 Compaq Goldwing
+ 13e0 02a0 IBM
+ 13e0 02b0 IBM
+ 13e0 02c0 Compaq Scooter
+ 13e0 02d0 IBM
+ 144f 1500 IBM P85-DF (1)
+ 144f 1501 IBM P85-DF (2)
+ 144f 150a IBM P85-DF (3)
+ 144f 150b IBM P85-DF Low Profile (1)
+ 144f 1510 IBM P85-DF Low Profile (2)
+ 1034 HCF 56k Data/Fax/Voice Modem
+ 1035 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 10cf 1098 Fujitsu P85-DFSV
+ 1036 HCF 56k Data/Fax/Voice/Spkp Modem
+ 104d 8067 HCF 56k Modem
+ 122d 4029 MDP3880SP-W
+ 122d 4031 MDP3880SP-U
+ 13e0 0209 Dell Titanium
+ 13e0 020a Dell Graphite
+ 13e0 0260 Gateway Red Owl
+ 13e0 0270 Gateway White Horse
+ 1052 HCF 56k Data/Fax Modem (Worldwide)
+ 1053 HCF 56k Data/Fax Modem (Worldwide)
+ 1054 HCF 56k Data/Fax/Voice Modem (Worldwide)
+ 1055 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (Worldwide)
+ 1056 HCF 56k Data/Fax/Voice/Spkp Modem (Worldwide)
+ 1057 HCF 56k Data/Fax/Voice/Spkp Modem (Worldwide)
+ 1059 HCF 56k Data/Fax/Voice Modem (Worldwide)
+ 1063 HCF 56k Data/Fax Modem
+ 1064 HCF 56k Data/Fax/Voice Modem
+ 1065 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 1066 HCF 56k Data/Fax/Voice/Spkp Modem
+ 122d 4033 Dell Athena - MDP3900V-U
+ 1433 HCF 56k Data/Fax Modem
+ 1434 HCF 56k Data/Fax/Voice Modem
+ 1435 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 1436 HCF 56k Data/Fax Modem
+ 1453 HCF 56k Data/Fax Modem
+ 13e0 0240 IBM
+ 13e0 0250 IBM
+ 144f 1502 IBM P95-DF (1)
+ 144f 1503 IBM P95-DF (2)
+ 1454 HCF 56k Data/Fax/Voice Modem
+ 1455 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 1456 HCF 56k Data/Fax/Voice/Spkp Modem
+ 122d 4035 Dell Europa - MDP3900V-W
+ 122d 4302 Dell MP3930V-W(C) MiniPCI
+ 1610 ADSL AccessRunner PCI Arbitration Device
+ 1611 AccessRunner PCI ADSL Interface Device
+ 1803 HCF 56k Modem
+ 0e11 0023 623-LAN Grizzly
+ 0e11 0043 623-LAN Yogi
+ 1815 HCF 56k Modem
+ 0e11 0022 Grizzly
+ 0e11 0042 Yogi
+ 2003 HSF 56k Data/Fax Modem
+ 2004 HSF 56k Data/Fax/Voice Modem
+ 2005 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 2006 HSF 56k Data/Fax/Voice/Spkp Modem
+ 2013 HSF 56k Data/Fax Modem
+ 0e11 b195 Bear
+ 0e11 b196 Seminole 1
+ 0e11 b1be Seminole 2
+ 1025 8013 Acer
+ 1033 809d NEC
+ 1033 80bc NEC
+ 155d 6793 HP
+ 155d 8850 E Machines
+ 2014 HSF 56k Data/Fax/Voice Modem
+ 2015 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+ 2016 HSF 56k Data/Fax/Voice/Spkp Modem
+ 2043 HSF 56k Data/Fax Modem (WorldW SmartDAA)
+ 2044 HSF 56k Data/Fax/Voice Modem (WorldW SmartDAA)
+ 2045 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (WorldW SmartDAA)
+ 2046 HSF 56k Data/Fax/Voice/Spkp Modem (WorldW SmartDAA)
+ 2063 HSF 56k Data/Fax Modem (SmartDAA)
+ 2064 HSF 56k Data/Fax/Voice Modem (SmartDAA)
+ 2065 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (SmartDAA)
+ 2066 HSF 56k Data/Fax/Voice/Spkp Modem (SmartDAA)
+ 2093 HSF 56k Modem
+ 155d 2f07 Legend
+ 2143 HSF 56k Data/Fax/Cell Modem (Mob WorldW SmartDAA)
+ 2144 HSF 56k Data/Fax/Voice/Cell Modem (Mob WorldW SmartDAA)
+ 2145 HSF 56k Data/Fax/Voice/Spkp (w/HS)/Cell Modem (Mob WorldW SmartDAA)
+ 2146 HSF 56k Data/Fax/Voice/Spkp/Cell Modem (Mob WorldW SmartDAA)
+ 2163 HSF 56k Data/Fax/Cell Modem (Mob SmartDAA)
+ 2164 HSF 56k Data/Fax/Voice/Cell Modem (Mob SmartDAA)
+ 2165 HSF 56k Data/Fax/Voice/Spkp (w/HS)/Cell Modem (Mob SmartDAA)
+ 2166 HSF 56k Data/Fax/Voice/Spkp/Cell Modem (Mob SmartDAA)
+ 2343 HSF 56k Data/Fax CardBus Modem (Mob WorldW SmartDAA)
+ 2344 HSF 56k Data/Fax/Voice CardBus Modem (Mob WorldW SmartDAA)
+ 2345 HSF 56k Data/Fax/Voice/Spkp (w/HS) CardBus Modem (Mob WorldW SmartDAA)
+ 2346 HSF 56k Data/Fax/Voice/Spkp CardBus Modem (Mob WorldW SmartDAA)
+ 2363 HSF 56k Data/Fax CardBus Modem (Mob SmartDAA)
+ 2364 HSF 56k Data/Fax/Voice CardBus Modem (Mob SmartDAA)
+ 2365 HSF 56k Data/Fax/Voice/Spkp (w/HS) CardBus Modem (Mob SmartDAA)
+ 2366 HSF 56k Data/Fax/Voice/Spkp CardBus Modem (Mob SmartDAA)
+ 2443 HSF 56k Data/Fax Modem (Mob WorldW SmartDAA)
+ 104d 8075 Modem
+ 104d 8083 Modem
+ 104d 8097 Modem
+ 2444 HSF 56k Data/Fax/Voice Modem (Mob WorldW SmartDAA)
+ 2445 HSF 56k Data/Fax/Voice/Spkp (w/HS) Modem (Mob WorldW SmartDAA)
+ 2446 HSF 56k Data/Fax/Voice/Spkp Modem (Mob WorldW SmartDAA)
+ 2463 HSF 56k Data/Fax Modem (Mob SmartDAA)
+ 2464 HSF 56k Data/Fax/Voice Modem (Mob SmartDAA)
+ 2465 HSF 56k Data/Fax/Voice/Spkp (w/HS) Modem (Mob SmartDAA)
+ 2466 HSF 56k Data/Fax/Voice/Spkp Modem (Mob SmartDAA)
+ 2f00 HSF 56k HSFi Modem
+ 13e0 8d84 IBM HSFi V.90
+ 13e0 8d85 Compaq Stinger
+ 14f1 2004 Dynalink 56PMi
+ 8234 RS8234 ATM SAR Controller [ServiceSAR Plus]
+14f2 MOBILITY Electronics
+14f3 BROADLOGIC
+14f4 TOKYO Electronic Industry CO Ltd
+14f5 SOPAC Ltd
+14f6 COYOTE Technologies LLC
+14f7 WOLF Technology Inc
+14f8 AUDIOCODES Inc
+14f9 AG COMMUNICATIONS
+14fa WANDEL & GOLTERMANN
+14fb TRANSAS MARINE (UK) Ltd
+14fc QUADRICS Supercomputers World
+14fd JAPAN Computer Industry Inc
+14fe ARCHTEK TELECOM Corp
+14ff TWINHEAD INTERNATIONAL Corp
+1500 DELTA Electronics, Inc
+1501 BANKSOFT CANADA Ltd
+1502 MITSUBISHI ELECTRIC LOGISTICS SUPPORT Co Ltd
+1503 KAWASAKI LSI USA Inc
+1504 KAISER Electronics
+1505 ITA INGENIEURBURO FUR TESTAUFGABEN GmbH
+1506 CHAMELEON Systems Inc
+# Should be HTEC Ltd, but there are no known HTEC chips and 1507 is already used by mistake by Motorola (see vendor ID 1057).
+1507 Motorola ?? / HTEC
+ 0001 MPC105 [Eagle]
+ 0002 MPC106 [Grackle]
+ 0003 MPC8240 [Kahlua]
+ 0100 MC145575 [HFC-PCI]
+ 0431 KTI829c 100VG
+ 4801 Raven
+ 4802 Falcon
+ 4803 Hawk
+ 4806 CPX8216
+1508 HONDA CONNECTORS/MHOTRONICS Inc
+1509 FIRST INTERNATIONAL Computer Inc
+150a FORVUS RESEARCH Inc
+150b YAMASHITA Systems Corp
+150c KYOPAL CO Ltd
+150d WARPSPEED Inc
+150e C-PORT Corp
+150f INTEC GmbH
+1510 BEHAVIOR TECH Computer Corp
+1511 CENTILLIUM Technology Corp
+1512 ROSUN Technologies Inc
+1513 Raychem
+1514 TFL LAN Inc
+1515 Advent design
+1516 MYSON Technology Inc
+ 0803 SURECOM EP-320X-S 100/10M Ethernet PCI Adapter
+ 1320 10bd SURECOM EP-320X-S 100/10M Ethernet PCI Adapter
+1517 ECHOTEK Corp
+1518 PEP MODULAR Computers GmbH
+1519 TELEFON AKTIEBOLAGET LM Ericsson
+151a Globetek
+ 1002 PCI-1002
+ 1004 PCI-1004
+ 1008 PCI-1008
+151b COMBOX Ltd
+151c DIGITAL AUDIO LABS Inc
+151d Fujitsu Computer Products Of America
+151e MATRIX Corp
+151f TOPIC SEMICONDUCTOR Corp
+ 0000 TP560 Data/Fax/Voice 56k modem
+1520 CHAPLET System Inc
+1521 BELL Corp
+1522 MainPine Ltd
+ 0100 PCI <-> IOBus Bridge
+ 1522 0200 RockForceDUO 2 Port V.92/V.44 Data/Fax/Voice Modem
+ 1522 0300 RockForceQUATRO 4 Port V.92/V.44 Data/Fax/Voice Modem
+ 1522 0400 RockForceDUO+ 2 Port V.92/V.44 Data/Fax/Voice Modem
+ 1522 0500 RockForceQUATRO+ 4 Port V.92/V.44 Data/Fax/Voice Modem
+ 1522 0600 RockForce+ 2 Port V.90 Data/Fax/Voice Modem
+1523 MUSIC Semiconductors
+1524 ENE Technology Inc
+1525 IMPACT Technologies
+1526 ISS, Inc
+1527 SOLECTRON
+1528 ACKSYS
+1529 AMERICAN MICROSystems Inc
+152a QUICKTURN DESIGN Systems
+152b FLYTECH Technology CO Ltd
+152c MACRAIGOR Systems LLC
+152d QUANTA Computer Inc
+152e MELEC Inc
+152f PHILIPS - CRYPTO
+1530 ACQIS Technology Inc
+1531 CHYRON Corp
+1532 ECHELON Corp
+1533 BALTIMORE
+1534 ROAD Corp
+1535 EVERGREEN Technologies Inc
+1537 DATALEX COMMUNICATIONS
+1538 ARALION Inc
+1539 ATELIER INFORMATIQUES et ELECTRONIQUE ETUDES S.A.
+153a ONO SOKKI
+153b TERRATEC Electronic GmbH
+153c ANTAL Electronic
+153d FILANET Corp
+153e TECHWELL Inc
+153f MIPS DENMARK
+1540 PROVIDEO MULTIMEDIA Co Ltd
+1541 MACHONE Communications
+1542 VIVID Technology Inc
+1543 SILICON Laboratories
+1544 DCM DATA Systems
+1545 VISIONTEK
+1546 IOI Technology Corp
+1547 MITUTOYO Corp
+1548 JET PROPULSION Laboratory
+1549 INTERCONNECT Systems Solutions
+154a MAX Technologies Inc
+154b COMPUTEX Co Ltd
+154c VISUAL Technology Inc
+154d PAN INTERNATIONAL Industrial Corp
+154e SERVOTEST Ltd
+154f STRATABEAM Technology
+1550 OPEN NETWORK Co Ltd
+1551 SMART Electronic DEVELOPMENT GmbH
+1552 RACAL AIRTECH Ltd
+1553 CHICONY Electronics Co Ltd
+1554 PROLINK Microsystems Corp
+1555 GESYTEC GmbH
+1556 PLD APPLICATIONS
+1557 MEDIASTAR Co Ltd
+1558 CLEVO/KAPOK Computer
+1559 SI LOGIC Ltd
+155a INNOMEDIA Inc
+155b PROTAC INTERNATIONAL Corp
+155c Cemax-Icon Inc
+155d Mac System Co Ltd
+155e LP Elektronik GmbH
+155f Perle Systems Ltd
+1560 Terayon Communications Systems
+1561 Viewgraphics Inc
+1562 Symbol Technologies
+1563 A-Trend Technology Co Ltd
+1564 Yamakatsu Electronics Industry Co Ltd
+1565 Biostar Microtech Int'l Corp
+1566 Ardent Technologies Inc
+1567 Jungsoft
+1568 DDK Electronics Inc
+1569 Palit Microsystems Inc.
+156a Avtec Systems
+156b 2wire Inc
+156c Vidac Electronics GmbH
+156d Alpha-Top Corp
+156e Alfa Inc
+156f M-Systems Flash Disk Pioneers Ltd
+1570 Lecroy Corp
+1571 Contemporary Controls
+ a001 CCSI PCI20-485 ARCnet
+ a002 CCSI PCI20-485D ARCnet
+ a003 CCSI PCI20-485X ARCnet
+ a004 CCSI PCI20-CXB ARCnet
+ a005 CCSI PCI20-CXS ARCnet
+ a006 CCSI PCI20-FOG-SMA ARCnet
+ a007 CCSI PCI20-FOG-ST ARCnet
+ a008 CCSI PCI20-TB5 ARCnet
+ a009 CCSI PCI20-5-485 5Mbit ARCnet
+ a00a CCSI PCI20-5-485D 5Mbit ARCnet
+ a00b CCSI PCI20-5-485X 5Mbit ARCnet
+ a00c CCSI PCI20-5-FOG-ST 5Mbit ARCnet
+ a00d CCSI PCI20-5-FOG-SMA 5Mbit ARCnet
+ a201 CCSI PCI22-485 10Mbit ARCnet
+ a202 CCSI PCI22-485D 10Mbit ARCnet
+ a203 CCSI PCI22-485X 10Mbit ARCnet
+ a204 CCSI PCI22-CHB 10Mbit ARCnet
+ a205 CCSI PCI22-FOG_ST 10Mbit ARCnet
+ a206 CCSI PCI22-THB 10Mbit ARCnet
+1572 Otis Elevator Company
+1573 Lattice - Vantis
+1574 Fairchild Semiconductor
+1575 Voltaire Advanced Data Security Ltd
+1576 Viewcast COM
+1578 HITT
+1579 Dual Technology Corp
+157a Japan Electronics Ind Inc
+157b Star Multimedia Corp
+157c Eurosoft (UK)
+ 8001 Fix2000 PCI Y2K Compliance Card
+157d Gemflex Networks
+157e Transition Networks
+157f PX Instruments Technology Ltd
+1580 Primex Aerospace Co
+1581 SEH Computertechnik GmbH
+1582 Cytec Corp
+1583 Inet Technologies Inc
+1584 Uniwill Computer Corp
+1585 Logitron
+1586 Lancast Inc
+1587 Konica Corp
+1588 Solidum Systems Corp
+1589 Atlantek Microsystems Pty Ltd
+158a Digalog Systems Inc
+158b Allied Data Technologies
+158c Hitachi Semiconductor & Devices Sales Co Ltd
+158d Point Multimedia Systems
+158e Lara Technology Inc
+158f Ditect Coop
+1590 3pardata Inc
+1591 ARN
+1592 Syba Tech Ltd
+ 0781 Multi-IO Card
+ 0782 Parallel Port Card 2xEPP
+ 0783 Multi-IO Card
+ 0785 Multi-IO Card
+ 0786 Multi-IO Card
+ 0787 Multi-IO Card
+ 0788 Multi-IO Card
+ 078a Multi-IO Card
+1593 Bops Inc
+1594 Netgame Ltd
+1595 Diva Systems Corp
+1596 Folsom Research Inc
+1597 Memec Design Services
+1598 Granite Microsystems
+1599 Delta Electronics Inc
+159a General Instrument
+159b Faraday Technology Corp
+159c Stratus Computer Systems
+159d Ningbo Harrison Electronics Co Ltd
+159e A-Max Technology Co Ltd
+159f Galea Network Security
+15a0 Compumaster SRL
+15a1 Geocast Network Systems
+15a2 Catalyst Enterprises Inc
+ 0001 TA700 PCI Bus Analyzer/Exerciser
+15a3 Italtel
+15a4 X-Net OY
+15a5 Toyota Macs Inc
+15a6 Sunlight Ultrasound Technologies Ltd
+15a7 SSE Telecom Inc
+15a8 Shanghai Communications Technologies Center
+15aa Moreton Bay
+15ab Bluesteel Networks Inc
+15ac North Atlantic Instruments
+15ad VMWare Inc
+ 0710 Virtual SVGA
+15ae Amersham Pharmacia Biotech
+15b0 Zoltrix International Ltd
+15b1 Source Technology Inc
+15b2 Mosaid Technologies Inc
+15b3 Mellanox Technology
+ 5274 MT21108 InfiniBridge
+15b4 CCI/TRIAD
+15b5 Cimetrics Inc
+15b6 Texas Memory Systems Inc
+15b7 Sandisk Corp
+15b8 ADDI-DATA GmbH
+15b9 Maestro Digital Communications
+15ba Impacct Technology Corp
+15bb Portwell Inc
+15bc Agilent Technologies
+ 2929 E2929A PCI/PCI-X Bus Analyzer
+15bd DFI Inc
+15be Sola Electronics
+15bf High Tech Computer Corp (HTC)
+15c0 BVM Ltd
+15c1 Quantel
+15c2 Newer Technology Inc
+15c3 Taiwan Mycomp Co Ltd
+15c4 EVSX Inc
+15c5 Procomp Informatics Ltd
+15c6 Technical University of Budapest
+15c7 Tateyama System Laboratory Co Ltd
+ 0349 Tateyama C-PCI PLC/NC card Rev.01A
+15c8 Penta Media Co Ltd
+15c9 Serome Technology Inc
+15ca Bitboys OY
+15cb AG Electronics Ltd
+15cc Hotrail Inc
+15cd Dreamtech Co Ltd
+15ce Genrad Inc
+15cf Hilscher GmbH
+15d1 Infineon Technologies AG
+15d2 FIC (First International Computer Inc)
+15d3 NDS Technologies Israel Ltd
+15d4 Iwill Corp
+15d5 Tatung Co
+15d6 Entridia Corp
+15d7 Rockwell-Collins Inc
+15d8 Cybernetics Technology Co Ltd
+15d9 Super Micro Computer Inc
+15da Cyberfirm Inc
+15db Applied Computing Systems Inc
+15dc Litronic Inc
+ 0001 Argus 300 PCI Cryptography Module
+15dd Sigmatel Inc
+15de Malleable Technologies Inc
+15df Infinilink Corp
+15e0 Cacheflow Inc
+15e1 Voice Technologies Group Inc
+15e2 Quicknet Technologies Inc
+15e3 Networth Technologies Inc
+15e4 VSN Systemen BV
+15e5 Valley technologies Inc
+15e6 Agere Inc
+15e7 Get Engineering Corp
+15e8 National Datacomm Corp
+ 0130 Wireless PCI Card
+15e9 Pacific Digital Corp
+15ea Tokyo Denshi Sekei K.K.
+15eb Drsearch GmbH
+15ec Beckhoff GmbH
+15ed Macrolink Inc
+15ee In Win Development Inc
+15ef Intelligent Paradigm Inc
+15f0 B-Tree Systems Inc
+15f1 Times N Systems Inc
+15f2 Diagnostic Instruments Inc
+15f3 Digitmedia Corp
+15f4 Valuesoft
+15f5 Power Micro Research
+15f6 Extreme Packet Device Inc
+15f7 Banctec
+15f8 Koga Electronics Co
+15f9 Zenith Electronics Corp
+15fa J.P. Axzam Corp
+15fb Zilog Inc
+15fc Techsan Electronics Co Ltd
+15fd N-CUBED.NET
+15fe Kinpo Electronics Inc
+15ff Fastpoint Technologies Inc
+1600 Northrop Grumman - Canada Ltd
+1601 Tenta Technology
+1602 Prosys-tec Inc
+1603 Nokia Wireless Communications
+1604 Central System Research Co Ltd
+1605 Pairgain Technologies
+1606 Europop AG
+1607 Lava Semiconductor Manufacturing Inc
+1608 Automated Wagering International
+1609 Scimetric Instruments Inc
+1619 FarSite Communications Ltd
+ 0400 FarSync T2P (2 port X.21/V.35/V.24)
+ 0440 FarSync T4P (4 port X.21/V.35/V.24)
+1629 Kongsberg Spacetec AS
+ 1003 Format synchronizer v3.0
+ 2002 Fast Universal Data Output
+1638 Standard Microsystems Corp [SMC]
+ 1100 SMC2602W EZConnect / Addtron AWA-100
+1657 Brocade Communications Systems, Inc.
+165d Hsing Tech. Enterprise Co., Ltd.
+1661 Worldspace Corp.
+1668 Action Tec Electronics Inc
+16ec U.S. Robotics
+ 3685 Wireless Access PCI Adapter Model 022415
+16f6 VideoTele.com, Inc.
+170b NetOctave Inc
+170c YottaYotta Inc.
+173b Altima (nee Broadcom)
+ 03e8 AC1000 Gigabit Ethernet
+ 03ea AC9100 Gigabit Ethernet
+1743 Peppercon AG
+ 8139 ROL/F-100 Fast Ethernet Adapter with ROL
+174b PC Partner Limited
+175e Sanera Systems, Inc.
+# also used by Struck Innovative Systeme for joint developments
+1796 Research Centre Juelich
+ 0001 SIS1100 [Gigabit link]
+ 0002 HOTlink
+ 0003 Counter Timer
+ 0004 CAMAC Controller
+ 0005 PROFIBUS
+ 0006 AMCC HOTlink
+1813 Ambient Technologies Inc
+1a08 Sierra Semiconductor
+ 0000 SC15064
+1b13 Jaton Corp
+1c1c Symphony
+ 0001 82C101
+1d44 DPT
+ a400 PM2x24/PM3224
+1de1 Tekram Technology Co.,Ltd.
+ 0391 TRM-S1040
+ 2020 DC-390
+ 690c 690c
+ dc29 DC290
+2001 Temporal Research Ltd
+21c3 21st Century Computer Corp.
+2348 Racore
+ 2010 8142 100VG/AnyLAN
+2646 Kingston Technologies
+270b Xantel Corporation
+270f Chaintech Computer Co. Ltd
+2711 AVID Technology Inc.
+2a15 3D Vision(???)
+3000 Hansol Electronics Inc.
+3142 Post Impression Systems.
+3388 Hint Corp
+ 0021 HB1-SE33 PCI-PCI Bridge
+ 8011 VXPro II Chipset
+ 3388 8011 VXPro II Chipset CPU to PCI Bridge
+ 8012 VXPro II Chipset
+ 3388 8012 VXPro II Chipset PCI to ISA Bridge
+ 8013 VXPro II IDE
+ 3388 8013 VXPro II Chipset EIDE Controller
+3411 Quantum Designs (H.K.) Inc
+3513 ARCOM Control Systems Ltd
+38ef 4Links
+3d3d 3DLabs
+ 0001 GLINT 300SX
+ 0002 GLINT 500TX
+ 0003 GLINT Delta
+ 0004 Permedia
+ 0005 Permedia
+ 0006 GLINT MX
+ 0007 3D Extreme
+ 0008 GLINT Gamma G1
+ 0009 Permedia II 2D+3D
+ 1040 0011 AccelStar II
+ 3d3d 0100 AccelStar II 3D Accelerator
+ 3d3d 0111 Permedia 3:16
+ 3d3d 0114 Santa Ana
+ 3d3d 0116 Oxygen GVX1
+ 3d3d 0119 Scirocco
+ 3d3d 0120 Santa Ana PCL
+ 3d3d 0125 Oxygen VX1
+ 3d3d 0127 Permedia3 Create!
+ 000a GLINT R3
+ 3d3d 0121 Oxygen VX1
+ 0100 Permedia II 2D+3D
+ 1004 Permedia
+ 3d04 Permedia
+ ffff Glint VGA
+4005 Avance Logic Inc.
+ 0300 ALS300 PCI Audio Device
+ 0308 ALS300+ PCI Audio Device
+ 0309 PCI Input Controller
+ 1064 ALG-2064
+ 2064 ALG-2064i
+ 2128 ALG-2364A GUI Accelerator
+ 2301 ALG-2301
+ 2302 ALG-2302
+ 2303 AVG-2302 GUI Accelerator
+ 2364 ALG-2364A
+ 2464 ALG-2464
+ 2501 ALG-2564A/25128A
+ 4000 ALS4000 Audio Chipset
+ 4005 4000 ALS4000 Audio Chipset
+ 4710 ALC200/200P
+4033 Addtron Technology Co, Inc.
+ 1360 RTL8139 Ethernet
+4143 Digital Equipment Corp
+416c Aladdin Knowledge Systems
+ 0100 AladdinCARD
+ 0200 CPC
+4444 Internext Compression Inc
+4468 Bridgeport machines
+4594 Cogetec Informatique Inc
+45fb Baldor Electric Company
+4680 Umax Computer Corp
+4843 Hercules Computer Technology Inc
+4916 RedCreek Communications Inc
+ 1960 RedCreek PCI adapter
+4943 Growth Networks
+4978 Axil Computer Inc
+4a14 NetVin
+ 5000 NV5000SC
+ 4a14 5000 RT8029-Based Ethernet Adapter
+4b10 Buslogic Inc.
+4c48 LUNG HWA Electronics
+4c53 SBS Technologies
+4ca1 Seanix Technology Inc
+4d51 MediaQ Inc.
+ 0200 MQ-200
+4d54 Microtechnica Co Ltd
+4ddc ILC Data Device Corp
+ 0100 DD-42924I5-300 (ARINC 429 Data Bus)
+ 0801 BU-65570I1 MIL-STD-1553 Test and Simulation
+ 0802 BU-65570I2 MIL-STD-1553 Test and Simulation
+ 0811 BU-65572I1 MIL-STD-1553 Test and Simulation
+ 0812 BU-65572I2 MIL-STD-1553 Test and Simulation
+ 0881 BU-65570T1 MIL-STD-1553 Test and Simulation
+ 0882 BU-65570T2 MIL-STD-1553 Test and Simulation
+ 0891 BU-65572T1 MIL-STD-1553 Test and Simulation
+ 0892 BU-65572T2 MIL-STD-1553 Test and Simulation
+ 0901 BU-65565C1 MIL-STD-1553 Data Bus
+ 0902 BU-65565C2 MIL-STD-1553 Data Bus
+ 0903 BU-65565C3 MIL-STD-1553 Data Bus
+ 0904 BU-65565C4 MIL-STD-1553 Data Bus
+ 0b01 BU-65569I1 MIL-STD-1553 Data Bus
+ 0b02 BU-65569I2 MIL-STD-1553 Data Bus
+ 0b03 BU-65569I3 MIL-STD-1553 Data Bus
+ 0b04 BU-65569I4 MIL-STD-1553 Data Bus
+5046 GemTek Technology Corporation
+ 1001 PCI Radio
+5053 Voyetra Technologies
+ 2010 Daytona Audio Adapter
+5136 S S Technologies
+5143 Qualcomm Inc
+5145 Ensoniq (Old)
+ 3031 Concert AudioPCI
+5301 Alliance Semiconductor Corp.
+ 0001 ProMotion aT3D
+5333 S3 Inc.
+ 0551 Plato/PX (system)
+ 5631 86c325 [ViRGE]
+ 8800 86c866 [Vision 866]
+ 8801 86c964 [Vision 964]
+ 8810 86c764_0 [Trio 32 vers 0]
+ 8811 86c764/765 [Trio32/64/64V+]
+ 8812 86cM65 [Aurora64V+]
+ 8813 86c764_3 [Trio 32/64 vers 3]
+ 8814 86c767 [Trio 64UV+]
+ 8815 86cM65 [Aurora 128]
+ 883d 86c988 [ViRGE/VX]
+ 8870 FireGL
+ 8880 86c868 [Vision 868 VRAM] vers 0
+ 8881 86c868 [Vision 868 VRAM] vers 1
+ 8882 86c868 [Vision 868 VRAM] vers 2
+ 8883 86c868 [Vision 868 VRAM] vers 3
+ 88b0 86c928 [Vision 928 VRAM] vers 0
+ 88b1 86c928 [Vision 928 VRAM] vers 1
+ 88b2 86c928 [Vision 928 VRAM] vers 2
+ 88b3 86c928 [Vision 928 VRAM] vers 3
+ 88c0 86c864 [Vision 864 DRAM] vers 0
+ 88c1 86c864 [Vision 864 DRAM] vers 1
+ 88c2 86c864 [Vision 864-P DRAM] vers 2
+ 88c3 86c864 [Vision 864-P DRAM] vers 3
+ 88d0 86c964 [Vision 964 VRAM] vers 0
+ 88d1 86c964 [Vision 964 VRAM] vers 1
+ 88d2 86c964 [Vision 964-P VRAM] vers 2
+ 88d3 86c964 [Vision 964-P VRAM] vers 3
+ 88f0 86c968 [Vision 968 VRAM] rev 0
+ 88f1 86c968 [Vision 968 VRAM] rev 1
+ 88f2 86c968 [Vision 968 VRAM] rev 2
+ 88f3 86c968 [Vision 968 VRAM] rev 3
+ 8900 86c755 [Trio 64V2/DX]
+ 5333 8900 86C775 Trio64V2/DX
+ 8901 86c775/86c785 [Trio 64V2/DX or /GX]
+ 5333 8901 86C775 Trio64V2/DX, 86C785 Trio64V2/GX
+ 8902 Plato/PX
+ 8903 Trio 3D business multimedia
+ 8904 Trio 64 3D
+ 1014 00db Integrated Trio3D
+ 5333 8904 86C365 Trio3D AGP
+ 8905 Trio 64V+ family
+ 8906 Trio 64V+ family
+ 8907 Trio 64V+ family
+ 8908 Trio 64V+ family
+ 8909 Trio 64V+ family
+ 890a Trio 64V+ family
+ 890b Trio 64V+ family
+ 890c Trio 64V+ family
+ 890d Trio 64V+ family
+ 890e Trio 64V+ family
+ 890f Trio 64V+ family
+ 8a01 ViRGE/DX or /GX
+ 0e11 b032 ViRGE/GX
+ 10b4 1617 Nitro 3D
+ 10b4 1717 Nitro 3D
+ 5333 8a01 ViRGE/DX
+ 8a10 ViRGE/GX2
+ 1092 8a10 Stealth 3D 4000
+ 8a13 86c368 [Trio 3D/2X]
+ 5333 8a13 Trio3D/2X
+ 8a20 86c794 [Savage 3D]
+ 5333 8a20 86C391 Savage3D
+ 8a21 86c390 [Savage 3D/MV]
+ 5333 8a21 86C390 Savage3D/MV
+ 8a22 Savage 4
+ 1033 8068 Savage 4
+ 1033 8069 Savage 4
+ 105d 0018 SR9 8Mb SDRAM
+ 105d 002a SR9 Pro 16Mb SDRAM
+ 105d 003a SR9 Pro 32Mb SDRAM
+ 105d 092f SR9 Pro+ 16Mb SGRAM
+ 1092 4207 Stealth III S540
+ 1092 4800 Stealth III S540
+ 1092 4807 SpeedStar A90
+ 1092 4808 Stealth III S540
+ 1092 4809 Stealth III S540
+ 1092 480e Stealth III S540
+ 1092 4904 Stealth III S520
+ 1092 4905 SpeedStar A200
+ 1092 4a09 Stealth III S540
+ 1092 4a0b Stealth III S540 Xtreme
+ 1092 4a0f Stealth III S540
+ 1092 4e01 Stealth III S540
+ 1102 101d 3d Blaster Savage 4
+ 1102 101e 3d Blaster Savage 4
+ 5333 8100 86C394-397 Savage4 SDRAM 100
+ 5333 8110 86C394-397 Savage4 SDRAM 110
+ 5333 8125 86C394-397 Savage4 SDRAM 125
+ 5333 8143 86C394-397 Savage4 SDRAM 143
+ 5333 8a22 86C394-397 Savage4
+ 5333 8a2e 86C394-397 Savage4 32bit
+ 5333 9125 86C394-397 Savage4 SGRAM 125
+ 5333 9143 86C394-397 Savage4 SGRAM 143
+ 8a23 Savage 4
+ 8a25 ProSavage PM133
+ 8a26 ProSavage KM133
+ 8c00 ViRGE/M3
+ 8c01 ViRGE/MX
+ 1179 0001 ViRGE/MX
+ 8c02 ViRGE/MX+
+ 8c03 ViRGE/MX+MV
+ 8c10 86C270-294 Savage/MX-MV
+ 8c11 82C270-294 Savage/MX
+ 8c12 86C270-294 Savage/IX-MV
+ 8c13 86C270-294 Savage/IX
+ 8c22 SuperSavage MX/128
+ 8c24 SuperSavage MX/64
+ 8c26 SuperSavage MX/64C
+ 8c2a SuperSavage IX/128 SDR
+ 8c2b SuperSavage IX/128 DDR
+ 8c2c SuperSavage IX/64 SDR
+ 8c2d SuperSavage IX/64 DDR
+ 8c2e SuperSavage IX/C SDR
+ 1014 01fc ThinkPad T23 (2647-4MG)
+ 8c2f SuperSavage IX/C DDR
+# Integrated in VIA ProSavage PN133 North Bridge
+ 8d01 VT8603 [ProSavage PN133] AGP4X VGA Controller (Twister)
+ 8d02 VT8636A [ProSavage KN133] AGP4X VGA Controller (TwisterK)
+ 8d04 VT8751 [ProSavageDDR P4M266] VGA Controller
+ 9102 86C410 Savage 2000
+ 1092 5932 Viper II Z200
+ 1092 5934 Viper II Z200
+ 1092 5952 Viper II Z200
+ 1092 5954 Viper II Z200
+ 1092 5a35 Viper II Z200
+ 1092 5a37 Viper II Z200
+ 1092 5a55 Viper II Z200
+ 1092 5a57 Viper II Z200
+ ca00 SonicVibes
+544c Teralogic Inc
+5455 Technische Universitaet Berlin
+ 4458 S5933
+5519 Cnet Technologies, Inc.
+5544 Dunord Technologies
+ 0001 I-30xx Scanner Interface
+5555 Genroco, Inc
+ 0003 TURBOstor HFP-832 [HiPPI NIC]
+5700 Netpower
+6356 UltraStor
+6374 c't Magazin für Computertechnik
+ 6773 GPPCI
+6409 Logitec Corp.
+6666 Decision Computer International Co.
+ 0001 PCCOM4
+ 0002 PCCOM8
+7604 O.N. Electronic Co Ltd.
+7bde MIDAC Corporation
+7fed PowerTV
+8008 Quancom Electronic GmbH
+ 0010 WDOG1 [PCI-Watchdog 1]
+ 0011 PWDOG2 [PCI-Watchdog 2]
+8086 Intel Corp.
+ 0007 82379AB
+ 0008 Extended Express System Support Controller
+ 0039 21145
+ 0122 82437FX
+ 0482 82375EB
+ 0483 82424ZX [Saturn]
+ 0484 82378IB [SIO ISA Bridge]
+ 0486 82430ZX [Aries]
+ 04a3 82434LX [Mercury/Neptune]
+ 04d0 82437FX [Triton FX]
+ 0600 RAID Controller
+ 0960 80960RP [i960 RP Microprocessor/Bridge]
+ 0962 80960RM [i960RM Bridge]
+ 0964 80960RP [i960 RP Microprocessor/Bridge]
+ 1000 82542 Gigabit Ethernet Controller
+ 0e11 b0df NC1632 Gigabit Ethernet Adapter (1000-SX)
+ 0e11 b0e0 NC1633 Gigabit Ethernet Adapter (1000-LX)
+ 0e11 b123 NC1634 Gigabit Ethernet Adapter (1000-SX)
+ 1014 0119 Netfinity Gigabit Ethernet SX Adapter
+ 8086 1000 PRO/1000 Gigabit Server Adapter
+ 1001 82543GC Gigabit Ethernet Controller
+ 0e11 004a NC6136 Gigabit Server Adapter
+ 1014 01ea Netfinity Gigabit Ethernet SX Adapter
+ 8086 1003 PRO/1000 F Server Adapter
+ 1002 Pro 100 LAN+Modem 56 Cardbus II
+ 8086 200e Pro 100 LAN+Modem 56 Cardbus II
+ 8086 2013 Pro 100 SR Mobile Combo Adapter
+ 8086 2017 Pro 100 S Combo Mobile Adapter
+ 1004 82543GC Gigabit Ethernet Controller
+ 0e11 0049 NC7132 Gigabit Upgrade Module
+ 0e11 b1a4 NC7131 Gigabit Server Adapter
+ 1014 10f2 Gigabit Ethernet Server Adapter
+ 8086 1004 PRO/1000 T Server Adapter
+ 8086 2004 PRO/1000 T Server Adapter
+ 1008 82544EI Gigabit Ethernet Controller
+ 8086 1107 PRO/1000 XT Server Adapter
+ 8086 2107 PRO/1000 XT Server Adapter
+ 8086 2110 PRO/1000 XT Server Adapter
+ 1009 82544EI Gigabit Ethernet Controller
+ 8086 1109 PRO/1000 XF Server Adapter
+ 8086 2109 PRO/1000 XF Server Adapter
+ 100c 82544GC Gigabit Ethernet Controller
+ 8086 1112 PRO/1000 T Desktop Adapter
+ 8086 2112 PRO/1000 T Desktop Adapter
+ 100d 82544GC Gigabit Ethernet Controller
+ 100e 82540EM Gigabit Ethernet Controller
+ 8086 001e PRO/1000 MT Desktop Adapter
+ 8086 002e PRO/1000 MT Desktop Adapter
+ 100f 82545EM Gigabit Ethernet Controller
+ 8086 1001 PRO/1000 MT Server Adapter
+ 1010 82546EB Gigabit Ethernet Controller
+ 8086 1011 PRO/1000 MT Dual Port Server Adapter
+ 1011 82545EM Gigabit Ethernet Controller
+ 8086 1002 PRO/1000 MF Server Adapter
+ 1012 82546EB Gigabit Ethernet Controller
+ 8086 1012 PRO/1000 MF Dual Port Server Adapter
+ 1029 82559 Ethernet Controller
+ 1030 82559 InBusiness 10/100
+ 1031 82801CAM (ICH3) PRO/100 VE (LOM) Ethernet Controller
+ 1014 0209 ThinkPad A30p (2653-64G)
+ 104d 80e7 Vaio PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 107b 5350 EtherExpress PRO/100 VE
+ 1179 0001 EtherExpress PRO/100 VE
+ 144d c000 EtherExpress PRO/100 VE
+ 144d c001 EtherExpress PRO/100 VE
+ 144d c003 EtherExpress PRO/100 VE
+ 1032 82801CAM (ICH3) PRO/100 VE Ethernet Controller
+ 1033 82801CAM (ICH3) PRO/100 VM (LOM) Ethernet Controller
+ 1034 82801CAM (ICH3) PRO/100 VM Ethernet Controller
+ 1035 82801CAM (ICH3)/82562EH (LOM) Ethernet Controller
+ 1036 82801CAM (ICH3) 82562EH Ethernet Controller
+ 1037 82801CAM (ICH3) Chipset Ethernet Controller
+ 1038 82801CAM (ICH3) PRO/100 VM (KM) Ethernet Controller
+ 1039 82801BD PRO/100 VE (LOM) Ethernet Controller
+ 103a 82801BD PRO/100 VE (CNR) Ethernet Controller
+ 103b 82801BD PRO/100 VM (LOM) Ethernet Controller
+ 103c 82801BD PRO/100 VM (CNR) Ethernet Controller
+ 103d 82801BD PRO/100 VE (MOB) Ethernet Controller
+ 103e 82801BD PRO/100 VM (MOB) Ethernet Controller
+ 1059 82551QM Ethernet Controller
+ 1130 82815 815 Chipset Host Bridge and Memory Controller Hub
+ 1043 8027 TUSL2-C Mainboard
+ 104d 80df Vaio PCG-FX403
+ 1131 82815 815 Chipset AGP Bridge
+ 1132 82815 CGC [Chipset Graphics Controller]
+ 1025 1016 Travelmate 612 TX
+ 104d 80df Vaio PCG-FX403
+ 1161 82806AA PCI64 Hub Advanced Programmable Interrupt Controller
+ 8086 1161 82806AA PCI64 Hub APIC
+ 1200 Intel IXP1200 Network Processor
+ 172a 0000 AEP SSL Accelerator
+ 1209 82559ER
+ 1221 82092AA_0
+ 1222 82092AA_1
+ 1223 SAA7116
+ 1225 82452KX/GX [Orion]
+ 1226 82596 PRO/10 PCI
+ 1227 82865 EtherExpress PRO/100A
+ 1228 82556 EtherExpress PRO/100 Smart
+# the revision field differentiates between them (1-3 is 82557, 4-5 is 82558, 6-8 is 82559, 9 is 82559ER)
+ 1229 82557/8/9 [Ethernet Pro 100]
+ 0e11 3001 82559 Fast Ethernet LOM with Alert on LAN*
+ 0e11 3002 82559 Fast Ethernet LOM with Alert on LAN*
+ 0e11 3003 82559 Fast Ethernet LOM with Alert on LAN*
+ 0e11 3004 82559 Fast Ethernet LOM with Alert on LAN*
+ 0e11 3005 82559 Fast Ethernet LOM with Alert on LAN*
+ 0e11 3006 82559 Fast Ethernet LOM with Alert on LAN*
+ 0e11 3007 82559 Fast Ethernet LOM with Alert on LAN*
+ 0e11 b01e NC3120 Fast Ethernet NIC
+ 0e11 b01f NC3122 Fast Ethernet NIC (dual port)
+ 0e11 b02f NC1120 Ethernet NIC
+ 0e11 b04a Netelligent 10/100TX NIC with Wake on LAN
+ 0e11 b0c6 NC3161 Fast Ethernet NIC (embedded, WOL)
+ 0e11 b0c7 NC3160 Fast Ethernet NIC (embedded)
+ 0e11 b0d7 NC3121 Fast Ethernet NIC (WOL)
+ 0e11 b0dd NC3131 Fast Ethernet NIC (dual port)
+ 0e11 b0de NC3132 Fast Ethernet Module (dual port)
+ 0e11 b0e1 NC3133 Fast Ethernet Module (100-FX)
+ 0e11 b134 NC3163 Fast Ethernet NIC (embedded, WOL)
+ 0e11 b13c NC3162 Fast Ethernet NIC (embedded)
+ 0e11 b144 NC3123 Fast Ethernet NIC (WOL)
+ 0e11 b163 NC3134 Fast Ethernet NIC (dual port)
+ 0e11 b164 NC3135 Fast Ethernet Upgrade Module (dual port)
+ 0e11 b1a4 NC7131 Gigabit Server Adapter
+ 1014 005c 82558B Ethernet Pro 10/100
+ 1014 01bc 82559 Fast Ethernet LAN On Motherboard
+ 1014 01f1 10/100 Ethernet Server Adapter
+ 1014 01f2 10/100 Ethernet Server Adapter
+ 1014 0207 Ethernet Pro/100 S
+ 1014 0232 10/100 Dual Port Server Adapter
+ 1014 105c Netfinity 10/100
+ 1014 305c 10/100 EtherJet Management Adapter
+ 1014 405c 10/100 EtherJet Adapter with Alert on LAN
+ 1014 505c 10/100 EtherJet Secure Management Adapter
+ 1014 605c 10/100 EtherJet Secure Management Adapter
+ 1014 705c 10/100 Netfinity 10/100 Ethernet Security Adapter
+ 1014 805c 10/100 Netfinity 10/100 Ethernet Security Adapter
+ 1033 8000 PC-9821X-B06
+ 1033 8016 PK-UG-X006
+ 1033 801f PK-UG-X006
+ 1033 8026 PK-UG-X006
+ 1033 8063 82559-based Fast Ethernet Adapter
+ 1033 8064 82559-based Fast Ethernet Adapter
+ 103c 10c0 NetServer 10/100TX
+ 103c 10c3 NetServer 10/100TX
+ 103c 10ca NetServer 10/100TX
+ 103c 10cb NetServer 10/100TX
+ 103c 10e3 NetServer 10/100TX
+ 103c 10e4 NetServer 10/100TX
+ 103c 1200 NetServer 10/100TX
+ 10c3 1100 SmartEther100 SC1100
+ 10cf 1115 8255x-based Ethernet Adapter (10/100)
+ 10cf 1143 8255x-based Ethernet Adapter (10/100)
+ 1179 0001 8255x-based Ethernet Adapter (10/100)
+ 1179 0002 PCI FastEther LAN on Docker
+ 1179 0003 8255x-based Fast Ethernet
+ 1259 2560 AT-2560 100
+ 1259 2561 AT-2560 100 FX Ethernet Adapter
+ 1266 0001 NE10/100 Adapter
+ 144d 2501 SEM-2000 MiniPCI LAN Adapter
+ 144d 2502 SEM-2100IL MiniPCI LAN Adapter
+ 1668 1100 EtherExpress PRO/100B (TX) (MiniPCI Ethernet+Modem)
+ 8086 0001 EtherExpress PRO/100B (TX)
+ 8086 0002 EtherExpress PRO/100B (T4)
+ 8086 0003 EtherExpress PRO/10+
+ 8086 0004 EtherExpress PRO/100 WfM
+ 8086 0005 82557 10/100
+ 8086 0006 82557 10/100 with Wake on LAN
+ 8086 0007 82558 10/100 Adapter
+ 8086 0008 82558 10/100 with Wake on LAN
+ 8086 0009 EtherExpress PRO/100+
+ 8086 000a EtherExpress PRO/100+ Management Adapter
+ 8086 000b EtherExpress PRO/100+
+ 8086 000c EtherExpress PRO/100+ Management Adapter
+ 8086 000d EtherExpress PRO/100+ Alert On LAN II* Adapter
+ 8086 000e EtherExpress PRO/100+ Management Adapter with Alert On LAN*
+ 8086 000f EtherExpress PRO/100 Desktop Adapter
+ 8086 0010 EtherExpress PRO/100 S Management Adapter
+ 8086 0011 EtherExpress PRO/100 S Management Adapter
+ 8086 0012 EtherExpress PRO/100 S Advanced Management Adapter (D)
+ 8086 0013 EtherExpress PRO/100 S Advanced Management Adapter (E)
+ 8086 0030 EtherExpress PRO/100 Management Adapter with Alert On LAN* GC
+ 8086 0031 EtherExpress PRO/100 Desktop Adapter
+ 8086 0040 EtherExpress PRO/100 S Desktop Adapter
+ 8086 0041 EtherExpress PRO/100 S Desktop Adapter
+ 8086 0042 EtherExpress PRO/100 Desktop Adapter
+ 8086 0050 EtherExpress PRO/100 S Desktop Adapter
+ 8086 1009 EtherExpress PRO/100+ Server Adapter
+ 8086 100c EtherExpress PRO/100+ Server Adapter (PILA8470B)
+ 8086 1012 EtherExpress PRO/100 S Server Adapter (D)
+ 8086 1013 EtherExpress PRO/100 S Server Adapter (E)
+ 8086 1015 EtherExpress PRO/100 S Dual Port Server Adapter
+ 8086 1017 EtherExpress PRO/100+ Dual Port Server Adapter
+ 8086 1030 EtherExpress PRO/100+ Management Adapter with Alert On LAN* G Server
+ 8086 1040 EtherExpress PRO/100 S Server Adapter
+ 8086 1041 EtherExpress PRO/100 S Server Adapter
+ 8086 1042 EtherExpress PRO/100 Server Adapter
+ 8086 1050 EtherExpress PRO/100 S Server Adapter
+ 8086 1051 EtherExpress PRO/100 Server Adapter
+ 8086 1052 EtherExpress PRO/100 Server Adapter
+ 8086 10f0 EtherExpress PRO/100+ Dual Port Adapter
+ 8086 2009 EtherExpress PRO/100 S Mobile Adapter
+ 8086 200d EtherExpress PRO/100 Cardbus
+ 8086 200e EtherExpress PRO/100 LAN+V90 Cardbus Modem
+ 8086 200f EtherExpress PRO/100 SR Mobile Adapter
+ 8086 2010 EtherExpress PRO/100 S Mobile Combo Adapter
+ 8086 2013 EtherExpress PRO/100 SR Mobile Combo Adapter
+ 8086 2016 EtherExpress PRO/100 S Mobile Adapter
+ 8086 2017 EtherExpress PRO/100 S Combo Mobile Adapter
+ 8086 2018 EtherExpress PRO/100 SR Mobile Adapter
+ 8086 2019 EtherExpress PRO/100 SR Combo Mobile Adapter
+ 8086 2101 EtherExpress PRO/100 P Mobile Adapter
+ 8086 2102 EtherExpress PRO/100 SP Mobile Adapter
+ 8086 2103 EtherExpress PRO/100 SP Mobile Adapter
+ 8086 2104 EtherExpress PRO/100 SP Mobile Adapter
+ 8086 2105 EtherExpress PRO/100 SP Mobile Adapter
+ 8086 2106 EtherExpress PRO/100 P Mobile Adapter
+ 8086 2107 EtherExpress PRO/100 Network Connection
+ 8086 2108 EtherExpress PRO/100 Network Connection
+ 8086 2200 EtherExpress PRO/100 P Mobile Combo Adapter
+ 8086 2201 EtherExpress PRO/100 P Mobile Combo Adapter
+ 8086 2202 EtherExpress PRO/100 SP Mobile Combo Adapter
+ 8086 2203 EtherExpress PRO/100+ MiniPCI
+ 8086 2204 EtherExpress PRO/100+ MiniPCI
+ 8086 2205 EtherExpress PRO/100 SP Mobile Combo Adapter
+ 8086 2206 EtherExpress PRO/100 SP Mobile Combo Adapter
+ 8086 2207 EtherExpress PRO/100 SP Mobile Combo Adapter
+ 8086 2208 EtherExpress PRO/100 P Mobile Combo Adapter
+ 8086 2402 EtherExpress PRO/100+ MiniPCI
+ 8086 2407 EtherExpress PRO/100+ MiniPCI
+ 8086 2408 EtherExpress PRO/100+ MiniPCI
+ 8086 2409 EtherExpress PRO/100+ MiniPCI
+ 8086 240f EtherExpress PRO/100+ MiniPCI
+ 8086 2410 EtherExpress PRO/100+ MiniPCI
+ 8086 2411 EtherExpress PRO/100+ MiniPCI
+ 8086 2412 EtherExpress PRO/100+ MiniPCI
+ 8086 2413 EtherExpress PRO/100+ MiniPCI
+ 8086 3000 82559 Fast Ethernet LAN on Motherboard
+ 8086 3001 82559 Fast Ethernet LOM with Basic Alert on LAN*
+ 8086 3002 82559 Fast Ethernet LOM with Alert on LAN II*
+ 8086 3006 EtherExpress PRO/100 S Network Connection
+ 8086 3007 EtherExpress PRO/100 S Network Connection
+ 8086 3008 EtherExpress PRO/100 Network Connection
+ 8086 3010 EtherExpress PRO/100 S Network Connection
+ 8086 3011 EtherExpress PRO/100 S Network Connection
+ 8086 3012 EtherExpress PRO/100 Network Connection
+ 122d 430FX - 82437FX TSC [Triton I]
+ 122e 82371FB PIIX ISA [Triton I]
+ 1230 82371FB PIIX IDE [Triton I]
+ 1231 DSVD Modem
+ 1234 430MX - 82371MX Mobile PCI I/O IDE Xcelerator (MPIIX)
+ 1235 430MX - 82437MX Mob. System Ctrlr (MTSC) & 82438MX Data Path (MTDP)
+ 1237 440FX - 82441FX PMC [Natoma]
+ 1239 82371FB
+ 123b 82380PB
+ 123c 82380AB
+ 123d 683053 Programmable Interrupt Device
+ 123f 82466GX Integrated Hot-Plug Controller (IHPC)
+ 1240 752 AGP
+ 124b 82380FB
+ 1250 430HX - 82439HX TXC [Triton II]
+ 1360 82806AA PCI64 Hub PCI Bridge
+ 1361 82806AA PCI64 Hub Controller (HRes)
+ 8086 1361 82806AA PCI64 Hub Controller (HRes)
+ 8086 8000 82806AA PCI64 Hub Controller (HRes)
+ 1460 82870P2 P64H2 Hub PCI Bridge
+ 1461 82870P2 P64H2 I/OxAPIC
+ 1462 82870P2 P64H2 Hot Plug Controller
+ 1960 80960RP [i960RP Microprocessor]
+ 101e 0431 MegaRAID 431 RAID Controller
+ 101e 0438 MegaRAID 438 Ultra2 LVD RAID Controller
+ 101e 0466 MegaRAID 466 Express Plus RAID Controller
+ 101e 0467 MegaRAID 467 Enterprise 1500 RAID Controller
+ 101e 0490 MegaRAID 490 Express 300 RAID Controller
+ 101e 0762 MegaRAID 762 Express RAID Controller
+ 101e 09a0 PowerEdge Expandable RAID Controller 2/SC
+ 1028 0467 PowerEdge Expandable RAID Controller 2/DC
+ 1028 1111 PowerEdge Expandable RAID Controller 2/SC
+ 103c 03a2 MegaRAID
+ 103c 10c6 MegaRAID 438, HP NetRAID-3Si
+ 103c 10c7 MegaRAID T5, Integrated HP NetRAID
+ 103c 10cc MegaRAID, Integrated HP NetRAID
+ 103c 10cd HP NetRAID-1Si
+ 105a 0000 SuperTrak
+ 105a 2168 SuperTrak Pro
+ 105a 5168 SuperTrak66/100
+ 1111 1111 MegaRAID 466, PowerEdge Expandable RAID Controller 2/SC
+ 1111 1112 PowerEdge Expandable RAID Controller 2/SC
+ 113c 03a2 MegaRAID
+ 1962 80960RM [i960RM Microprocessor]
+ 105a 0000 SuperTrak SX6000 I2O CPU
+ 1a21 82840 840 (Carmel) Chipset Host Bridge (Hub A)
+ 1a23 82840 840 (Carmel) Chipset AGP Bridge
+ 1a24 82840 840 (Carmel) Chipset PCI Bridge (Hub B)
+ 1a30 82845 845 (Brookdale) Chipset Host Bridge
+ 1a31 82845 845 (Brookdale) Chipset AGP Bridge
+ 2410 82801AA ISA Bridge (LPC)
+ 2411 82801AA IDE
+ 2412 82801AA USB
+ 2413 82801AA SMBus
+ 2415 82801AA AC'97 Audio
+ 1028 0095 Precision Workstation 220 Integrated Digital Audio
+ 11d4 0040 SoundMAX Integrated Digital Audio
+ 11d4 0048 SoundMAX Integrated Digital Audio
+ 11d4 5340 SoundMAX Integrated Digital Audio
+ 2416 82801AA AC'97 Modem
+ 2418 82801AA PCI Bridge
+ 2420 82801AB ISA Bridge (LPC)
+ 2421 82801AB IDE
+ 2422 82801AB USB
+ 2423 82801AB SMBus
+ 2425 82801AB AC'97 Audio
+ 11d4 0040 SoundMAX Integrated Digital Audio
+ 11d4 0048 SoundMAX Integrated Digital Audio
+ 2426 82801AB AC'97 Modem
+ 2428 82801AB PCI Bridge
+ 2440 82801BA ISA Bridge (LPC)
+ 2442 82801BA/BAM USB (Hub #1)
+ 104d 80df Vaio PCG-FX403
+ 147b 0507 TH7II-RAID
+ 2443 82801BA/BAM SMBus
+ 1043 8027 TUSL2-C Mainboard
+ 104d 80df Vaio PCG-FX403
+ 147b 0507 TH7II-RAID
+ 2444 82801BA/BAM USB (Hub #2)
+ 104d 80df Vaio PCG-FX403
+ 147b 0507 TH7II-RAID
+ 2445 82801BA/BAM AC'97 Audio
+ 104d 80df Vaio PCG-FX403
+ 1462 3370 STAC9721 AC
+ 147b 0507 TH7II-RAID
+ 2446 82801BA/BAM AC'97 Modem
+ 104d 80df Vaio PCG-FX403
+ 2448 82801BAM/CAM PCI Bridge
+ 2449 82801BA/BAM/CA/CAM Ethernet Controller
+ 0e11 0012 EtherExpress PRO/100 VM
+ 0e11 0091 EtherExpress PRO/100 VE
+ 1014 01ce EtherExpress PRO/100 VE
+ 1014 01dc EtherExpress PRO/100 VE
+ 1014 01eb EtherExpress PRO/100 VE
+ 1014 01ec EtherExpress PRO/100 VE
+ 1014 0202 EtherExpress PRO/100 VE
+ 1014 0205 EtherExpress PRO/100 VE
+ 1014 0217 EtherExpress PRO/100 VE
+ 1014 0234 EtherExpress PRO/100 VE
+ 1014 023d EtherExpress PRO/100 VE
+ 1014 0244 EtherExpress PRO/100 VE
+ 1014 0245 EtherExpress PRO/100 VE
+ 109f 315d EtherExpress PRO/100 VE
+ 109f 3181 EtherExpress PRO/100 VE
+ 1186 7801 EtherExpress PRO/100 VE
+ 144d 2602 HomePNA 1M CNR
+ 8086 3010 EtherExpress PRO/100 VE
+ 8086 3011 EtherExpress PRO/100 VM
+ 8086 3012 82562EH based Phoneline
+ 8086 3013 EtherExpress PRO/100 VE
+ 8086 3014 EtherExpress PRO/100 VM
+ 8086 3015 82562EH based Phoneline
+ 8086 3016 EtherExpress PRO/100 P Mobile Combo
+ 8086 3017 EtherExpress PRO/100 P Mobile
+ 8086 3018 EtherExpress PRO/100
+ 244a 82801BAM IDE U100
+ 1025 1016 Travelmate 612TX
+ 104d 80df Vaio PCG-FX403
+ 244b 82801BA IDE U100
+ 1043 8027 TUSL2-C Mainboard
+ 147b 0507 TH7II-RAID
+ 244c 82801BAM ISA Bridge (LPC)
+ 244e 82801BA/CA/DB PCI Bridge
+ 2450 82801E ISA Bridge (LPC)
+ 2452 82801E USB
+ 2453 82801E SMBus
+ 2459 82801E Ethernet Controller 0
+ 245b 82801E IDE U100
+ 245d 82801E Ethernet Controller 1
+ 245e 82801E PCI Bridge
+ 2480 82801CA ISA Bridge (LPC)
+ 2482 82801CA/CAM USB (Hub #1)
+ 1014 0220 ThinkPad T23 (2647-4MG) or A30p (2653-64G)
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 2483 82801CA/CAM SMBus
+ 1014 0220 ThinkPad T23 (2647-4MG) or A30p (2653-64G)
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 2484 82801CA/CAM USB (Hub #2)
+ 1014 0220 ThinkPad T23 (2647-4MG) or A30p (2653-64G)
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 2485 82801CA/CAM AC'97 Audio
+ 1014 0222 ThinkPad T23 (2647-4MG)
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 2486 82801CA/CAM AC'97 Modem
+ 1014 0223 ThinkPad A30p (2653-64G)
+ 1014 0503 ThinkPad R31 2656BBG
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 134d 4c21 Dell Inspiron 2100 internal modem
+ 2487 82801CA/CAM USB (Hub #3)
+ 1014 0220 ThinkPad T23 (2647-4MG) or A30p (2653-64G)
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 248a 82801CAM IDE U100
+ 1014 0220 ThinkPad T23 (2647-4MG) or A30p (2653-64G)
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 248b 82801CA IDE U100
+ 248c 82801CAM ISA Bridge (LPC)
+ 24c0 82801DB ISA Bridge (LPC)
+ 24c2 82801DB USB (Hub #1)
+ 24c3 82801DB SMBus
+ 24c4 82801DB USB (Hub #2)
+ 24c5 82801DB AC'97 Audio
+ 24c6 82801DB AC'97 Modem
+ 24c7 82801DB USB (Hub #3)
+ 24cb 82801DB ICH4 IDE
+ 24cd 82801DB USB EHCI Controller
+ 2500 82820 820 (Camino) Chipset Host Bridge (MCH)
+ 1028 0095 Precision Workstation 220 Chipset
+ 1043 801c P3C-2000 system chipset
+ 2501 82820 820 (Camino) Chipset Host Bridge (MCH)
+ 1043 801c P3C-2000 system chipset
+ 250b 82820 820 (Camino) Chipset Host Bridge
+ 250f 82820 820 (Camino) Chipset AGP Bridge
+ 2520 82805AA MTH Memory Translator Hub
+ 2521 82804AA MRH-S Memory Repeater Hub for SDRAM
+ 2530 82850 850 (Tehama) Chipset Host Bridge (MCH)
+ 147b 0507 TH7II-RAID
+ 2531 82860 860 (Wombat) Chipset Host Bridge (MCH)
+ 2532 82850 850 (Tehama) Chipset AGP Bridge
+ 2533 82860 860 (Wombat) Chipset AGP Bridge
+ 2534 82860 860 (Wombat) Chipset PCI Bridge
+ 2540 e7500 [Plumas] DRAM Controller
+ 2541 e7500 [Plumas] DRAM Controller Error Reporting
+ 2543 e7500 [Plumas] HI_B Virtual PCI Bridge (F0)
+ 2544 e7500 [Plumas] HI_B Virtual PCI Bridge (F1)
+ 2545 e7500 [Plumas] HI_C Virtual PCI Bridge (F0)
+ 2546 e7500 [Plumas] HI_C Virtual PCI Bridge (F1)
+ 2547 e7500 [Plumas] HI_D Virtual PCI Bridge (F0)
+ 2548 e7500 [Plumas] HI_D Virtual PCI Bridge (F1)
+ 2560 82845G/GL [Brookdale-G] Chipset Host Bridge
+ 2561 82845G/GL [Brookdale-G] Chipset AGP Bridge
+ 2562 82845G/GL [Brookdale-G] Chipset Integrated Graphics Device
+ 3092 Integrated RAID
+ 3575 82830 830 Chipset Host Bridge
+ 1014 021d ThinkPad T23 (2647-4MG) or A30p (2653-64G)
+ 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
+ 3576 82830 830 Chipset AGP Bridge
+ 3577 82830 CGC [Chipset Graphics Controller]
+ 3578 82830 830 Chipset Host Bridge
+ 5200 EtherExpress PRO/100 Intelligent Server
+ 5201 EtherExpress PRO/100 Intelligent Server
+ 8086 0001 EtherExpress PRO/100 Server Ethernet Adapter
+ 530d 80310 IOP [IO Processor]
+ 7000 82371SB PIIX3 ISA [Natoma/Triton II]
+ 7010 82371SB PIIX3 IDE [Natoma/Triton II]
+ 7020 82371SB PIIX3 USB [Natoma/Triton II]
+ 7030 430VX - 82437VX TVX [Triton VX]
+ 7100 430TX - 82439TX MTXC
+ 7110 82371AB/EB/MB PIIX4 ISA
+ 7111 82371AB/EB/MB PIIX4 IDE
+ 7112 82371AB/EB/MB PIIX4 USB
+ 7113 82371AB/EB/MB PIIX4 ACPI
+ 7120 82810 GMCH [Graphics Memory Controller Hub]
+ 7121 82810 CGC [Chipset Graphics Controller]
+ 7122 82810 DC-100 GMCH [Graphics Memory Controller Hub]
+ 7123 82810 DC-100 CGC [Chipset Graphics Controller]
+ 7124 82810E DC-133 GMCH [Graphics Memory Controller Hub]
+ 7125 82810E DC-133 CGC [Chipset Graphics Controller]
+ 7126 82810 DC-133 System and Graphics Controller
+ 7128 82810-M DC-100 System and Graphics Controller
+ 712a 82810-M DC-133 System and Graphics Controller
+ 7180 440LX/EX - 82443LX/EX Host bridge
+ 7181 440LX/EX - 82443LX/EX AGP bridge
+ 7190 440BX/ZX/DX - 82443BX/ZX/DX Host bridge
+ 0e11 0500 Armada 1750 Laptop System Chipset
+ 1179 0001 Toshiba Tecra 8100 Laptop System Chipset
+ 7191 440BX/ZX/DX - 82443BX/ZX/DX AGP bridge
+ 7192 440BX/ZX/DX - 82443BX/ZX/DX Host bridge (AGP disabled)
+ 0e11 0460 Armada 1700 Laptop System Chipset
+ 7194 82440MX Host Bridge
+ 7195 82440MX AC'97 Audio Controller
+ 10cf 1099 QSound_SigmaTel Stac97 PCI Audio
+ 11d4 0040 SoundMAX Integrated Digital Audio
+ 11d4 0048 SoundMAX Integrated Digital Audio
+ 7196 82440MX AC'97 Modem Controller
+ 7198 82440MX ISA Bridge
+ 7199 82440MX EIDE Controller
+ 719a 82440MX USB Universal Host Controller
+ 719b 82440MX Power Management Controller
+ 71a0 440GX - 82443GX Host bridge
+ 71a1 440GX - 82443GX AGP bridge
+ 71a2 440GX - 82443GX Host bridge (AGP disabled)
+ 7600 82372FB PIIX5 ISA
+ 7601 82372FB PIIX5 IDE
+ 7602 82372FB PIIX5 USB
+ 7603 82372FB PIIX5 SMBus
+ 7800 i740
+ 003d 0008 Starfighter AGP
+ 003d 000b Starfighter AGP
+ 1092 0100 Stealth II G460
+ 10b4 201a Lightspeed 740
+ 10b4 202f Lightspeed 740
+ 8086 0000 Terminator 2x/i
+ 8086 0100 Intel740 Graphics Accelerator
+ 84c4 450KX/GX [Orion] - 82454KX/GX PCI bridge
+ 84c5 450KX/GX [Orion] - 82453KX/GX Memory controller
+ 84ca 450NX - 82451NX Memory & I/O Controller
+ 84cb 450NX - 82454NX/84460GX PCI Expander Bridge
+ 84e0 460GX - 84460GX System Address Controller (SAC)
+ 84e1 460GX - 84460GX System Data Controller (SDC)
+ 84e2 460GX - 84460GX AGP Bridge (GXB function 2)
+ 84e3 460GX - 84460GX Memory Address Controller (MAC)
+ 84e4 460GX - 84460GX Memory Data Controller (MDC)
+ 84e6 460GX - 82466GX Wide and fast PCI eXpander Bridge (WXB)
+ 84ea 460GX - 84460GX AGP Bridge (GXB function 1)
+ 9621 Integrated RAID
+ 9622 Integrated RAID
+ 9641 Integrated RAID
+ 96a1 Integrated RAID
+ b152 21152 PCI-to-PCI Bridge
+# observed, and documented in Intel revision note; new mask of 1011:0026
+ b154 21154 PCI-to-PCI Bridge
+ b555 21555 Non transparent PCI-to-PCI Bridge
+ e4bf 1000 CC8-1-BLUES
+ ffff 450NX/GX [Orion] - 82453KX/GX Memory controller [BUG]
+8800 Trigem Computer Inc.
+ 2008 Video assistent component
+8866 T-Square Design Inc.
+8888 Silicon Magic
+8e0e Computone Corporation
+8e2e KTI
+ 3000 ET32P2
+9004 Adaptec
+ 1078 AIC-7810
+ 1160 AIC-1160 [Family Fibre Channel Adapter]
+ 2178 AIC-7821
+ 3860 AHA-2930CU
+ 3b78 AHA-4844W/4844UW
+ 5075 AIC-755x
+ 5078 AHA-7850
+ 9004 7850 AHA-2904/Integrated AIC-7850
+ 5175 AIC-755x
+ 5178 AIC-7851
+ 5275 AIC-755x
+ 5278 AIC-7852
+ 5375 AIC-755x
+ 5378 AIC-7850
+ 5475 AIC-755x
+ 5478 AIC-7850
+ 5575 AVA-2930
+ 5578 AIC-7855
+ 5647 ANA-7711 TCP Offload Engine
+ 5675 AIC-755x
+ 5678 AIC-7856
+ 5775 AIC-755x
+ 5778 AIC-7850
+ 5800 AIC-5800
+ 5900 ANA-5910/5930/5940 ATM155 & 25 LAN Adapter
+ 5905 ANA-5910A/5930A/5940A ATM Adapter
+ 6038 AIC-3860
+ 6075 AIC-1480 / APA-1480
+ 9004 7560 AIC-1480 / APA-1480 Cardbus
+ 6078 AIC-7860
+ 6178 AIC-7861
+ 9004 7861 AHA-2940AU Single
+ 6278 AIC-7860
+ 6378 AIC-7860
+ 6478 AIC-786x
+ 6578 AIC-786x
+ 6678 AIC-786x
+ 6778 AIC-786x
+ 6915 ANA620xx/ANA69011A
+ 9004 0008 ANA69011A/TX 10/100
+ 9004 0009 ANA69011A/TX 10/100
+ 9004 0010 ANA62022 2-port 10/100
+ 9004 0018 ANA62044 4-port 10/100
+ 9004 0019 ANA62044 4-port 10/100
+ 9004 0020 ANA62022 2-port 10/100
+ 9004 0028 ANA69011A/TX 10/100
+ 9004 8008 ANA69011A/TX 64 bit 10/100
+ 9004 8009 ANA69011A/TX 64 bit 10/100
+ 9004 8010 ANA62022 2-port 64 bit 10/100
+ 9004 8018 ANA62044 4-port 64 bit 10/100
+ 9004 8019 ANA62044 4-port 64 bit 10/100
+ 9004 8020 ANA62022 2-port 64 bit 10/100
+ 9004 8028 ANA69011A/TX 64 bit 10/100
+ 7078 AHA-294x / AIC-7870
+ 7178 AHA-2940/2940W / AIC-7871
+ 7278 AHA-3940/3940W / AIC-7872
+ 7378 AHA-3985 / AIC-7873
+ 7478 AHA-2944/2944W / AIC-7874
+ 7578 AHA-3944/3944W / AIC-7875
+ 7678 AHA-4944W/UW / AIC-7876
+ 7778 AIC-787x
+ 7810 AIC-7810
+ 7815 AIC-7815 RAID+Memory Controller IC
+ 9004 7815 ARO-1130U2 RAID Controller
+ 9004 7840 AIC-7815 RAID+Memory Controller IC
+ 7850 AIC-7850
+ 7855 AHA-2930
+ 7860 AIC-7860
+ 7870 AIC-7870
+ 7871 AHA-2940
+ 7872 AHA-3940
+ 7873 AHA-3980
+ 7874 AHA-2944
+ 7880 AIC-7880P
+ 7890 AIC-7890
+ 7891 AIC-789x
+ 7892 AIC-789x
+ 7893 AIC-789x
+ 7894 AIC-789x
+ 7895 AHA-2940U/UW / AHA-39xx / AIC-7895
+ 9004 7890 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B
+ 9004 7891 AHA-2940U/2940UW Dual
+ 9004 7892 AHA-3940AU/AUW/AUWD/UWD
+ 9004 7894 AHA-3944AUWD
+ 9004 7895 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B
+ 9004 7896 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B
+ 9004 7897 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B
+ 7896 AIC-789x
+ 7897 AIC-789x
+ 8078 AIC-7880U
+ 9004 7880 AIC-7880P Ultra/Ultra Wide SCSI Chipset
+ 8178 AHA-2940U/UW/D / AIC-7881U
+ 9004 7881 AHA-2940UW SCSI Host Adapter
+ 8278 AHA-3940U/UW/UWD / AIC-7882U
+ 8378 AHA-3940U/UW / AIC-7883U
+ 8478 AHA-2944UW / AIC-7884U
+ 8578 AHA-3944U/UWD / AIC-7885
+ 8678 AHA-4944UW / AIC-7886
+ 8778 AHA-2940UW Pro / AIC-788x
+ 9004 7887 2940UW Pro Ultra-Wide SCSI Controller
+ 8878 AHA-2930UW / AIC-7888
+ 9004 7888 AHA-2930UW SCSI Controller
+ 8b78 ABA-1030
+ ec78 AHA-4944W/UW
+9005 Adaptec
+ 0010 AHA-2940U2/U2W
+ 9005 2180 AHA-2940U2 SCSI Controller
+ 9005 8100 AHA-2940U2B SCSI Controller
+ 9005 a180 AHA-2940U2W SCSI Controller
+ 9005 e100 AHA-2950U2B SCSI Controller
+ 0011 AHA-2930U2
+ 0013 78902
+ 9005 0003 AAA-131U2 Array1000 1 Channel RAID Controller
+ 001f AHA-2940U2/U2W / 7890/7891
+ 9005 000f 2940U2W SCSI Controller
+ 9005 a180 2940U2W SCSI Controller
+ 0020 AIC-7890
+ 002f AIC-7890
+ 0030 AIC-7890
+ 003f AIC-7890
+ 0050 AHA-3940U2x/395U2x
+ 9005 f500 AHA-3950U2B
+ 0051 AHA-3950U2D
+ 9005 b500 AHA-3950U2D
+ 0053 AIC-7896 SCSI Controller
+ 9005 ffff AIC-7896 SCSI Controller mainboard implementation
+ 005f AIC-7896U2/7897U2
+ 0080 AIC-7892A U160/m
+ 0e11 e2a0 Compaq 64-Bit/66MHz Wide Ultra3 SCSI Adapter
+ 9005 62a0 29160N Ultra160 SCSI Controller
+ 9005 e220 29160LP Low Profile Ultra160 SCSI Controller
+ 9005 e2a0 29160 Ultra160 SCSI Controller
+ 0081 AIC-7892B U160/m
+ 9005 62a1 19160 Ultra160 SCSI Controller
+ 0083 AIC-7892D U160/m
+ 008f AIC-7892P U160/m
+ 00c0 AHA-3960D / AIC-7899A U160/m
+ 0e11 f620 Compaq 64-Bit/66MHz Dual Channel Wide Ultra3 SCSI Adapter
+ 9005 f620 AHA-3960D U160/m
+ 00c1 AIC-7899B U160/m
+ 00c3 AIC-7899D U160/m
+ 00c5 RAID subsystem HBA
+ 00cf AIC-7899P U160/m
+ 0285 AAC-RAID
+ 1028 0287 PowerEdge Expandable RAID Controller 320/DC
+907f Atronics
+ 2015 IDE-2015PL
+919a Gigapixel Corp
+9412 Holtek
+ 6565 6565
+9699 Omni Media Technology Inc
+ 6565 6565
+9710 NetMos Technology
+ 9815 VScom 021H-EP2 2 port parallel adaptor
+ 9835 222N-2 I/O Card (2S+1P)
+a0a0 AOPEN Inc.
+a0f1 UNISYS Corporation
+a200 NEC Corporation
+a259 Hewlett Packard
+a25b Hewlett Packard GmbH PL24-MKT
+a304 Sony
+a727 3Com Corporation
+aa42 Scitex Digital Video
+ac1e Digital Receiver Technology Inc
+b1b3 Shiva Europe Limited
+c001 TSI Telsys
+c0a9 Micron/Crucial Technology
+c0de Motorola
+c0fe Motion Engineering, Inc.
+ca50 Varian Australia Pty Ltd
+cafe Chrysalis-ITS
+cccc Catapult Communications
+d4d4 Dy4 Systems Inc
+ 0601 PCI Mezzanine Card
+d531 I+ME ACTIA GmbH
+d84d Exsys
+dead Indigita Corporation
+e000 Winbond
+ e000 W89C940
+e159 Tiger Jet Network Inc.
+ 0001 Model 300 128k
+ 0059 0001 128k ISDN-S/T Adapter
+ 0059 0003 128k ISDN-U Adapter
+ 0002 Tiger100APC ISDN chipset
+e4bf EKF Elektronik GmbH
+ea01 Eagle Technology
+eabb Aashima Technology B.V.
+eace Endace Measurement Systems, Ltd
+ 3100 DAG 3.10 OC-3/OC-12
+ 3200 DAG 3.2x OC-3/OC-12
+ 320e DAG 3.2E Fast Ethernet
+ 340e DAG 3.4E Fast Ethernet
+ 341e DAG 3.41E Fast Ethernet
+ 3500 DAG 3.5 OC-3/OC-12
+ 351c DAG 3.5ECM Fast Ethernet
+ 4100 DAG 4.10 OC-48
+ 4110 DAG 4.11 OC-48
+ 4220 DAG 4.2 OC-48
+ 422e DAG 4.2E Dual Gigabit Ethernet
+ec80 Belkin Corporation
+ ec00 F5D6000
+ecc0 Echo Corporation
+edd8 ARK Logic Inc
+ a091 1000PV [Stingray]
+ a099 2000PV [Stingray]
+ a0a1 2000MT
+ a0a9 2000MI
+fa57 Fast Search & Transfer ASA
+febd Ultraview Corp.
+feda Epigram Inc
+fffe VMWare Inc
+ 0710 Virtual SVGA
+ffff Illegal Vendor ID
+
+
+# List of known device classes, subclasses and programming interfaces
+
+# Syntax:
+# C class class_name
+# subclass subclass_name <-- single tab
+# prog-if prog-if_name <-- two tabs
+
+C 00 Unclassified device
+ 00 Non-VGA unclassified device
+ 01 VGA compatible unclassified device
+C 01 Mass storage controller
+ 00 SCSI storage controller
+ 01 IDE interface
+ 02 Floppy disk controller
+ 03 IPI bus controller
+ 04 RAID bus controller
+ 80 Unknown mass storage controller
+C 02 Network controller
+ 00 Ethernet controller
+ 01 Token ring network controller
+ 02 FDDI network controller
+ 03 ATM network controller
+ 04 ISDN controller
+ 80 Network controller
+C 03 Display controller
+ 00 VGA compatible controller
+ 00 VGA
+ 01 8514
+ 01 XGA compatible controller
+ 02 3D controller
+ 80 Display controller
+C 04 Multimedia controller
+ 00 Multimedia video controller
+ 01 Multimedia audio controller
+ 02 Computer telephony device
+ 80 Multimedia controller
+C 05 Memory controller
+ 00 RAM memory
+ 01 FLASH memory
+ 80 Memory controller
+C 06 Bridge
+ 00 Host bridge
+ 01 ISA bridge
+ 02 EISA bridge
+ 03 MicroChannel bridge
+ 04 PCI bridge
+ 00 Normal decode
+ 01 Subtractive decode
+ 05 PCMCIA bridge
+ 06 NuBus bridge
+ 07 CardBus bridge
+ 08 RACEway bridge
+ 00 Transparent mode
+ 01 Endpoint mode
+ 09 Semi-transparent PCI-to-PCI bridge
+ 40 Primary bus towards host CPU
+ 80 Secondary bus towards host CPU
+ 0a InfiniBand to PCI host bridge
+ 80 Bridge
+C 07 Communication controller
+ 00 Serial controller
+ 00 8250
+ 01 16450
+ 02 16550
+ 03 16650
+ 04 16750
+ 05 16850
+ 06 16950
+ 01 Parallel controller
+ 00 SPP
+ 01 BiDir
+ 02 ECP
+ 03 IEEE1284
+ fe IEEE1284 Target
+ 02 Multiport serial controller
+ 03 Modem
+ 00 Generic
+ 01 Hayes/16450
+ 02 Hayes/16550
+ 03 Hayes/16650
+ 04 Hayes/16750
+ 80 Communication controller
+C 08 Generic system peripheral
+ 00 PIC
+ 00 8259
+ 01 ISA PIC
+ 02 EISA PIC
+ 10 IO-APIC
+ 20 IO(X)-APIC
+ 01 DMA controller
+ 00 8237
+ 01 ISA DMA
+ 02 EISA DMA
+ 02 Timer
+ 00 8254
+ 01 ISA Timer
+ 02 EISA Timers
+ 03 RTC
+ 00 Generic
+ 01 ISA RTC
+ 04 PCI Hot-plug controller
+ 80 System peripheral
+C 09 Input device controller
+ 00 Keyboard controller
+ 01 Digitizer Pen
+ 02 Mouse controller
+ 03 Scanner controller
+ 04 Gameport controller
+ 00 Generic
+ 10 Extended
+ 80 Input device controller
+C 0a Docking station
+ 00 Generic Docking Station
+ 80 Docking Station
+C 0b Processor
+ 00 386
+ 01 486
+ 02 Pentium
+ 10 Alpha
+ 20 Power PC
+ 30 MIPS
+ 40 Co-processor
+C 0c Serial bus controller
+ 00 FireWire (IEEE 1394)
+ 00 Generic
+ 10 OHCI
+ 01 ACCESS Bus
+ 02 SSA
+ 03 USB Controller
+ 00 UHCI
+ 10 OHCI
+ 20 EHCI
+ 80 Unspecified
+ fe USB Device
+ 04 Fibre Channel
+ 05 SMBus
+ 06 InfiniBand
+C 0d Wireless controller
+ 00 IRDA controller
+ 01 Consumer IR controller
+ 10 RF controller
+ 80 Wireless controller
+C 0e Intelligent controller
+ 00 I2O
+C 0f Satellite communications controller
+ 00 Satellite TV controller
+ 01 Satellite audio communication controller
+ 03 Satellite voice communication controller
+ 04 Satellite data communication controller
+C 10 Encryption controller
+ 00 Network and computing encryption device
+ 10 Entertainment encryption device
+ 80 Encryption controller
+C 11 Signal processing controller
+ 00 DPIO module
+ 01 Performance counters
+ 10 Communication synchronizer
+ 80 Signal processing controller
diff --git a/xen/drivers/pci/proc.c b/xen/drivers/pci/proc.c
new file mode 100644
index 0000000000..5e04ad7b33
--- /dev/null
+++ b/xen/drivers/pci/proc.c
@@ -0,0 +1,572 @@
+/*
+ * $Id: proc.c,v 1.13 1998/05/12 07:36:07 mj Exp $
+ *
+ * Procfs interface for the PCI bus.
+ *
+ * Copyright (c) 1997--1999 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+
+#define PCI_CFG_SPACE_SIZE 256
+
+static loff_t
+proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
+{
+ loff_t new;
+
+	switch (whence) {
+	case 0:	/* SEEK_SET: absolute offset into config space */
+		new = off;
+		break;
+	case 1:	/* SEEK_CUR: relative to the current position */
+		new = file->f_pos + off;
+		break;
+	case 2:	/* SEEK_END: relative to the end of config space */
+		new = PCI_CFG_SPACE_SIZE + off;
+		break;
+	default:
+		return -EINVAL;
+	}
+ if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+ return -EINVAL;
+ return (file->f_pos = new);
+}
+
+static ssize_t
+proc_bus_pci_read(struct file *file, char *buf, size_t nbytes, loff_t *ppos)
+{
+ const struct inode *ino = file->f_dentry->d_inode;
+ const struct proc_dir_entry *dp = ino->u.generic_ip;
+ struct pci_dev *dev = dp->data;
+ unsigned int pos = *ppos;
+ unsigned int cnt, size;
+
+ /*
+ * Normal users can read only the standardized portion of the
+ * configuration space as several chips lock up when trying to read
+ * undefined locations (think of Intel PIIX4 as a typical example).
+ */
+
+ if (capable(CAP_SYS_ADMIN))
+ size = PCI_CFG_SPACE_SIZE;
+ else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+ size = 128;
+ else
+ size = 64;
+
+ if (pos >= size)
+ return 0;
+ if (nbytes >= size)
+ nbytes = size;
+ if (pos + nbytes > size)
+ nbytes = size - pos;
+ cnt = nbytes;
+
+ if (!access_ok(VERIFY_WRITE, buf, cnt))
+ return -EINVAL;
+
+ if ((pos & 1) && cnt) {
+ unsigned char val;
+ pci_read_config_byte(dev, pos, &val);
+ __put_user(val, buf);
+ buf++;
+ pos++;
+ cnt--;
+ }
+
+ if ((pos & 3) && cnt > 2) {
+ unsigned short val;
+ pci_read_config_word(dev, pos, &val);
+ __put_user(cpu_to_le16(val), (unsigned short *) buf);
+ buf += 2;
+ pos += 2;
+ cnt -= 2;
+ }
+
+ while (cnt >= 4) {
+ unsigned int val;
+ pci_read_config_dword(dev, pos, &val);
+ __put_user(cpu_to_le32(val), (unsigned int *) buf);
+ buf += 4;
+ pos += 4;
+ cnt -= 4;
+ }
+
+ if (cnt >= 2) {
+ unsigned short val;
+ pci_read_config_word(dev, pos, &val);
+ __put_user(cpu_to_le16(val), (unsigned short *) buf);
+ buf += 2;
+ pos += 2;
+ cnt -= 2;
+ }
+
+ if (cnt) {
+ unsigned char val;
+ pci_read_config_byte(dev, pos, &val);
+ __put_user(val, buf);
+ buf++;
+ pos++;
+ cnt--;
+ }
+
+ *ppos = pos;
+ return nbytes;
+}
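For orientation, here is a brief user-space sketch of what this read path serves; the write path below is symmetric. This is an illustration only: the /proc/bus/pci/<bus>/<slot>.<func> path layout is the conventional one, the bus and slot numbers are hypothetical, and error handling is trimmed. The first two little-endian 16-bit words of configuration space are the vendor and device IDs catalogued in the list above.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned char cfg[4];
	/* Hypothetical example device: bus 00, slot 00, function 0. */
	int fd = open("/proc/bus/pci/00/00.0", O_RDONLY);

	if (fd < 0 || read(fd, cfg, 4) != 4)
		return 1;
	/* Config space is exported little-endian (cpu_to_le16 above). */
	printf("vendor %02x%02x device %02x%02x\n",
	       cfg[1], cfg[0], cfg[3], cfg[2]);
	close(fd);
	return 0;
}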
+
+static ssize_t
+proc_bus_pci_write(struct file *file, const char *buf, size_t nbytes, loff_t *ppos)
+{
+ const struct inode *ino = file->f_dentry->d_inode;
+ const struct proc_dir_entry *dp = ino->u.generic_ip;
+ struct pci_dev *dev = dp->data;
+ int pos = *ppos;
+ int cnt;
+
+ if (pos >= PCI_CFG_SPACE_SIZE)
+ return 0;
+ if (nbytes >= PCI_CFG_SPACE_SIZE)
+ nbytes = PCI_CFG_SPACE_SIZE;
+ if (pos + nbytes > PCI_CFG_SPACE_SIZE)
+ nbytes = PCI_CFG_SPACE_SIZE - pos;
+ cnt = nbytes;
+
+ if (!access_ok(VERIFY_READ, buf, cnt))
+ return -EINVAL;
+
+ if ((pos & 1) && cnt) {
+ unsigned char val;
+ __get_user(val, buf);
+ pci_write_config_byte(dev, pos, val);
+ buf++;
+ pos++;
+ cnt--;
+ }
+
+ if ((pos & 3) && cnt > 2) {
+ unsigned short val;
+ __get_user(val, (unsigned short *) buf);
+ pci_write_config_word(dev, pos, le16_to_cpu(val));
+ buf += 2;
+ pos += 2;
+ cnt -= 2;
+ }
+
+ while (cnt >= 4) {
+ unsigned int val;
+ __get_user(val, (unsigned int *) buf);
+ pci_write_config_dword(dev, pos, le32_to_cpu(val));
+ buf += 4;
+ pos += 4;
+ cnt -= 4;
+ }
+
+ if (cnt >= 2) {
+ unsigned short val;
+ __get_user(val, (unsigned short *) buf);
+ pci_write_config_word(dev, pos, le16_to_cpu(val));
+ buf += 2;
+ pos += 2;
+ cnt -= 2;
+ }
+
+ if (cnt) {
+ unsigned char val;
+ __get_user(val, buf);
+ pci_write_config_byte(dev, pos, val);
+ buf++;
+ pos++;
+ cnt--;
+ }
+
+ *ppos = pos;
+ return nbytes;
+}
+
+struct pci_filp_private {
+ enum pci_mmap_state mmap_state;
+ int write_combine;
+};
+
+static int proc_bus_pci_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+ const struct proc_dir_entry *dp = inode->u.generic_ip;
+ struct pci_dev *dev = dp->data;
+#ifdef HAVE_PCI_MMAP
+ struct pci_filp_private *fpriv = file->private_data;
+#endif /* HAVE_PCI_MMAP */
+ int ret = 0;
+
+ switch (cmd) {
+ case PCIIOC_CONTROLLER:
+ ret = pci_controller_num(dev);
+ break;
+
+#ifdef HAVE_PCI_MMAP
+ case PCIIOC_MMAP_IS_IO:
+ fpriv->mmap_state = pci_mmap_io;
+ break;
+
+ case PCIIOC_MMAP_IS_MEM:
+ fpriv->mmap_state = pci_mmap_mem;
+ break;
+
+ case PCIIOC_WRITE_COMBINE:
+ if (arg)
+ fpriv->write_combine = 1;
+ else
+ fpriv->write_combine = 0;
+ break;
+
+#endif /* HAVE_PCI_MMAP */
+
+ default:
+ ret = -EINVAL;
+ break;
+	}
+
+ return ret;
+}
+
+#ifdef HAVE_PCI_MMAP
+static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ const struct proc_dir_entry *dp = inode->u.generic_ip;
+ struct pci_dev *dev = dp->data;
+ struct pci_filp_private *fpriv = file->private_data;
+ int ret;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ ret = pci_mmap_page_range(dev, vma,
+ fpriv->mmap_state,
+ fpriv->write_combine);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int proc_bus_pci_open(struct inode *inode, struct file *file)
+{
+ struct pci_filp_private *fpriv = kmalloc(sizeof(*fpriv), GFP_KERNEL);
+
+ if (!fpriv)
+ return -ENOMEM;
+
+ fpriv->mmap_state = pci_mmap_io;
+ fpriv->write_combine = 0;
+
+ file->private_data = fpriv;
+
+ return 0;
+}
+
+static int proc_bus_pci_release(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ file->private_data = NULL;
+
+ return 0;
+}
+#endif /* HAVE_PCI_MMAP */
+
+static struct file_operations proc_bus_pci_operations = {
+ llseek: proc_bus_pci_lseek,
+ read: proc_bus_pci_read,
+ write: proc_bus_pci_write,
+ ioctl: proc_bus_pci_ioctl,
+#ifdef HAVE_PCI_MMAP
+ open: proc_bus_pci_open,
+ release: proc_bus_pci_release,
+ mmap: proc_bus_pci_mmap,
+#ifdef HAVE_ARCH_PCI_GET_UNMAPPED_AREA
+ get_unmapped_area: get_pci_unmapped_area,
+#endif /* HAVE_ARCH_PCI_GET_UNMAPPED_AREA */
+#endif /* HAVE_PCI_MMAP */
+};
+
+#if BITS_PER_LONG == 32
+#define LONG_FORMAT "\t%08lx"
+#else
+#define LONG_FORMAT "\t%16lx"
+#endif
+
+/* iterator */
+static void *pci_seq_start(struct seq_file *m, loff_t *pos)
+{
+ struct list_head *p = &pci_devices;
+ loff_t n = *pos;
+
+ /* XXX: surely we need some locking for traversing the list? */
+ while (n--) {
+ p = p->next;
+ if (p == &pci_devices)
+ return NULL;
+ }
+ return p;
+}
+static void *pci_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct list_head *p = v;
+ (*pos)++;
+ return p->next != &pci_devices ? p->next : NULL;
+}
+static void pci_seq_stop(struct seq_file *m, void *v)
+{
+ /* release whatever locks we need */
+}
+
+static int show_device(struct seq_file *m, void *v)
+{
+ struct list_head *p = v;
+ const struct pci_dev *dev;
+ const struct pci_driver *drv;
+ int i;
+
+ if (p == &pci_devices)
+ return 0;
+
+ dev = pci_dev_g(p);
+ drv = pci_dev_driver(dev);
+ seq_printf(m, "%02x%02x\t%04x%04x\t%x",
+ dev->bus->number,
+ dev->devfn,
+ dev->vendor,
+ dev->device,
+ dev->irq);
+	/* This must be 7, not PCI_NUM_RESOURCES, to preserve compatibility of the output format */
+ for(i=0; i<7; i++)
+ seq_printf(m, LONG_FORMAT,
+ dev->resource[i].start |
+ (dev->resource[i].flags & PCI_REGION_FLAG_MASK));
+ for(i=0; i<7; i++)
+ seq_printf(m, LONG_FORMAT,
+ dev->resource[i].start < dev->resource[i].end ?
+ dev->resource[i].end - dev->resource[i].start + 1 : 0);
+ seq_putc(m, '\t');
+ if (drv)
+ seq_printf(m, "%s", drv->name);
+ seq_putc(m, '\n');
+ return 0;
+}
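+
+/*
+ * Each line of /proc/bus/pci/devices thus carries, tab-separated:
+ * bus+devfn, vendor+device ID, IRQ, seven resource base addresses
+ * (ORed with their region flags), seven resource sizes, and the name
+ * of the bound driver, if any.
+ */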
+
+static struct seq_operations proc_bus_pci_devices_op = {
+ start: pci_seq_start,
+ next: pci_seq_next,
+ stop: pci_seq_stop,
+ show: show_device
+};
+
+struct proc_dir_entry *proc_bus_pci_dir;
+
+int pci_proc_attach_device(struct pci_dev *dev)
+{
+ struct pci_bus *bus = dev->bus;
+ struct proc_dir_entry *de, *e;
+ char name[16];
+
+ if (!(de = bus->procdir)) {
+ sprintf(name, "%02x", bus->number);
+ de = bus->procdir = proc_mkdir(name, proc_bus_pci_dir);
+ if (!de)
+ return -ENOMEM;
+ }
+ sprintf(name, "%02x.%x", PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+ e = dev->procent = create_proc_entry(name, S_IFREG | S_IRUGO | S_IWUSR, de);
+ if (!e)
+ return -ENOMEM;
+ e->proc_fops = &proc_bus_pci_operations;
+ e->data = dev;
+ e->size = PCI_CFG_SPACE_SIZE;
+ return 0;
+}
+
+int pci_proc_detach_device(struct pci_dev *dev)
+{
+ struct proc_dir_entry *e;
+
+ if ((e = dev->procent)) {
+ if (atomic_read(&e->count))
+ return -EBUSY;
+ remove_proc_entry(e->name, dev->bus->procdir);
+ dev->procent = NULL;
+ }
+ return 0;
+}
+
+int pci_proc_attach_bus(struct pci_bus* bus)
+{
+ struct proc_dir_entry *de = bus->procdir;
+
+ if (!de) {
+ char name[16];
+ sprintf(name, "%02x", bus->number);
+ de = bus->procdir = proc_mkdir(name, proc_bus_pci_dir);
+ if (!de)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+int pci_proc_detach_bus(struct pci_bus* bus)
+{
+ struct proc_dir_entry *de = bus->procdir;
+ if (de)
+ remove_proc_entry(de->name, proc_bus_pci_dir);
+ return 0;
+}
+
+
+/*
+ * Backward compatible /proc/pci interface.
+ */
+
+/*
+ * Print a human-readable summary of the configuration space registers
+ * of a device (possibly several lines each) into the seq_file.
+ */
+static int show_dev_config(struct seq_file *m, void *v)
+{
+ struct list_head *p = v;
+ struct pci_dev *dev;
+ struct pci_driver *drv;
+ u32 class_rev;
+ unsigned char latency, min_gnt, max_lat, *class;
+ int reg;
+
+ if (p == &pci_devices) {
+ seq_puts(m, "PCI devices found:\n");
+ return 0;
+ }
+
+ dev = pci_dev_g(p);
+ drv = pci_dev_driver(dev);
+
+ pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
+ pci_read_config_byte (dev, PCI_LATENCY_TIMER, &latency);
+ pci_read_config_byte (dev, PCI_MIN_GNT, &min_gnt);
+ pci_read_config_byte (dev, PCI_MAX_LAT, &max_lat);
+ seq_printf(m, " Bus %2d, device %3d, function %2d:\n",
+ dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+ class = pci_class_name(class_rev >> 16);
+ if (class)
+ seq_printf(m, " %s", class);
+ else
+ seq_printf(m, " Class %04x", class_rev >> 16);
+ seq_printf(m, ": %s (rev %d).\n", dev->name, class_rev & 0xff);
+
+ if (dev->irq)
+ seq_printf(m, " IRQ %d.\n", dev->irq);
+
+ if (latency || min_gnt || max_lat) {
+ seq_printf(m, " Master Capable. ");
+ if (latency)
+ seq_printf(m, "Latency=%d. ", latency);
+ else
+ seq_puts(m, "No bursts. ");
+ if (min_gnt)
+ seq_printf(m, "Min Gnt=%d.", min_gnt);
+ if (max_lat)
+ seq_printf(m, "Max Lat=%d.", max_lat);
+ seq_putc(m, '\n');
+ }
+
+ for (reg = 0; reg < 6; reg++) {
+ struct resource *res = dev->resource + reg;
+ unsigned long base, end, flags;
+
+ base = res->start;
+ end = res->end;
+ flags = res->flags;
+ if (!end)
+ continue;
+
+ if (flags & PCI_BASE_ADDRESS_SPACE_IO) {
+ seq_printf(m, " I/O at 0x%lx [0x%lx].\n",
+ base, end);
+ } else {
+ const char *pref, *type = "unknown";
+
+ if (flags & PCI_BASE_ADDRESS_MEM_PREFETCH)
+ pref = "P";
+ else
+ pref = "Non-p";
+ switch (flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK) {
+ case PCI_BASE_ADDRESS_MEM_TYPE_32:
+ type = "32 bit"; break;
+ case PCI_BASE_ADDRESS_MEM_TYPE_1M:
+ type = "20 bit"; break;
+ case PCI_BASE_ADDRESS_MEM_TYPE_64:
+ type = "64 bit"; break;
+ }
+ seq_printf(m, " %srefetchable %s memory at "
+ "0x%lx [0x%lx].\n", pref, type,
+ base,
+ end);
+ }
+ }
+ return 0;
+}
+
+static struct seq_operations proc_pci_op = {
+ start: pci_seq_start,
+ next: pci_seq_next,
+ stop: pci_seq_stop,
+ show: show_dev_config
+};
+
+static int proc_bus_pci_dev_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &proc_bus_pci_devices_op);
+}
+static struct file_operations proc_bus_pci_dev_operations = {
+ open: proc_bus_pci_dev_open,
+ read: seq_read,
+ llseek: seq_lseek,
+ release: seq_release,
+};
+static int proc_pci_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &proc_pci_op);
+}
+static struct file_operations proc_pci_operations = {
+ open: proc_pci_open,
+ read: seq_read,
+ llseek: seq_lseek,
+ release: seq_release,
+};
+
+static int __init pci_proc_init(void)
+{
+ if (pci_present()) {
+ struct proc_dir_entry *entry;
+ struct pci_dev *dev;
+ proc_bus_pci_dir = proc_mkdir("pci", proc_bus);
+ entry = create_proc_entry("devices", 0, proc_bus_pci_dir);
+ if (entry)
+ entry->proc_fops = &proc_bus_pci_dev_operations;
+ pci_for_each_dev(dev) {
+ pci_proc_attach_device(dev);
+ }
+ entry = create_proc_entry("pci", 0, NULL);
+ if (entry)
+ entry->proc_fops = &proc_pci_operations;
+ }
+ return 0;
+}
+
+__initcall(pci_proc_init);
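+
+/*
+ * A config register can then be read from userspace by seeking within
+ * the per-device file, e.g. (with hypothetical bus/slot numbers):
+ *
+ *	int fd = open("/proc/bus/pci/00/0c.0", O_RDONLY);
+ *	unsigned char irq_line;
+ *	lseek(fd, PCI_INTERRUPT_LINE, SEEK_SET);
+ *	read(fd, &irq_line, 1);
+ */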
diff --git a/xen/drivers/pci/quirks.c b/xen/drivers/pci/quirks.c
new file mode 100644
index 0000000000..54e3e974d3
--- /dev/null
+++ b/xen/drivers/pci/quirks.c
@@ -0,0 +1,666 @@
+/*
+ * $Id: quirks.c,v 1.5 1998/05/02 19:24:14 mj Exp $
+ *
+ * This file contains work-arounds for many known PCI hardware
+ * bugs. Devices present only on certain architectures (host
+ * bridges et cetera) should be handled in arch-specific code.
+ *
+ * Copyright (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ * The bridge optimization stuff has been removed. If you really
+ * have a silly BIOS which is unable to set your host bridge right,
+ * use the PowerTweak utility (see http://powertweak.sourceforge.net).
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+/*#include <linux/kernel.h>*/
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+
+#undef DEBUG
+
+/* Deal with broken BIOSes that neglect to enable passive release,
+ which can cause problems in combination with the 82441FX/PPro MTRRs */
+static void __init quirk_passive_release(struct pci_dev *dev)
+{
+ struct pci_dev *d = NULL;
+ unsigned char dlc;
+
+ /* We have to make sure a particular bit is set in the PIIX3
+ ISA bridge, so we have to go out and find it. */
+ while ((d = pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, d))) {
+ pci_read_config_byte(d, 0x82, &dlc);
+ if (!(dlc & 1<<1)) {
+ printk(KERN_ERR "PCI: PIIX3: Enabling Passive Release on %s\n", d->slot_name);
+ dlc |= 1<<1;
+ pci_write_config_byte(d, 0x82, dlc);
+ }
+ }
+}
+
+/* The VIA VP2/VP3/MVP3 seem to have some 'features'. There may be a workaround
+ but VIA don't answer queries. If you happen to have good contacts at VIA
+ ask them for me please -- Alan
+
+   This appears to be BIOS-dependent rather than version-dependent, so
+   presumably there is a chipset-level fix. */
+
+
+int isa_dma_bridge_buggy; /* Exported */
+
+static void __init quirk_isa_dma_hangs(struct pci_dev *dev)
+{
+ if (!isa_dma_bridge_buggy) {
+ isa_dma_bridge_buggy=1;
+ printk(KERN_INFO "Activating ISA DMA hang workarounds.\n");
+ }
+}
+
+int pci_pci_problems;
+
+/*
+ * Chipsets where PCI->PCI transfers vanish or hang
+ */
+
+static void __init quirk_nopcipci(struct pci_dev *dev)
+{
+ if((pci_pci_problems&PCIPCI_FAIL)==0)
+ {
+ printk(KERN_INFO "Disabling direct PCI/PCI transfers.\n");
+ pci_pci_problems|=PCIPCI_FAIL;
+ }
+}
+
+/*
+ * Triton requires workarounds to be used by the drivers
+ */
+
+static void __init quirk_triton(struct pci_dev *dev)
+{
+ if((pci_pci_problems&PCIPCI_TRITON)==0)
+ {
+ printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
+ pci_pci_problems|=PCIPCI_TRITON;
+ }
+}
+
+/*
+ * VIA Apollo KT133 needs PCI latency patch
+ * Based on a Windows driver patch by George E. Breese; see
+ * "PCI Latency Adjust" on http://www.viahardware.com/download/viatweak.shtm
+ * Also see http://www.au-ja.org/review-kt133a-1-en.phtml for the info on
+ * which Mr Breese based his work.
+ *
+ * Updated based on further information from the site and also on
+ * information provided by VIA
+ */
+static void __init quirk_vialatency(struct pci_dev *dev)
+{
+ struct pci_dev *p;
+ u8 rev;
+ u8 busarb;
+ /* Ok we have a potential problem chipset here. Now see if we have
+ a buggy southbridge */
+
+ p=pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, NULL);
+ if(p!=NULL)
+ {
+ pci_read_config_byte(p, PCI_CLASS_REVISION, &rev);
+ /* 0x40 - 0x4f == 686B, 0x10 - 0x2f == 686A; thanks Dan Hollis */
+ /* Check for buggy part revisions */
+ if (rev < 0x40 || rev > 0x42)
+ return;
+ }
+ else
+ {
+ p = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8231, NULL);
+ if(p==NULL) /* No problem parts */
+ return;
+ pci_read_config_byte(p, PCI_CLASS_REVISION, &rev);
+ /* Check for buggy part revisions */
+ if (rev < 0x10 || rev > 0x12)
+ return;
+ }
+
+ /*
+ * Ok we have the problem. Now set the PCI master grant to
+	 * occur on every master grant. The apparent bug is that under high
+	 * PCI load (quite common in Linux of course) you can get data
+	 * loss when the CPU is held off the bus for 3 bus master requests.
+	 * This happens to include the IDE controllers....
+ *
+	 * VIA only apply this fix when an SB Live! is present, but under
+	 * both Linux and Windows this isn't enough, and we have seen
+	 * corruption without an SB Live! but with things like 3 UDMA IDE
+	 * controllers. So we ignore that bit of the VIA recommendation.
+ */
+
+ pci_read_config_byte(dev, 0x76, &busarb);
+	/* Set bits 5:4 of byte 0x76 to 0b01:
+	   "Master priority rotation on every PCI master grant" */
+ busarb &= ~(1<<5);
+ busarb |= (1<<4);
+ pci_write_config_byte(dev, 0x76, busarb);
+ printk(KERN_INFO "Applying VIA southbridge workaround.\n");
+}
+
+/*
+ * VIA Apollo VP3 needs ETBF on BT848/878
+ */
+
+static void __init quirk_viaetbf(struct pci_dev *dev)
+{
+ if((pci_pci_problems&PCIPCI_VIAETBF)==0)
+ {
+ printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
+ pci_pci_problems|=PCIPCI_VIAETBF;
+ }
+}
+static void __init quirk_vsfx(struct pci_dev *dev)
+{
+ if((pci_pci_problems&PCIPCI_VSFX)==0)
+ {
+ printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
+ pci_pci_problems|=PCIPCI_VSFX;
+ }
+}
+
+/*
+ * Ali Magik requires workarounds to be used by the drivers
+ * that DMA to AGP space. Latency must be set to 0xA and the Triton
+ * workaround applied as well.
+ * [Info kindly provided by ALi]
+ */
+
+static void __init quirk_alimagik(struct pci_dev *dev)
+{
+ if((pci_pci_problems&PCIPCI_ALIMAGIK)==0)
+ {
+ printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
+ pci_pci_problems|=PCIPCI_ALIMAGIK|PCIPCI_TRITON;
+ }
+}
+
+/*
+ * Natoma has some interesting boundary conditions with Zoran stuff
+ * at least
+ */
+
+static void __init quirk_natoma(struct pci_dev *dev)
+{
+ if((pci_pci_problems&PCIPCI_NATOMA)==0)
+ {
+ printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
+ pci_pci_problems|=PCIPCI_NATOMA;
+ }
+}
+
+/*
+ * S3 868 and 968 chips report region size equal to 32M, but they decode 64M.
+ * If it's needed, re-allocate the region.
+ */
+
+static void __init quirk_s3_64M(struct pci_dev *dev)
+{
+ struct resource *r = &dev->resource[0];
+
+ if ((r->start & 0x3ffffff) || r->end != r->start + 0x3ffffff) {
+ r->start = 0;
+ r->end = 0x3ffffff;
+ }
+}
+
+static void __init quirk_io_region(struct pci_dev *dev, unsigned region, unsigned size, int nr)
+{
+ region &= ~(size-1);
+ if (region) {
+ struct resource *res = dev->resource + nr;
+
+ res->name = dev->name;
+ res->start = region;
+ res->end = region + size - 1;
+ res->flags = IORESOURCE_IO;
+ pci_claim_resource(dev, nr);
+ }
+}
+
+/*
+ * The ATI northbridge raises a machine check on the processor if you
+ * even read anywhere between 0x3b0 and 0x3bb, or read 0x3d3.
+ */
+
+static void __devinit quirk_ati_exploding_mce(struct pci_dev *dev)
+{
+ printk(KERN_INFO "ATI Northbridge, reserving I/O ports 0x3b0 to 0x3bb.\n");
+	/* We must not look at these I/O locations */
+ request_region(0x3b0, 0x0C, "RadeonIGP");
+ request_region(0x3d3, 0x01, "RadeonIGP");
+}
+
+/*
+ * Let's make the southbridge information explicit instead
+ * of having to worry about people probing the ACPI areas,
+ * for example.. (Yes, it happens, and if you read the wrong
+ * ACPI register it will put the machine to sleep with no
+ * way of waking it up again. Bummer).
+ *
+ * ALI M7101: Two IO regions pointed to by words at
+ * 0xE0 (64 bytes of ACPI registers)
+ * 0xE2 (32 bytes of SMB registers)
+ */
+static void __init quirk_ali7101_acpi(struct pci_dev *dev)
+{
+ u16 region;
+
+ pci_read_config_word(dev, 0xE0, &region);
+ quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES);
+ pci_read_config_word(dev, 0xE2, &region);
+ quirk_io_region(dev, region, 32, PCI_BRIDGE_RESOURCES+1);
+}
+
+/*
+ * PIIX4 ACPI: Two IO regions pointed to by longwords at
+ * 0x40 (64 bytes of ACPI registers)
+ * 0x90 (32 bytes of SMB registers)
+ */
+static void __init quirk_piix4_acpi(struct pci_dev *dev)
+{
+ u32 region;
+
+ pci_read_config_dword(dev, 0x40, &region);
+ quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES);
+ pci_read_config_dword(dev, 0x90, &region);
+ quirk_io_region(dev, region, 32, PCI_BRIDGE_RESOURCES+1);
+}
+
+/*
+ * VIA ACPI: One IO region pointed to by longword at
+ * 0x48 or 0x20 (256 bytes of ACPI registers)
+ */
+static void __init quirk_vt82c586_acpi(struct pci_dev *dev)
+{
+ u8 rev;
+ u32 region;
+
+ pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
+ if (rev & 0x10) {
+ pci_read_config_dword(dev, 0x48, &region);
+ region &= PCI_BASE_ADDRESS_IO_MASK;
+ quirk_io_region(dev, region, 256, PCI_BRIDGE_RESOURCES);
+ }
+}
+
+/*
+ * VIA VT82C686 ACPI: Three IO regions pointed to by (long)words at
+ *	0x48 (256 bytes of ACPI registers)
+ *	0x70 (128 bytes of hardware monitoring registers)
+ * 0x90 (16 bytes of SMB registers)
+ */
+static void __init quirk_vt82c686_acpi(struct pci_dev *dev)
+{
+ u16 hm;
+ u32 smb;
+
+ quirk_vt82c586_acpi(dev);
+
+ pci_read_config_word(dev, 0x70, &hm);
+ hm &= PCI_BASE_ADDRESS_IO_MASK;
+ quirk_io_region(dev, hm, 128, PCI_BRIDGE_RESOURCES + 1);
+
+ pci_read_config_dword(dev, 0x90, &smb);
+ smb &= PCI_BASE_ADDRESS_IO_MASK;
+ quirk_io_region(dev, smb, 16, PCI_BRIDGE_RESOURCES + 2);
+}
+
+
+#ifdef CONFIG_X86_IO_APIC
+extern int nr_ioapics;
+
+/*
+ * VIA 686A/B: If an IO-APIC is active, we need to route all on-chip
+ * devices to the external APIC.
+ *
+ * TODO: When we have device-specific interrupt routers,
+ * this code will go away from quirks.
+ */
+static void __init quirk_via_ioapic(struct pci_dev *dev)
+{
+ u8 tmp;
+
+ if (nr_ioapics < 1)
+ tmp = 0; /* nothing routed to external APIC */
+ else
+ tmp = 0x1f; /* all known bits (4-0) routed to external APIC */
+
+ printk(KERN_INFO "PCI: %sbling Via external APIC routing\n",
+ tmp == 0 ? "Disa" : "Ena");
+
+ /* Offset 0x58: External APIC IRQ output control */
+ pci_write_config_byte (dev, 0x58, tmp);
+}
+
+#endif /* CONFIG_X86_IO_APIC */
+
+
+/*
+ * Via 686A/B: The PCI_INTERRUPT_LINE register for the on-chip
+ * devices, USB0/1, AC97, MC97, and ACPI, has an unusual feature:
+ * when written, it makes an internal connection to the PIC.
+ * For these devices, this register is defined to be 4 bits wide.
+ * Normally this is fine. However for IO-APIC motherboards, or
+ * non-x86 architectures (yes, Via exists on PPC among other places),
+ * we must mask the PCI_INTERRUPT_LINE value with 0xf to get
+ * interrupts delivered properly.
+ *
+ * TODO: When we have device-specific interrupt routers,
+ * quirk_via_irqpic will go away from quirks.
+ */
+
+/*
+ * FIXME: it is questionable whether quirk_via_acpi
+ * is needed. It shows up as an ISA bridge, and does not
+ * support the PCI_INTERRUPT_LINE register at all. Therefore
+ * it seems like setting the pci_dev's 'irq' to the
+ * value of the ACPI SCI interrupt is only done for convenience.
+ * -jgarzik
+ */
+static void __init quirk_via_acpi(struct pci_dev *d)
+{
+ /*
+ * VIA ACPI device: SCI IRQ line in PCI config byte 0x42
+ */
+ u8 irq;
+ pci_read_config_byte(d, 0x42, &irq);
+ irq &= 0xf;
+ if (irq && (irq != 2))
+ d->irq = irq;
+}
+
+static void __init quirk_via_irqpic(struct pci_dev *dev)
+{
+ u8 irq, new_irq = dev->irq & 0xf;
+
+ pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
+
+ if (new_irq != irq) {
+ printk(KERN_INFO "PCI: Via IRQ fixup for %s, from %d to %d\n",
+ dev->slot_name, irq, new_irq);
+
+ udelay(15);
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, new_irq);
+ }
+}
+
+
+/*
+ * PIIX3 USB: We have to disable USB interrupts that are
+ * hardwired to PIRQD# and may be shared with an
+ * external device.
+ *
+ * Legacy Support Register (LEGSUP):
+ * bit13: USB PIRQ Enable (USBPIRQDEN),
+ * bit4: Trap/SMI On IRQ Enable (USBSMIEN).
+ *
+ * We mask out all r/wc bits, too.
+ */
+static void __init quirk_piix3_usb(struct pci_dev *dev)
+{
+ u16 legsup;
+
+ pci_read_config_word(dev, 0xc0, &legsup);
+ legsup &= 0x50ef;
+ pci_write_config_word(dev, 0xc0, legsup);
+}
+
+/*
+ * VIA VT82C598 has its device ID settable and many BIOSes
+ * set it to the ID of VT82C597 for backward compatibility.
+ * We need to switch it off to be able to recognize the real
+ * type of the chip.
+ */
+static void __init quirk_vt82c598_id(struct pci_dev *dev)
+{
+ pci_write_config_byte(dev, 0xfc, 0);
+ pci_read_config_word(dev, PCI_DEVICE_ID, &dev->device);
+}
+
+/*
+ * CardBus controllers have a legacy base address that enables them
+ * to respond as i82365 pcmcia controllers. We don't want them to
+ * do this even if the Linux CardBus driver is not loaded, because
+ * the Linux i82365 driver does not (and should not) handle CardBus.
+ */
+static void __init quirk_cardbus_legacy(struct pci_dev *dev)
+{
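+	/* dev->class is the 24-bit class code (base class, sub-class,
+	   prog-if); the XOR is zero only for an exact CardBus bridge
+	   class code, so anything else bails out here. */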
+ if ((PCI_CLASS_BRIDGE_CARDBUS << 8) ^ dev->class)
+ return;
+ pci_write_config_dword(dev, PCI_CB_LEGACY_MODE_BASE, 0);
+}
+
+/*
+ * The AMD IO-APIC can hang the box when an APIC IRQ is masked.
+ * We check all revs >= B0 (but not pre-production parts!) as the bug
+ * is currently marked NoFix.
+ *
+ * We have multiple reports of hangs with this chipset that went away with
+ * noapic specified. For the moment we assume it's the erratum. We may be
+ * wrong of course. However the advice is demonstrably good even if so.
+ */
+
+static void __init quirk_amd_ioapic(struct pci_dev *dev)
+{
+ u8 rev;
+
+ pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
+ if(rev >= 0x02)
+ {
+ printk(KERN_WARNING "I/O APIC: AMD Errata #22 may be present. In the event of instability try\n");
+ printk(KERN_WARNING " : booting with the \"noapic\" option.\n");
+ }
+}
+
+/*
+ * Following the PCI ordering rules is optional on the AMD762. I'm not
+ * sure what the designers were smoking but let's not inhale...
+ *
+ * To be fair to AMD, it follows the spec by default; it's BIOS people
+ * who turn it off!
+ */
+
+static void __init quirk_amd_ordering(struct pci_dev *dev)
+{
+ u32 pcic;
+ pci_read_config_dword(dev, 0x4C, &pcic);
+ if((pcic&6)!=6)
+ {
+ pcic |= 6;
+ printk(KERN_WARNING "BIOS failed to enable PCI standards compliance, fixing this error.\n");
+ pci_write_config_dword(dev, 0x4C, pcic);
+ pci_read_config_dword(dev, 0x84, &pcic);
+ pcic |= (1<<23); /* Required in this mode */
+ pci_write_config_dword(dev, 0x84, pcic);
+ }
+}
+
+/*
+ * DreamWorks provided workaround for Dunord I-3000 problem
+ *
+ * This card decodes and responds to addresses not apparently
+ * assigned to it. We force a larger allocation to ensure that
+ * nothing gets put too close to it.
+ */
+
+static void __init quirk_dunord(struct pci_dev *dev)
+{
+	struct resource *r = &dev->resource[1];
+	r->start = 0;
+	r->end = 0xffffff;
+}
+
+static void __init quirk_transparent_bridge(struct pci_dev *dev)
+{
+ dev->transparent = 1;
+}
+
+/*
+ * Common misconfiguration of the MediaGX/Geode PCI master that will
+ * reduce PCI bandwidth from 70MB/s to 25MB/s. See the GXM/GXLV/GX1
+ * datasheets found at http://www.national.com/ds/GX for info on what
+ * these bits do. <christer@weinigel.se>
+ */
+
+static void __init quirk_mediagx_master(struct pci_dev *dev)
+{
+ u8 reg;
+ pci_read_config_byte(dev, 0x41, &reg);
+ if (reg & 2) {
+ reg &= ~2;
+ printk(KERN_INFO "PCI: Fixup for MediaGX/Geode Slave Disconnect Boundary (0x41=0x%02x)\n", reg);
+ pci_write_config_byte(dev, 0x41, reg);
+ }
+}
+
+/*
+ * As per PCI spec, ignore base address registers 0-3 of the IDE controllers
+ * running in Compatible mode (bits 0 and 2 in the ProgIf for primary and
+ * secondary channels respectively). If the device reports Compatible mode
+ * but does use BAR0-3 for address decoding, we assume that firmware has
+ * programmed these BARs with standard values (0x1f0,0x3f4 and 0x170,0x374).
+ * Exceptions (if they exist) must be handled in chip/architecture specific
+ * fixups.
+ *
+ * Note for non-x86 people: you may need an arch-specific quirk to handle
+ * moving IDE devices to native mode as well. Some plug-in card devices
+ * power up in compatible mode and assume the BIOS will adjust them.
+ *
+ * Q: should we load the 0x1f0,0x3f4 into the registers or zap them as
+ * we do now? What we don't want is pci_enable_device coming along
+ * and assigning new resources. Both approaches work for that.
+ */
+
+static void __devinit quirk_ide_bases(struct pci_dev *dev)
+{
+ struct resource *res;
+ int first_bar = 2, last_bar = 0;
+
+ if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
+ return;
+
+ res = &dev->resource[0];
+
+ /* primary channel: ProgIf bit 0, BAR0, BAR1 */
+ if (!(dev->class & 1) && (res[0].flags || res[1].flags)) {
+ res[0].start = res[0].end = res[0].flags = 0;
+ res[1].start = res[1].end = res[1].flags = 0;
+ first_bar = 0;
+ last_bar = 1;
+ }
+
+ /* secondary channel: ProgIf bit 2, BAR2, BAR3 */
+ if (!(dev->class & 4) && (res[2].flags || res[3].flags)) {
+ res[2].start = res[2].end = res[2].flags = 0;
+ res[3].start = res[3].end = res[3].flags = 0;
+ last_bar = 3;
+ }
+
+ if (!last_bar)
+ return;
+
+ printk(KERN_INFO "PCI: Ignoring BAR%d-%d of IDE controller %s\n",
+ first_bar, last_bar, dev->slot_name);
+}
+
+/*
+ * The main table of quirks.
+ */
+
+static struct pci_fixup pci_fixups[] __initdata = {
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_DUNORD, PCI_DEVICE_ID_DUNORD_I3000, quirk_dunord },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_passive_release },
+ /*
+	 * It's not totally clear which chipsets are the problematic ones.
+	 * We know the 82C586 and 82C596 variants are affected.
+ */
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, quirk_isa_dma_hangs },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C596, quirk_isa_dma_hangs },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, quirk_isa_dma_hangs },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_868, quirk_s3_64M },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_968, quirk_s3_64M },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437, quirk_triton },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437VX, quirk_triton },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439, quirk_triton },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439TX, quirk_triton },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_natoma },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_0, quirk_natoma },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_1, quirk_natoma },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0, quirk_natoma },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_1, quirk_natoma },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2, quirk_natoma },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1647, quirk_alimagik },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1651, quirk_alimagik },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, quirk_nopcipci },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_496, quirk_nopcipci },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, quirk_vialatency },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8371_1, quirk_vialatency },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, quirk_vialatency },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C576, quirk_vsfx },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C597_0, quirk_viaetbf },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C597_0, quirk_vt82c598_id },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_3, quirk_vt82c586_acpi },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_4, quirk_vt82c686_acpi },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, quirk_piix4_acpi },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M7101, quirk_ali7101_acpi },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_2, quirk_piix3_usb },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_2, quirk_piix3_usb },
+ { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, quirk_ide_bases },
+ { PCI_FIXUP_FINAL, PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy },
+
+#ifdef CONFIG_X86_IO_APIC
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_ioapic },
+#endif
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_3, quirk_via_acpi },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_4, quirk_via_acpi },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_2, quirk_via_irqpic },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_5, quirk_via_irqpic },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_6, quirk_via_irqpic },
+
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7410, quirk_amd_ioapic },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering },
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_IGP, quirk_ati_exploding_mce },
+ /*
+ * i82380FB mobile docking controller: its PCI-to-PCI bridge
+ * is subtractive decoding (transparent), and does indicate this
+ * in the ProgIf. Unfortunately, the ProgIf value is wrong - 0x80
+ * instead of 0x01.
+ */
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82380FB, quirk_transparent_bridge },
+
+ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_PCI_MASTER, quirk_mediagx_master },
+
+ { 0 }
+};
+
+
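+/*
+ * Quirks are applied in two passes: PCI_FIXUP_HEADER hooks run as each
+ * device's config header is parsed, PCI_FIXUP_FINAL hooks run once the
+ * bus scan has completed.  The architecture's own table (pcibios_fixups)
+ * is walked before the generic table above.
+ */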
+static void pci_do_fixups(struct pci_dev *dev, int pass, struct pci_fixup *f)
+{
+ while (f->pass) {
+ if (f->pass == pass &&
+ (f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
+ (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
+#ifdef DEBUG
+ printk(KERN_INFO "PCI: Calling quirk %p for %s\n", f->hook, dev->slot_name);
+#endif
+ f->hook(dev);
+ }
+ f++;
+ }
+}
+
+void pci_fixup_device(int pass, struct pci_dev *dev)
+{
+ pci_do_fixups(dev, pass, pcibios_fixups);
+ pci_do_fixups(dev, pass, pci_fixups);
+}
diff --git a/xen/drivers/pci/setup-bus.c b/xen/drivers/pci/setup-bus.c
new file mode 100644
index 0000000000..22e7075171
--- /dev/null
+++ b/xen/drivers/pci/setup-bus.c
@@ -0,0 +1,400 @@
+/*
+ * drivers/pci/setup-bus.c
+ *
+ * Extruded from code written by
+ * Dave Rusling (david.rusling@reo.mts.dec.com)
+ * David Mosberger (davidm@cs.arizona.edu)
+ * David Miller (davem@redhat.com)
+ *
+ * Support routines for initializing a PCI subsystem.
+ */
+
+/*
+ * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+ * PCI-PCI bridges cleanup, sorted resource allocation.
+ * Feb 2002, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+ * Converted to allocation in 3 passes, which gives
+ * tighter packing. Prefetchable range support.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+
+
+#define DEBUG_CONFIG 1
+#if DEBUG_CONFIG
+# define DBGC(args) printk args
+#else
+# define DBGC(args)
+#endif
+
+#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))
+
+static int __init
+pbus_assign_resources_sorted(struct pci_bus *bus)
+{
+ struct list_head *ln;
+ struct resource *res;
+ struct resource_list head, *list, *tmp;
+ int idx, found_vga = 0;
+
+ head.next = NULL;
+ for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) {
+ struct pci_dev *dev = pci_dev_b(ln);
+ u16 class = dev->class >> 8;
+ u16 cmd;
+
+ /* First, disable the device to avoid side
+ effects of possibly overlapping I/O and
+ memory ranges.
+ Leave VGA enabled - for obvious reason. :-)
+ Same with all sorts of bridges - they may
+ have VGA behind them. */
+ if (class == PCI_CLASS_DISPLAY_VGA
+ || class == PCI_CLASS_NOT_DEFINED_VGA)
+ found_vga = 1;
+ else if (class >> 8 != PCI_BASE_CLASS_BRIDGE) {
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+ | PCI_COMMAND_MASTER);
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+
+ pdev_sort_resources(dev, &head);
+ }
+
+ for (list = head.next; list;) {
+ res = list->res;
+ idx = res - &list->dev->resource[0];
+ pci_assign_resource(list->dev, idx);
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+
+ return found_vga;
+}
+
+/* Initialize bridges with base/limit values we have collected.
+ PCI-to-PCI Bridge Architecture Specification rev. 1.1 (1998)
+   requires that if there are no I/O ports or memory behind the
+   bridge, the corresponding range must be turned off by writing a
+   base value greater than the limit to the bridge's base/limit registers. */
+static void __init
+pci_setup_bridge(struct pci_bus *bus)
+{
+ struct pbus_set_ranges_data ranges;
+ struct pci_dev *bridge = bus->self;
+ u32 l;
+
+ if (!bridge || (bridge->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+ return;
+
+ ranges.io_start = bus->resource[0]->start;
+ ranges.io_end = bus->resource[0]->end;
+ ranges.mem_start = bus->resource[1]->start;
+ ranges.mem_end = bus->resource[1]->end;
+ ranges.prefetch_start = bus->resource[2]->start;
+ ranges.prefetch_end = bus->resource[2]->end;
+ pcibios_fixup_pbus_ranges(bus, &ranges);
+
+ DBGC((KERN_INFO "PCI: Bus %d, bridge: %s\n",
+ bus->number, bridge->name));
+
+ /* Set up the top and bottom of the PCI I/O segment for this bus. */
+ if (bus->resource[0]->flags & IORESOURCE_IO) {
+ pci_read_config_dword(bridge, PCI_IO_BASE, &l);
+ l &= 0xffff0000;
+ l |= (ranges.io_start >> 8) & 0x00f0;
+ l |= ranges.io_end & 0xf000;
+ /* Set up upper 16 bits of I/O base/limit. */
+ pci_write_config_word(bridge, PCI_IO_BASE_UPPER16,
+ ranges.io_start >> 16);
+ pci_write_config_word(bridge, PCI_IO_LIMIT_UPPER16,
+ ranges.io_end >> 16);
+ DBGC((KERN_INFO " IO window: %04lx-%04lx\n",
+ ranges.io_start, ranges.io_end));
+ }
+ else {
+ /* Clear upper 16 bits of I/O base/limit. */
+ pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0);
+ l = 0x00f0;
+ DBGC((KERN_INFO " IO window: disabled.\n"));
+ }
+ pci_write_config_dword(bridge, PCI_IO_BASE, l);
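+	/* Example encoding: an I/O window 0x1000-0x1fff yields 0x1010 in
+	   the low word of 'l': the upper nibble of each byte carries bits
+	   15:12 of the base and limit addresses respectively. */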
+
+ /* Set up the top and bottom of the PCI Memory segment
+ for this bus. */
+ if (bus->resource[1]->flags & IORESOURCE_MEM) {
+ l = (ranges.mem_start >> 16) & 0xfff0;
+ l |= ranges.mem_end & 0xfff00000;
+ DBGC((KERN_INFO " MEM window: %08lx-%08lx\n",
+ ranges.mem_start, ranges.mem_end));
+ }
+ else {
+ l = 0x0000fff0;
+ DBGC((KERN_INFO " MEM window: disabled.\n"));
+ }
+ pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);
+
+ /* Clear out the upper 32 bits of PREF base/limit. */
+ pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, 0);
+ pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0);
+
+ /* Set up PREF base/limit. */
+ if (bus->resource[2]->flags & IORESOURCE_PREFETCH) {
+ l = (ranges.prefetch_start >> 16) & 0xfff0;
+ l |= ranges.prefetch_end & 0xfff00000;
+ DBGC((KERN_INFO " PREFETCH window: %08lx-%08lx\n",
+ ranges.prefetch_start, ranges.prefetch_end));
+ }
+ else {
+ l = 0x0000fff0;
+ DBGC((KERN_INFO " PREFETCH window: disabled.\n"));
+ }
+ pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l);
+
+ /* Check if we have VGA behind the bridge.
+ Enable ISA in either case (FIXME!). */
+ l = (bus->resource[0]->flags & IORESOURCE_BUS_HAS_VGA) ? 0x0c : 0x04;
+ pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, l);
+}
+
+/* Check whether the bridge supports optional I/O and
+ prefetchable memory ranges. If not, the respective
+ base/limit registers must be read-only and read as 0. */
+static void __init
+pci_bridge_check_ranges(struct pci_bus *bus)
+{
+ u16 io;
+ u32 pmem;
+ struct pci_dev *bridge = bus->self;
+ struct resource *b_res;
+
+ if (!bridge || (bridge->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+ return;
+
+ b_res = &bridge->resource[PCI_BRIDGE_RESOURCES];
+ b_res[1].flags |= IORESOURCE_MEM;
+
+ pci_read_config_word(bridge, PCI_IO_BASE, &io);
+ if (!io) {
+ pci_write_config_word(bridge, PCI_IO_BASE, 0xf0f0);
+ pci_read_config_word(bridge, PCI_IO_BASE, &io);
+ pci_write_config_word(bridge, PCI_IO_BASE, 0x0);
+ }
+ if (io)
+ b_res[0].flags |= IORESOURCE_IO;
+ /* DECchip 21050 pass 2 errata: the bridge may miss an address
+ disconnect boundary by one PCI data phase.
+ Workaround: do not use prefetching on this device. */
+ if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001)
+ return;
+ pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
+ if (!pmem) {
+ pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE,
+ 0xfff0fff0);
+ pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
+ pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0);
+ }
+ if (pmem)
+ b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH;
+}
+
+/* Sizing the IO windows of the PCI-PCI bridge is trivial,
+ since these windows have 4K granularity and the IO ranges
+ of non-bridge PCI devices are limited to 256 bytes.
+ We must be careful with the ISA aliasing though. */
+static void __init
+pbus_size_io(struct pci_bus *bus)
+{
+ struct list_head *ln;
+ struct resource *b_res = bus->resource[0];
+ unsigned long size = 0, size1 = 0;
+
+ if (!(b_res->flags & IORESOURCE_IO))
+ return;
+
+ for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) {
+ struct pci_dev *dev = pci_dev_b(ln);
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *r = &dev->resource[i];
+ unsigned long r_size;
+
+ if (r->parent || !(r->flags & IORESOURCE_IO))
+ continue;
+ r_size = r->end - r->start + 1;
+
+ if (r_size < 0x400)
+ /* Might be re-aligned for ISA */
+ size += r_size;
+ else
+ size1 += r_size;
+ }
+ /* ??? Reserve some resources for CardBus. */
+ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_CARDBUS)
+ size1 += 4*1024;
+ }
+/* To be fixed in 2.5: we should have some sort of HAVE_ISA
+   flag in the struct pci_bus. */
+#if defined(CONFIG_ISA) || defined(CONFIG_EISA)
+ size = (size & 0xff) + ((size & ~0xffUL) << 2);
+#endif
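+	/* Under ISA aliasing only the first 256 ports of each 1K block
+	   are usable, so everything beyond the low 0xff bytes is
+	   quadrupled: e.g. a 0x300 requirement grows to 0xc00. */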
+ size = ROUND_UP(size + size1, 4096);
+ if (!size) {
+ b_res->flags = 0;
+ return;
+ }
+ /* Alignment of the IO window is always 4K */
+ b_res->start = 4096;
+ b_res->end = b_res->start + size - 1;
+}
+
+/* Calculate the size of the bus and minimal alignment which
+ guarantees that all child resources fit in this size. */
+static void __init
+pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long type)
+{
+ struct list_head *ln;
+ unsigned long min_align, align, size;
+ unsigned long aligns[12]; /* Alignments from 1Mb to 2Gb */
+ int order, max_order;
+ struct resource *b_res = (type & IORESOURCE_PREFETCH) ?
+ bus->resource[2] : bus->resource[1];
+
+ memset(aligns, 0, sizeof(aligns));
+ max_order = 0;
+ size = 0;
+
+ for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) {
+ struct pci_dev *dev = pci_dev_b(ln);
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *r = &dev->resource[i];
+ unsigned long r_size;
+
+ if (r->parent || (r->flags & mask) != type)
+ continue;
+ r_size = r->end - r->start + 1;
+ /* For bridges size != alignment */
+ align = (i < PCI_BRIDGE_RESOURCES) ? r_size : r->start;
+ order = ffz(~align) - 20;
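+			/* For a power-of-two alignment, ffz(~align) is
+			   log2(align): order 0 is 1Mb, order 11 is 2Gb. */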
+ if (order > 11) {
+ printk(KERN_WARNING "PCI: region %s/%d "
+ "too large: %lx-%lx\n",
+ dev->slot_name, i, r->start, r->end);
+ r->flags = 0;
+ continue;
+ }
+ size += r_size;
+ if (order < 0)
+ order = 0;
+ /* Exclude ranges with size > align from
+ calculation of the alignment. */
+			if (r_size == align)
+ aligns[order] += align;
+ if (order > max_order)
+ max_order = order;
+ }
+ /* ??? Reserve some resources for CardBus. */
+ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_CARDBUS) {
+ size += 1UL << 24; /* 16 Mb */
+ aligns[24 - 20] += 1UL << 24;
+ }
+ }
+
+ align = 0;
+ min_align = 0;
+ for (order = 0; order <= max_order; order++) {
+ unsigned long align1 = 1UL << (order + 20);
+
+ if (!align)
+ min_align = align1;
+ else if (ROUND_UP(align + min_align, min_align) < align1)
+ min_align = align1 >> 1;
+ align += aligns[order];
+ }
+ size = ROUND_UP(size, min_align);
+ if (!size) {
+ b_res->flags = 0;
+ return;
+ }
+ b_res->start = min_align;
+ b_res->end = size + min_align - 1;
+}
+
+void __init
+pbus_size_bridges(struct pci_bus *bus)
+{
+ struct list_head *ln;
+ unsigned long mask, type;
+
+ for (ln=bus->children.next; ln != &bus->children; ln=ln->next)
+ pbus_size_bridges(pci_bus_b(ln));
+
+ /* The root bus? */
+ if (!bus->self)
+ return;
+
+ pci_bridge_check_ranges(bus);
+
+ pbus_size_io(bus);
+
+ mask = type = IORESOURCE_MEM;
+ /* If the bridge supports prefetchable range, size it separately. */
+ if (bus->resource[2] &&
+ bus->resource[2]->flags & IORESOURCE_PREFETCH) {
+ pbus_size_mem(bus, IORESOURCE_PREFETCH, IORESOURCE_PREFETCH);
+ mask |= IORESOURCE_PREFETCH; /* Size non-prefetch only. */
+ }
+ pbus_size_mem(bus, mask, type);
+}
+
+void __init
+pbus_assign_resources(struct pci_bus *bus)
+{
+ struct list_head *ln;
+ int found_vga = pbus_assign_resources_sorted(bus);
+
+ if (found_vga) {
+ struct pci_bus *b;
+
+ /* Propagate presence of the VGA to upstream bridges */
+ for (b = bus; b->parent; b = b->parent) {
+ b->resource[0]->flags |= IORESOURCE_BUS_HAS_VGA;
+ }
+ }
+ for (ln=bus->children.next; ln != &bus->children; ln=ln->next) {
+ struct pci_bus *b = pci_bus_b(ln);
+
+ pbus_assign_resources(b);
+ pci_setup_bridge(b);
+ }
+}
+
+void __init
+pci_assign_unassigned_resources(void)
+{
+ struct list_head *ln;
+ struct pci_dev *dev;
+
+ /* Depth first, calculate sizes and alignments of all
+ subordinate buses. */
+ for(ln=pci_root_buses.next; ln != &pci_root_buses; ln=ln->next)
+ pbus_size_bridges(pci_bus_b(ln));
+ /* Depth last, allocate resources and update the hardware. */
+ for(ln=pci_root_buses.next; ln != &pci_root_buses; ln=ln->next)
+ pbus_assign_resources(pci_bus_b(ln));
+
+ pci_for_each_dev(dev) {
+ pdev_enable_device(dev);
+ }
+}
diff --git a/xen/drivers/pci/setup-irq.c b/xen/drivers/pci/setup-irq.c
new file mode 100644
index 0000000000..4c65b2e98d
--- /dev/null
+++ b/xen/drivers/pci/setup-irq.c
@@ -0,0 +1,71 @@
+/*
+ * drivers/pci/setup-irq.c
+ *
+ * Extruded from code written by
+ * Dave Rusling (david.rusling@reo.mts.dec.com)
+ * David Mosberger (davidm@cs.arizona.edu)
+ * David Miller (davem@redhat.com)
+ *
+ * Support routines for initializing a PCI subsystem.
+ */
+
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/cache.h>
+
+
+#define DEBUG_CONFIG 0
+#if DEBUG_CONFIG
+# define DBGC(args) printk args
+#else
+# define DBGC(args)
+#endif
+
+
+static void __init
+pdev_fixup_irq(struct pci_dev *dev,
+ u8 (*swizzle)(struct pci_dev *, u8 *),
+ int (*map_irq)(struct pci_dev *, u8, u8))
+{
+ u8 pin, slot;
+ int irq;
+
+ /* If this device is not on the primary bus, we need to figure out
+ which interrupt pin it will come in on. We know which slot it
+ will come in on 'cos that slot is where the bridge is. Each
+ time the interrupt line passes through a PCI-PCI bridge we must
+ apply the swizzle function. */
+
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+	/* Cope with illegal values: 0 and anything over 4 fall back to pin 1 (INTA). */
+ if (pin == 0 || pin > 4)
+ pin = 1;
+
+ /* Follow the chain of bridges, swizzling as we go. */
+ slot = (*swizzle)(dev, &pin);
+
+ irq = (*map_irq)(dev, slot, pin);
+ if (irq == -1)
+ irq = 0;
+ dev->irq = irq;
+
+ DBGC((KERN_ERR "PCI fixup irq: (%s) got %d\n", dev->name, dev->irq));
+
+	/* Always tell the device, so the driver knows the real IRQ
+	   to use; the device itself does not use this value. */
+ pcibios_update_irq(dev, irq);
+}
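+
+/*
+ * A typical swizzle implements the standard bridge rule, e.g.
+ * pin = ((pin - 1 + PCI_SLOT(dev->devfn)) % 4) + 1 at each
+ * PCI-PCI bridge on the way up, until the primary bus is reached,
+ * whose slot number then indexes the interrupt routing table.
+ */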
+
+void __init
+pci_fixup_irqs(u8 (*swizzle)(struct pci_dev *, u8 *),
+ int (*map_irq)(struct pci_dev *, u8, u8))
+{
+ struct pci_dev *dev;
+ pci_for_each_dev(dev) {
+ pdev_fixup_irq(dev, swizzle, map_irq);
+ }
+}
diff --git a/xen/drivers/pci/setup-res.c b/xen/drivers/pci/setup-res.c
new file mode 100644
index 0000000000..1053ad5489
--- /dev/null
+++ b/xen/drivers/pci/setup-res.c
@@ -0,0 +1,241 @@
+/*
+ * drivers/pci/setup-res.c
+ *
+ * Extruded from code written by
+ * Dave Rusling (david.rusling@reo.mts.dec.com)
+ * David Mosberger (davidm@cs.arizona.edu)
+ * David Miller (davem@redhat.com)
+ *
+ * Support routines for initializing a PCI subsystem.
+ */
+
+/* fixed for multiple pci buses, 1999 Andrea Arcangeli <andrea@suse.de> */
+
+/*
+ * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+ * Resource sorting
+ */
+
+#include <linux/init.h>
+/*#include <linux/kernel.h>*/
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+
+
+#define DEBUG_CONFIG 0
+#if DEBUG_CONFIG
+# define DBGC(args) printk args
+#else
+# define DBGC(args)
+#endif
+
+
+int __init
+pci_claim_resource(struct pci_dev *dev, int resource)
+{
+ struct resource *res = &dev->resource[resource];
+ struct resource *root = pci_find_parent_resource(dev, res);
+ int err;
+
+ err = -EINVAL;
+ if (root != NULL) {
+ err = request_resource(root, res);
+ if (err) {
+ printk(KERN_ERR "PCI: Address space collision on "
+ "region %d of device %s [%lx:%lx]\n",
+ resource, dev->name, res->start, res->end);
+ }
+ } else {
+ printk(KERN_ERR "PCI: No parent found for region %d "
+ "of device %s\n", resource, dev->name);
+ }
+
+ return err;
+}
+
+/*
+ * Given the PCI bus a device resides on, try to
+ * find an acceptable resource allocation for a
+ * specific device resource..
+ */
+static int pci_assign_bus_resource(const struct pci_bus *bus,
+ struct pci_dev *dev,
+ struct resource *res,
+ unsigned long size,
+ unsigned long min,
+ unsigned int type_mask,
+ int resno)
+{
+ unsigned long align;
+ int i;
+
+ type_mask |= IORESOURCE_IO | IORESOURCE_MEM;
+ for (i = 0 ; i < 4; i++) {
+ struct resource *r = bus->resource[i];
+ if (!r)
+ continue;
+
+ /* type_mask must match */
+ if ((res->flags ^ r->flags) & type_mask)
+ continue;
+
+		/* We cannot allocate a non-prefetchable resource
+		   from a prefetchable area */
+ if ((r->flags & IORESOURCE_PREFETCH) &&
+ !(res->flags & IORESOURCE_PREFETCH))
+ continue;
+
+ /* The bridge resources are special, as their
+ size != alignment. Sizing routines return
+ required alignment in the "start" field. */
+ align = (resno < PCI_BRIDGE_RESOURCES) ? size : res->start;
+
+ /* Ok, try it out.. */
+ if (allocate_resource(r, res, size, min, -1, align,
+ pcibios_align_resource, dev) < 0)
+ continue;
+
+ /* Update PCI config space. */
+ pcibios_update_resource(dev, r, res, resno);
+ return 0;
+ }
+ return -EBUSY;
+}
+
+int
+pci_assign_resource(struct pci_dev *dev, int i)
+{
+ const struct pci_bus *bus = dev->bus;
+ struct resource *res = dev->resource + i;
+ unsigned long size, min;
+
+ size = res->end - res->start + 1;
+ min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
+
+ /* First, try exact prefetching match.. */
+ if (pci_assign_bus_resource(bus, dev, res, size, min, IORESOURCE_PREFETCH, i) < 0) {
+ /*
+ * That failed.
+ *
+ * But a prefetching area can handle a non-prefetching
+ * window (it will just not perform as well).
+ */
+ if (!(res->flags & IORESOURCE_PREFETCH) || pci_assign_bus_resource(bus, dev, res, size, min, 0, i) < 0) {
+ printk(KERN_ERR "PCI: Failed to allocate resource %d(%lx-%lx) for %s\n",
+ i, res->start, res->end, dev->slot_name);
+ return -EBUSY;
+ }
+ }
+
+ DBGC((KERN_ERR " got res[%lx:%lx] for resource %d of %s\n", res->start,
+ res->end, i, dev->name));
+
+ return 0;
+}
+
+/* Sort resources by alignment */
+void __init
+pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
+{
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *r;
+ struct resource_list *list, *tmp;
+ unsigned long r_align;
+
+ r = &dev->resource[i];
+ r_align = r->end - r->start;
+
+ if (!(r->flags) || r->parent)
+ continue;
+ if (!r_align) {
+ printk(KERN_WARNING "PCI: Ignore bogus resource %d "
+ "[%lx:%lx] of %s\n",
+ i, r->start, r->end, dev->name);
+ continue;
+ }
+ r_align = (i < PCI_BRIDGE_RESOURCES) ? r_align + 1 : r->start;
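+		/* Insertion sort: the list is kept in order of decreasing
+		   alignment.  For bridge windows the required alignment was
+		   stored in res->start by the bus sizing code. */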
+ for (list = head; ; list = list->next) {
+ unsigned long align = 0;
+ struct resource_list *ln = list->next;
+ int idx;
+
+ if (ln) {
+ idx = ln->res - &ln->dev->resource[0];
+ align = (idx < PCI_BRIDGE_RESOURCES) ?
+ ln->res->end - ln->res->start + 1 :
+ ln->res->start;
+ }
+ if (r_align > align) {
+ tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ panic("pdev_sort_resources(): "
+ "kmalloc() failed!\n");
+ tmp->next = ln;
+ tmp->res = r;
+ tmp->dev = dev;
+ list->next = tmp;
+ break;
+ }
+ }
+ }
+}
+
+void __init
+pdev_enable_device(struct pci_dev *dev)
+{
+ u32 reg;
+ u16 cmd;
+ int i;
+
+ DBGC((KERN_ERR "PCI enable device: (%s)\n", dev->name));
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *res = &dev->resource[i];
+
+ if (res->flags & IORESOURCE_IO)
+ cmd |= PCI_COMMAND_IO;
+ else if (res->flags & IORESOURCE_MEM)
+ cmd |= PCI_COMMAND_MEMORY;
+ }
+
+ /* Special case, disable the ROM. Several devices act funny
+	   (i.e. do not respond to memory space writes) when it is left
+ enabled. A good example are QlogicISP adapters. */
+
+ if (dev->rom_base_reg) {
+ pci_read_config_dword(dev, dev->rom_base_reg, &reg);
+ reg &= ~PCI_ROM_ADDRESS_ENABLE;
+ pci_write_config_dword(dev, dev->rom_base_reg, reg);
+ dev->resource[PCI_ROM_RESOURCE].flags &= ~PCI_ROM_ADDRESS_ENABLE;
+ }
+
+ /* All of these (may) have I/O scattered all around and may not
+ use I/O base address registers at all. So we just have to
+ always enable IO to these devices. */
+ if ((dev->class >> 8) == PCI_CLASS_NOT_DEFINED
+ || (dev->class >> 8) == PCI_CLASS_NOT_DEFINED_VGA
+ || (dev->class >> 8) == PCI_CLASS_STORAGE_IDE
+ || (dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+ cmd |= PCI_COMMAND_IO;
+ }
+
+ /* ??? Always turn on bus mastering. If the device doesn't support
+ it, the bit will go into the bucket. */
+ cmd |= PCI_COMMAND_MASTER;
+
+ /* Set the cache line and default latency (32). */
+ pci_write_config_word(dev, PCI_CACHE_LINE_SIZE,
+ (32 << 8) | (L1_CACHE_BYTES / sizeof(u32)));
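+	/* The word write at PCI_CACHE_LINE_SIZE covers two byte-wide
+	   registers: the low byte is the cache line size in dwords, the
+	   high byte is the latency timer (here 32 PCI clocks). */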
+
+ /* Enable the appropriate bits in the PCI command register. */
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+
+ DBGC((KERN_ERR " cmd reg 0x%x\n", cmd));
+}
diff --git a/xen/drivers/pci/syscall.c b/xen/drivers/pci/syscall.c
new file mode 100644
index 0000000000..c935efd9a9
--- /dev/null
+++ b/xen/drivers/pci/syscall.c
@@ -0,0 +1,144 @@
+/*
+ * pci_syscall.c
+ *
+ * For architectures where we want to allow direct access
+ * to the PCI config stuff - it would probably be preferable
+ * on PCs too, but there people just do it by hand with the
+ * magic northbridge registers..
+ */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+
+
+asmlinkage long
+sys_pciconfig_read(unsigned long bus, unsigned long dfn,
+ unsigned long off, unsigned long len, void *buf)
+{
+ struct pci_dev *dev;
+ u8 byte;
+ u16 word;
+ u32 dword;
+ long err, cfg_ret;
+
+ err = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto error;
+
+ err = -ENODEV;
+ dev = pci_find_slot(bus, dfn);
+ if (!dev)
+ goto error;
+
+ lock_kernel();
+ switch (len) {
+ case 1:
+ cfg_ret = pci_read_config_byte(dev, off, &byte);
+ break;
+ case 2:
+ cfg_ret = pci_read_config_word(dev, off, &word);
+ break;
+ case 4:
+ cfg_ret = pci_read_config_dword(dev, off, &dword);
+ break;
+ default:
+ err = -EINVAL;
+ unlock_kernel();
+ goto error;
+	}
+ unlock_kernel();
+
+ err = -EIO;
+ if (cfg_ret != PCIBIOS_SUCCESSFUL)
+ goto error;
+
+ switch (len) {
+ case 1:
+ err = put_user(byte, (unsigned char *)buf);
+ break;
+ case 2:
+ err = put_user(word, (unsigned short *)buf);
+ break;
+ case 4:
+ err = put_user(dword, (unsigned int *)buf);
+ break;
+	}
+ return err;
+
+error:
+ /* ??? XFree86 doesn't even check the return value. They
+ just look for 0xffffffff in the output, since that's what
+ they get instead of a machine check on x86. */
+ switch (len) {
+ case 1:
+ put_user(-1, (unsigned char *)buf);
+ break;
+ case 2:
+ put_user(-1, (unsigned short *)buf);
+ break;
+ case 4:
+ put_user(-1, (unsigned int *)buf);
+ break;
+	}
+ return err;
+}
+
+asmlinkage long
+sys_pciconfig_write(unsigned long bus, unsigned long dfn,
+ unsigned long off, unsigned long len, void *buf)
+{
+ struct pci_dev *dev;
+ u8 byte;
+ u16 word;
+ u32 dword;
+ int err = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (!pcibios_present())
+ return -ENOSYS;
+
+ dev = pci_find_slot(bus, dfn);
+ if (!dev)
+ return -ENODEV;
+
+ lock_kernel();
+ switch(len) {
+ case 1:
+ err = get_user(byte, (u8 *)buf);
+ if (err)
+ break;
+ err = pci_write_config_byte(dev, off, byte);
+ if (err != PCIBIOS_SUCCESSFUL)
+ err = -EIO;
+ break;
+
+ case 2:
+ err = get_user(word, (u16 *)buf);
+ if (err)
+ break;
+ err = pci_write_config_word(dev, off, word);
+ if (err != PCIBIOS_SUCCESSFUL)
+ err = -EIO;
+ break;
+
+ case 4:
+ err = get_user(dword, (u32 *)buf);
+ if (err)
+ break;
+ err = pci_write_config_dword(dev, off, dword);
+ if (err != PCIBIOS_SUCCESSFUL)
+ err = -EIO;
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+	}
+ unlock_kernel();
+
+ return err;
+}
diff --git a/xen/drivers/scsi/Makefile b/xen/drivers/scsi/Makefile
new file mode 100644
index 0000000000..5b480bdf53
--- /dev/null
+++ b/xen/drivers/scsi/Makefile
@@ -0,0 +1,11 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(MAKE) -C aacraid
+ $(LD) -r -o driver.o $(OBJS) aacraid/aacraid.o
+# $(LD) -r -o driver.o $(OBJS)
+
+clean:
+ $(MAKE) -C aacraid clean
+ rm -f *.o *~ core
diff --git a/xen/drivers/scsi/aacraid/Makefile b/xen/drivers/scsi/aacraid/Makefile
new file mode 100644
index 0000000000..7d802c3bc9
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/Makefile
@@ -0,0 +1,17 @@
+
+include $(BASEDIR)/Rules.mk
+
+CFLAGS += -I$(BASEDIR)/drivers/scsi
+
+
+# -y := linit.o aachba.o commctrl.o comminit.o commsup.o \
+# dpcsup.o rx.o sa.o
+
+default: $(OBJS)
+ $(LD) -r -o aacraid.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
+
+
+
diff --git a/xen/drivers/scsi/aacraid/README b/xen/drivers/scsi/aacraid/README
new file mode 100644
index 0000000000..9f73c6719b
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/README
@@ -0,0 +1,42 @@
+AACRAID Driver for Linux (take two)
+
+Introduction
+-------------------------
+The aacraid driver adds support for Adaptec (http://www.adaptec.com)
+RAID controllers. This is a major rewrite from the original
+Adaptec-supplied driver. It has significantly cleaned up the code and
+reduced the running binary size (the module is less than half the size of
+the original).
+
+Supported Cards/Chipsets
+-------------------------
+ Dell Computer Corporation PERC 2 Quad Channel
+ Dell Computer Corporation PERC 2/Si
+ Dell Computer Corporation PERC 3/Si
+ Dell Computer Corporation PERC 3/Di
+ HP NetRAID-4M
+ ADAPTEC 2120S
+ ADAPTEC 2200S
+ ADAPTEC 5400S
+
+People
+-------------------------
+Alan Cox <alan@redhat.com>
+Christoph Hellwig <hch@infradead.org> (small cleanups/fixes)
+Matt Domsch <matt_domsch@dell.com> (revision ioctl, adapter messages)
+Deanna Bonds                     <deanna_bonds@adaptec.com> (non-DASD support, PAE fibs and 64 bit, added new Adaptec controllers,
+				 added new ioctls, changed SCSI interface to use new error handler,
+ increased the number of fibs and outstanding commands to a container)
+
+Original Driver
+-------------------------
+Adaptec Unix OEM Product Group
+
+Mailing List
+-------------------------
+None currently. Also note this is very different to Brian's original driver
+so don't expect him to support it.
+Adaptec does support this driver.  Contact either tech support or Deanna Bonds.
+
+Original by Brian Boerner February 2001
+Rewritten by Alan Cox, November 2001
diff --git a/xen/drivers/scsi/aacraid/TODO b/xen/drivers/scsi/aacraid/TODO
new file mode 100644
index 0000000000..6f71022413
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/TODO
@@ -0,0 +1,4 @@
+o Testing
+o More testing
+o Feature request: display the firmware/bios/etc revisions in the
+ /proc info
diff --git a/xen/drivers/scsi/aacraid/aachba.c b/xen/drivers/scsi/aacraid/aachba.c
new file mode 100644
index 0000000000..21fc4259b8
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/aachba.c
@@ -0,0 +1,1685 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <xeno/config.h>
+/* #include <xeno/kernel.h> */
+#include <xeno/init.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+/* #include <xeno/spinlock.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/completion.h> */
+/* #include <asm/semaphore.h> */
+#include <asm/uaccess.h>
+#define MAJOR_NR SCSI_DISK0_MAJOR /* For DEVICE_NR() */
+#include <linux/blk.h>
+#include "scsi.h"
+#include "hosts.h"
+#include "sd.h"
+
+#include "aacraid.h"
+
+/* SCSI Commands */
+/* TODO: dmb - use the ones defined in include/scsi/scsi.h */
+
+#define SS_TEST 0x00 /* Test unit ready */
+#define SS_REZERO 0x01 /* Rezero unit */
+#define SS_REQSEN 0x03 /* Request Sense */
+#define SS_REASGN 0x07 /* Reassign blocks */
+#define SS_READ 0x08 /* Read 6 */
+#define SS_WRITE 0x0A /* Write 6 */
+#define SS_INQUIR 0x12 /* inquiry */
+#define SS_ST_SP 0x1B /* Start/Stop unit */
+#define SS_LOCK 0x1E /* prevent/allow medium removal */
+#define SS_RESERV 0x16 /* Reserve */
+#define SS_RELES 0x17 /* Release */
+#define SS_MODESEN 0x1A /* Mode Sense 6 */
+#define SS_RDCAP 0x25 /* Read Capacity */
+#define SM_READ 0x28 /* Read 10 */
+#define SM_WRITE 0x2A /* Write 10 */
+#define SS_SEEK 0x2B /* Seek */
+
+/* values for inqd_pdt: Peripheral device type in plain English */
+#define INQD_PDT_DA 0x00 /* Direct-access (DISK) device */
+#define INQD_PDT_PROC 0x03 /* Processor device */
+#define INQD_PDT_CHNGR 0x08 /* Changer (jukebox, scsi2) */
+#define INQD_PDT_COMM 0x09 /* Communication device (scsi2) */
+#define INQD_PDT_NOLUN2 0x1f /* Unknown Device (scsi2) */
+#define INQD_PDT_NOLUN 0x7f /* Logical Unit Not Present */
+
+#define INQD_PDT_DMASK 0x1F /* Peripheral Device Type Mask */
+#define INQD_PDT_QMASK 0xE0 /* Peripheral Device Qualifier Mask */
+
+#define TARGET_LUN_TO_CONTAINER(target, lun) (target)
+#define CONTAINER_TO_TARGET(cont) ((cont))
+#define CONTAINER_TO_LUN(cont) (0)
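+/* Containers appear as targets on channel 0 with LUN 0, so the mapping
+ * between container number and SCSI target id is simply the identity. */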
+
+#define MAX_FIB_DATA (sizeof(struct hw_fib) - sizeof(FIB_HEADER))
+
+#define MAX_DRIVER_SG_SEGMENT_COUNT 17
+
+/*
+ * Sense keys
+ */
+#define SENKEY_NO_SENSE 0x00
+#define SENKEY_UNDEFINED 0x01
+#define SENKEY_NOT_READY 0x02
+#define SENKEY_MEDIUM_ERR 0x03
+#define SENKEY_HW_ERR 0x04
+#define SENKEY_ILLEGAL 0x05
+#define SENKEY_ATTENTION 0x06
+#define SENKEY_PROTECTED 0x07
+#define SENKEY_BLANK 0x08
+#define SENKEY_V_UNIQUE 0x09
+#define SENKEY_CPY_ABORT 0x0A
+#define SENKEY_ABORT 0x0B
+#define SENKEY_EQUAL 0x0C
+#define SENKEY_VOL_OVERFLOW 0x0D
+#define SENKEY_MISCOMP 0x0E
+#define SENKEY_RESERVED 0x0F
+
+/*
+ * Sense codes
+ */
+
+#define SENCODE_NO_SENSE 0x00
+#define SENCODE_END_OF_DATA 0x00
+#define SENCODE_BECOMING_READY 0x04
+#define SENCODE_INIT_CMD_REQUIRED 0x04
+#define SENCODE_PARAM_LIST_LENGTH_ERROR 0x1A
+#define SENCODE_INVALID_COMMAND 0x20
+#define SENCODE_LBA_OUT_OF_RANGE 0x21
+#define SENCODE_INVALID_CDB_FIELD 0x24
+#define SENCODE_LUN_NOT_SUPPORTED 0x25
+#define SENCODE_INVALID_PARAM_FIELD 0x26
+#define SENCODE_PARAM_NOT_SUPPORTED 0x26
+#define SENCODE_PARAM_VALUE_INVALID 0x26
+#define SENCODE_RESET_OCCURRED 0x29
+#define SENCODE_LUN_NOT_SELF_CONFIGURED_YET 0x3E
+#define SENCODE_INQUIRY_DATA_CHANGED 0x3F
+#define SENCODE_SAVING_PARAMS_NOT_SUPPORTED 0x39
+#define SENCODE_DIAGNOSTIC_FAILURE 0x40
+#define SENCODE_INTERNAL_TARGET_FAILURE 0x44
+#define SENCODE_INVALID_MESSAGE_ERROR 0x49
+#define SENCODE_LUN_FAILED_SELF_CONFIG 0x4c
+#define SENCODE_OVERLAPPED_COMMAND 0x4E
+
+/*
+ * Additional sense codes
+ */
+
+#define ASENCODE_NO_SENSE 0x00
+#define ASENCODE_END_OF_DATA 0x05
+#define ASENCODE_BECOMING_READY 0x01
+#define ASENCODE_INIT_CMD_REQUIRED 0x02
+#define ASENCODE_PARAM_LIST_LENGTH_ERROR 0x00
+#define ASENCODE_INVALID_COMMAND 0x00
+#define ASENCODE_LBA_OUT_OF_RANGE 0x00
+#define ASENCODE_INVALID_CDB_FIELD 0x00
+#define ASENCODE_LUN_NOT_SUPPORTED 0x00
+#define ASENCODE_INVALID_PARAM_FIELD 0x00
+#define ASENCODE_PARAM_NOT_SUPPORTED 0x01
+#define ASENCODE_PARAM_VALUE_INVALID 0x02
+#define ASENCODE_RESET_OCCURRED 0x00
+#define ASENCODE_LUN_NOT_SELF_CONFIGURED_YET 0x00
+#define ASENCODE_INQUIRY_DATA_CHANGED 0x03
+#define ASENCODE_SAVING_PARAMS_NOT_SUPPORTED 0x00
+#define ASENCODE_DIAGNOSTIC_FAILURE 0x80
+#define ASENCODE_INTERNAL_TARGET_FAILURE 0x00
+#define ASENCODE_INVALID_MESSAGE_ERROR 0x00
+#define ASENCODE_LUN_FAILED_SELF_CONFIG 0x00
+#define ASENCODE_OVERLAPPED_COMMAND 0x00
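+
+/*
+ * The SENCODE and ASENCODE values pair up into standard SCSI ASC/ASCQ
+ * codes; e.g. SENCODE_INVALID_CDB_FIELD / ASENCODE_INVALID_CDB_FIELD is
+ * the familiar 24h/00h "invalid field in CDB".
+ */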
+
+#define BYTE0(x) (unsigned char)(x)
+#define BYTE1(x) (unsigned char)((x) >> 8)
+#define BYTE2(x) (unsigned char)((x) >> 16)
+#define BYTE3(x) (unsigned char)((x) >> 24)
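+/* e.g. BYTE2(0x11223344) == 0x22 */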
+
+/*------------------------------------------------------------------------------
+ * S T R U C T S / T Y P E D E F S
+ *----------------------------------------------------------------------------*/
+/* SCSI inquiry data */
+struct inquiry_data {
+ u8 inqd_pdt; /* Peripheral qualifier | Peripheral Device Type */
+ u8 inqd_dtq; /* RMB | Device Type Qualifier */
+ u8 inqd_ver; /* ISO version | ECMA version | ANSI-approved version */
+ u8 inqd_rdf; /* AENC | TrmIOP | Response data format */
+ u8 inqd_len; /* Additional length (n-4) */
+ u8 inqd_pad1[2]; /* Reserved - must be zero */
+ u8 inqd_pad2; /* RelAdr | WBus32 | WBus16 | Sync | Linked |Reserved| CmdQue | SftRe */
+ u8 inqd_vid[8]; /* Vendor ID */
+ u8 inqd_pid[16]; /* Product ID */
+ u8 inqd_prl[4]; /* Product Revision Level */
+};
+
+struct sense_data {
+ u8 error_code; /* 70h (current errors), 71h(deferred errors) */
+ u8 valid:1; /* A valid bit of one indicates that the information */
+ /* field contains valid information as defined in the
+ * SCSI-2 Standard.
+ */
+ u8 segment_number; /* Only used for COPY, COMPARE, or COPY AND VERIFY Commands */
+ u8 sense_key:4; /* Sense Key */
+ u8 reserved:1;
+ u8 ILI:1; /* Incorrect Length Indicator */
+ u8 EOM:1; /* End Of Medium - reserved for random access devices */
+ u8 filemark:1; /* Filemark - reserved for random access devices */
+
+ u8 information[4]; /* for direct-access devices, contains the unsigned
+ * logical block address or residue associated with
+ * the sense key
+ */
+ u8 add_sense_len; /* number of additional sense bytes to follow this field */
+ u8 cmnd_info[4]; /* not used */
+ u8 ASC; /* Additional Sense Code */
+ u8 ASCQ; /* Additional Sense Code Qualifier */
+ u8 FRUC; /* Field Replaceable Unit Code - not used */
+ u8 bit_ptr:3; /* indicates which byte of the CDB or parameter data
+ * was in error
+ */
+ u8 BPV:1; /* bit pointer valid (BPV): 1- indicates that
+ * the bit_ptr field has valid value
+ */
+ u8 reserved2:2;
+ u8 CD:1; /* command data bit: 1- illegal parameter in CDB.
+ * 0- illegal parameter in data.
+ */
+ u8 SKSV:1;
+ u8 field_ptr[2]; /* byte of the CDB or parameter data in error */
+};
+
+/*
+ * M O D U L E G L O B A L S
+ */
+
+static struct fsa_scsi_hba *fsa_dev[MAXIMUM_NUM_ADAPTERS]; /* SCSI Device
+ Instance Ptrs */
+static struct sense_data sense_data[MAXIMUM_NUM_CONTAINERS];
+static void get_sd_devname(int disknum, char *buffer);
+static unsigned long aac_build_sg(Scsi_Cmnd* scsicmd, struct sgmap* sgmap);
+static unsigned long aac_build_sg64(Scsi_Cmnd* scsicmd, struct sgmap64* psg);
+static int aac_send_srb_fib(Scsi_Cmnd* scsicmd);
+#ifdef AAC_DETAILED_STATUS_INFO
+static char *aac_get_status_string(u32 status);
+#endif
+
+/**
+ * aac_get_containers - list containers
+ * @dev: adapter to probe
+ *
+ * Make a list of all containers on this controller
+ */
+int aac_get_containers(struct aac_dev *dev)
+{
+ struct fsa_scsi_hba *fsa_dev_ptr;
+ u32 index, status = 0;
+ struct aac_query_mount *dinfo;
+ struct aac_mount *dresp;
+ struct fib * fibptr;
+ unsigned instance;
+
+ fsa_dev_ptr = &(dev->fsa_dev);
+ instance = dev->scsi_host_ptr->unique_id;
+
+ if (!(fibptr = fib_alloc(dev)))
+ return -ENOMEM;
+
+ for (index = 0; index < MAXIMUM_NUM_CONTAINERS; index++) {
+ fib_init(fibptr);
+ dinfo = (struct aac_query_mount *) fib_data(fibptr);
+
+ dinfo->command = cpu_to_le32(VM_NameServe);
+ dinfo->count = cpu_to_le32(index);
+ dinfo->type = cpu_to_le32(FT_FILESYS);
+
+ printk("aac_get_container: getting info for container %d\n", index);
+ status = fib_send(ContainerCommand,
+ fibptr,
+ sizeof (struct aac_query_mount),
+ FsaNormal,
+ 1, 1,
+ NULL, NULL);
+ if (status < 0 ) {
+ printk(KERN_WARNING "ProbeContainers: SendFIB failed.\n");
+ break;
+ }
+ dresp = (struct aac_mount *)fib_data(fibptr);
+
+ if ((le32_to_cpu(dresp->status) == ST_OK) &&
+ (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) {
+ fsa_dev_ptr->valid[index] = 1;
+ fsa_dev_ptr->type[index] = le32_to_cpu(dresp->mnt[0].vol);
+ fsa_dev_ptr->size[index] = le32_to_cpu(dresp->mnt[0].capacity);
+ if (le32_to_cpu(dresp->mnt[0].state) & FSCS_READONLY)
+ fsa_dev_ptr->ro[index] = 1;
+ }
+ fib_complete(fibptr);
+ /*
+ * If there are no more containers, then stop asking.
+ */
+ if ((index + 1) >= le32_to_cpu(dresp->count))
+ break;
+ }
+ fib_free(fibptr);
+ fsa_dev[instance] = fsa_dev_ptr;
+ return status;
+}
+
+/**
+ * probe_container - query a logical volume
+ * @dev: device to query
+ * @cid: container identifier
+ *
+ * Queries the controller about the given volume. The volume information
+ * is updated in the struct fsa_scsi_hba rather than returned.
+ */
+
+static int probe_container(struct aac_dev *dev, int cid)
+{
+ struct fsa_scsi_hba *fsa_dev_ptr;
+ int status;
+ struct aac_query_mount *dinfo;
+ struct aac_mount *dresp;
+ struct fib * fibptr;
+ unsigned instance;
+
+ fsa_dev_ptr = &(dev->fsa_dev);
+ instance = dev->scsi_host_ptr->unique_id;
+
+ if (!(fibptr = fib_alloc(dev)))
+ return -ENOMEM;
+
+ fib_init(fibptr);
+
+ dinfo = (struct aac_query_mount *)fib_data(fibptr);
+
+ dinfo->command = cpu_to_le32(VM_NameServe);
+ dinfo->count = cpu_to_le32(cid);
+ dinfo->type = cpu_to_le32(FT_FILESYS);
+
+ status = fib_send(ContainerCommand,
+ fibptr,
+ sizeof(struct aac_query_mount),
+ FsaNormal,
+ 1, 1,
+ NULL, NULL);
+ if (status < 0) {
+ printk(KERN_WARNING "aacraid: probe_containers query failed.\n");
+ goto error;
+ }
+
+ dresp = (struct aac_mount *) fib_data(fibptr);
+
+ if ((le32_to_cpu(dresp->status) == ST_OK) &&
+ (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) {
+ fsa_dev_ptr->valid[cid] = 1;
+ fsa_dev_ptr->type[cid] = le32_to_cpu(dresp->mnt[0].vol);
+ fsa_dev_ptr->size[cid] = le32_to_cpu(dresp->mnt[0].capacity);
+ if (le32_to_cpu(dresp->mnt[0].state) & FSCS_READONLY)
+ fsa_dev_ptr->ro[cid] = 1;
+ }
+
+ error:
+ fib_complete(fibptr);
+ fib_free(fibptr);
+
+ return status;
+}
+
+/* Local Structure to set SCSI inquiry data strings */
+struct scsi_inq {
+ char vid[8]; /* Vendor ID */
+ char pid[16]; /* Product ID */
+ char prl[4]; /* Product Revision Level */
+};
+
+/**
+ * inqstrcpy - string copy without terminator
+ * @a: string to copy from
+ * @b: buffer to copy to
+ *
+ * Copy a string from one location to another
+ * without copying the trailing \0
+ */
+
+static void inqstrcpy(char *a, char *b)
+{
+
+ while(*a != (char)0)
+ *b++ = *a++;
+}
+
+static char *container_types[] = {
+ "None",
+ "Volume",
+ "Mirror",
+ "Stripe",
+ "RAID5",
+ "SSRW",
+ "SSRO",
+ "Morph",
+ "Legacy",
+ "RAID4",
+ "RAID10",
+ "RAID00",
+ "V-MIRRORS",
+ "PSEUDO R4",
+ "RAID50",
+ "Unknown"
+};
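+
+/* Indexed by the CT_* container type codes in aacraid.h (CT_NONE == 0,
+ * CT_VOLUME == 1, ...), with "Unknown" as the final catch-all entry. */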
+
+
+
+/* Function: setinqstr
+ *
+ * Arguments: devtype (card type), data (inquiry buffer),
+ * tindex (container type index)
+ *
+ * Purpose: Sets SCSI inquiry data strings for vendor, product
+ * and revision level. Allows strings to be set in platform dependent
+ * files instead of in OS dependent driver source.
+ */
+
+static void setinqstr(int devtype, void *data, int tindex)
+{
+ struct scsi_inq *str;
+ char *findit;
+ struct aac_driver_ident *mp;
+
+ mp = aac_get_driver_ident(devtype);
+
+ str = (struct scsi_inq *)(data); /* cast data to scsi inq block */
+
+ inqstrcpy (mp->vname, str->vid);
+ inqstrcpy (mp->model, str->pid); /* last six chars reserved for vol type */
+
+ findit = str->pid;
+
+ for ( ; *findit != ' '; findit++); /* walk till we find a space then incr by 1 */
+ findit++;
+
+ if (tindex < (sizeof(container_types)/sizeof(char *))){
+ inqstrcpy (container_types[tindex], findit);
+ }
+ inqstrcpy ("V1.0", str->prl);
+}
+
+void set_sense(u8 *sense_buf, u8 sense_key, u8 sense_code,
+ u8 a_sense_code, u8 incorrect_length,
+ u8 bit_pointer, u16 field_pointer,
+ u32 residue)
+{
+ sense_buf[0] = 0xF0; /* Sense data valid, err code 70h (current error) */
+ sense_buf[1] = 0; /* Segment number, always zero */
+
+ if (incorrect_length) {
+ sense_buf[2] = sense_key | 0x20; /* Set ILI bit | sense key */
+ sense_buf[3] = BYTE3(residue);
+ sense_buf[4] = BYTE2(residue);
+ sense_buf[5] = BYTE1(residue);
+ sense_buf[6] = BYTE0(residue);
+ } else
+ sense_buf[2] = sense_key; /* Sense key */
+
+ if (sense_key == SENKEY_ILLEGAL)
+ sense_buf[7] = 10; /* Additional sense length */
+ else
+ sense_buf[7] = 6; /* Additional sense length */
+
+ sense_buf[12] = sense_code; /* Additional sense code */
+ sense_buf[13] = a_sense_code; /* Additional sense code qualifier */
+ if (sense_key == SENKEY_ILLEGAL) {
+ sense_buf[15] = 0;
+
+ if (sense_code == SENCODE_INVALID_PARAM_FIELD)
+ sense_buf[15] = 0x80; /* Std sense key specific field */
+ /* Illegal parameter is in the parameter block */
+
+ if (sense_code == SENCODE_INVALID_CDB_FIELD)
+ sense_buf[15] = 0xc0; /* Std sense key specific field */
+ /* Illegal parameter is in the CDB block */
+ sense_buf[15] |= bit_pointer;
+ sense_buf[16] = field_pointer >> 8; /* MSB */
+ sense_buf[17] = field_pointer; /* LSB */
+ }
+}
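+
+/*
+ * Illustrative call (hypothetical values): flagging bit 5 of CDB byte 2
+ * as invalid,
+ *
+ *     set_sense(buf, SENKEY_ILLEGAL, SENCODE_INVALID_CDB_FIELD,
+ *               ASENCODE_INVALID_CDB_FIELD, 0, 5, 2, 0);
+ *
+ * yields buf[2] == 0x05, buf[12] == 0x24, buf[15] == 0xc5 and a field
+ * pointer of 0x0002 in buf[16..17].
+ */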
+
+static void aac_io_done(Scsi_Cmnd * scsicmd)
+{
+ unsigned long cpu_flags;
+ spin_lock_irqsave(&io_request_lock, cpu_flags);
+ scsicmd->scsi_done(scsicmd);
+ spin_unlock_irqrestore(&io_request_lock, cpu_flags);
+}
+
+static void __aac_io_done(Scsi_Cmnd * scsicmd)
+{
+ scsicmd->scsi_done(scsicmd);
+}
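+
+/*
+ * Locking note: aac_io_done() takes io_request_lock around the midlayer
+ * completion, while __aac_io_done() assumes the caller already holds it
+ * (as aac_scsi_cmd() does when called from the SCSI midlayer).
+ */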
+
+int aac_get_adapter_info(struct aac_dev* dev)
+{
+ struct fib* fibptr;
+ struct aac_adapter_info* info;
+ int rcode;
+ u32 tmp;
+
+ if (!(fibptr = fib_alloc(dev)))
+ return -ENOMEM;
+
+ fib_init(fibptr);
+ info = (struct aac_adapter_info*) fib_data(fibptr);
+
+ memset(info,0,sizeof(struct aac_adapter_info));
+
+ rcode = fib_send(RequestAdapterInfo,
+ fibptr,
+ sizeof(struct aac_adapter_info),
+ FsaNormal,
+ 1, 1,
+ NULL,
+ NULL);
+
+ memcpy(&dev->adapter_info, info, sizeof(struct aac_adapter_info));
+
+ tmp = dev->adapter_info.kernelrev;
+ printk(KERN_INFO "%s%d: kernel %d.%d.%d build %d\n",
+ dev->name, dev->id,
+ tmp>>24,(tmp>>16)&0xff,(tmp>>8)&0xff,
+ dev->adapter_info.kernelbuild);
+ tmp = dev->adapter_info.monitorrev;
+ printk(KERN_INFO "%s%d: monitor %d.%d.%d build %d\n",
+ dev->name, dev->id,
+ tmp>>24,(tmp>>16)&0xff,(tmp>>8)&0xff,
+ dev->adapter_info.monitorbuild);
+ tmp = dev->adapter_info.biosrev;
+ printk(KERN_INFO "%s%d: bios %d.%d.%d build %d\n",
+ dev->name, dev->id,
+ tmp>>24,(tmp>>16)&0xff,(tmp>>8)&0xff,
+ dev->adapter_info.biosbuild);
+ printk(KERN_INFO "%s%d: serial %x%x\n",
+ dev->name, dev->id,
+ dev->adapter_info.serial[0],
+ dev->adapter_info.serial[1]);
+ dev->pae_support = 0;
+ dev->nondasd_support = 0;
+ if( BITS_PER_LONG >= 64 &&
+ (dev->adapter_info.options & AAC_OPT_SGMAP_HOST64)){
+ printk(KERN_INFO "%s%d: 64 Bit PAE enabled\n",
+ dev->name, dev->id);
+ dev->pae_support = 1;
+ }
+ /* TODO - dmb temporary until fw can set this bit */
+ dev->pae_support = (BITS_PER_LONG >= 64);
+ if(dev->pae_support != 0) {
+ printk(KERN_INFO "%s%d: 64 Bit PAE enabled\n",
+ dev->name, dev->id);
+ }
+
+ if(dev->adapter_info.options & AAC_OPT_NONDASD){
+ dev->nondasd_support = 1;
+ }
+ return rcode;
+}
+
+
+static void read_callback(void *context, struct fib * fibptr)
+{
+ struct aac_dev *dev;
+ struct aac_read_reply *readreply;
+ Scsi_Cmnd *scsicmd;
+ u32 lba;
+ u32 cid;
+
+ scsicmd = (Scsi_Cmnd *) context;
+
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+ cid =TARGET_LUN_TO_CONTAINER(scsicmd->target, scsicmd->lun);
+
+ lba = ((scsicmd->cmnd[1] & 0x1F) << 16) | (scsicmd->cmnd[2] << 8) | scsicmd->cmnd[3];
+ dprintk((KERN_DEBUG "read_callback[cpu %d]: lba = %d, t = %ld.\n", smp_processor_id(), lba, jiffies));
+
+ if (fibptr == NULL)
+ BUG();
+
+ if(scsicmd->use_sg)
+ pci_unmap_sg(dev->pdev,
+ (struct scatterlist *)scsicmd->buffer,
+ scsicmd->use_sg,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ else if(scsicmd->request_bufflen)
+ pci_unmap_single(dev->pdev, (dma_addr_t)(unsigned long)scsicmd->SCp.ptr,
+ scsicmd->request_bufflen,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ readreply = (struct aac_read_reply *)fib_data(fibptr);
+ if (le32_to_cpu(readreply->status) == ST_OK)
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ else {
+ printk(KERN_WARNING "read_callback: read failed, status = %d\n", readreply->status);
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | CHECK_CONDITION;
+ set_sense((u8 *) &sense_data[cid],
+ SENKEY_HW_ERR,
+ SENCODE_INTERNAL_TARGET_FAILURE,
+ ASENCODE_INTERNAL_TARGET_FAILURE, 0, 0,
+ 0, 0);
+ }
+ fib_complete(fibptr);
+ fib_free(fibptr);
+
+ aac_io_done(scsicmd);
+}
+
+static void write_callback(void *context, struct fib * fibptr)
+{
+ struct aac_dev *dev;
+ struct aac_write_reply *writereply;
+ Scsi_Cmnd *scsicmd;
+ u32 lba;
+ u32 cid;
+
+ scsicmd = (Scsi_Cmnd *) context;
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+ cid = TARGET_LUN_TO_CONTAINER(scsicmd->target, scsicmd->lun);
+
+ lba = ((scsicmd->cmnd[1] & 0x1F) << 16) | (scsicmd->cmnd[2] << 8) | scsicmd->cmnd[3];
+ dprintk((KERN_DEBUG "write_callback[cpu %d]: lba = %d, t = %ld.\n", smp_processor_id(), lba, jiffies));
+ if (fibptr == NULL)
+ BUG();
+
+ if(scsicmd->use_sg)
+ pci_unmap_sg(dev->pdev,
+ (struct scatterlist *)scsicmd->buffer,
+ scsicmd->use_sg,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ else if(scsicmd->request_bufflen)
+ pci_unmap_single(dev->pdev, (dma_addr_t)(unsigned long)scsicmd->SCp.ptr,
+ scsicmd->request_bufflen,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+
+ writereply = (struct aac_write_reply *) fib_data(fibptr);
+ if (le32_to_cpu(writereply->status) == ST_OK)
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ else {
+ printk(KERN_WARNING "write_callback: write failed, status = %d\n", writereply->status);
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | CHECK_CONDITION;
+ set_sense((u8 *) &sense_data[cid],
+ SENKEY_HW_ERR,
+ SENCODE_INTERNAL_TARGET_FAILURE,
+ ASENCODE_INTERNAL_TARGET_FAILURE, 0, 0,
+ 0, 0);
+ }
+
+ fib_complete(fibptr);
+ fib_free(fibptr);
+ aac_io_done(scsicmd);
+}
+
+int aac_read(Scsi_Cmnd * scsicmd, int cid)
+{
+ u32 lba;
+ u32 count;
+ int status;
+
+ u16 fibsize;
+ struct aac_dev *dev;
+ struct fib * cmd_fibcontext;
+
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+ /*
+ * Get block address and transfer length
+ */
+ if (scsicmd->cmnd[0] == SS_READ) /* 6 byte command */
+ {
+ dprintk((KERN_DEBUG "aachba: received a read(6) command on target %d.\n", cid));
+
+ lba = ((scsicmd->cmnd[1] & 0x1F) << 16) | (scsicmd->cmnd[2] << 8) | scsicmd->cmnd[3];
+ count = scsicmd->cmnd[4];
+
+ if (count == 0)
+ count = 256;
+ } else {
+ dprintk((KERN_DEBUG "aachba: received a read(10) command on target %d.\n", cid));
+
+ lba = (scsicmd->cmnd[2] << 24) | (scsicmd->cmnd[3] << 16) | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
+ count = (scsicmd->cmnd[7] << 8) | scsicmd->cmnd[8];
+ }
+ dprintk((KERN_DEBUG "aac_read[cpu %d]: lba = %u, t = %ld.\n", smp_processor_id(), lba, jiffies));
+ /*
+ * Allocate and initialize a Fib
+ */
+ if (!(cmd_fibcontext = fib_alloc(dev))) {
+ scsicmd->result = DID_ERROR << 16;
+ aac_io_done(scsicmd);
+ return (-1);
+ }
+
+ fib_init(cmd_fibcontext);
+
+ if(dev->pae_support == 1){
+ struct aac_read64 *readcmd;
+ readcmd = (struct aac_read64 *) fib_data(cmd_fibcontext);
+ readcmd->command = cpu_to_le32(VM_CtHostRead64);
+ readcmd->cid = cpu_to_le16(cid);
+ readcmd->sector_count = cpu_to_le16(count);
+ readcmd->block = cpu_to_le32(lba);
+ readcmd->pad = cpu_to_le16(0);
+ readcmd->flags = cpu_to_le16(0);
+
+ aac_build_sg64(scsicmd, &readcmd->sg);
+ if(readcmd->sg.count > MAX_DRIVER_SG_SEGMENT_COUNT)
+ BUG();
+ fibsize = sizeof(struct aac_read64) +
+ ((readcmd->sg.count - 1) * sizeof (struct sgentry64));
+ /*
+ * Now send the Fib to the adapter
+ */
+ status = fib_send(ContainerCommand64,
+ cmd_fibcontext,
+ fibsize,
+ FsaNormal,
+ 0, 1,
+ (fib_callback) read_callback,
+ (void *) scsicmd);
+ } else {
+ struct aac_read *readcmd;
+ readcmd = (struct aac_read *) fib_data(cmd_fibcontext);
+ readcmd->command = cpu_to_le32(VM_CtBlockRead);
+ readcmd->cid = cpu_to_le32(cid);
+ readcmd->block = cpu_to_le32(lba);
+ readcmd->count = cpu_to_le32(count * 512);
+
+ if (count * 512 > (64 * 1024))
+ BUG();
+
+ aac_build_sg(scsicmd, &readcmd->sg);
+ if(readcmd->sg.count > MAX_DRIVER_SG_SEGMENT_COUNT)
+ BUG();
+ fibsize = sizeof(struct aac_read) +
+ ((readcmd->sg.count - 1) * sizeof (struct sgentry));
+ /*
+ * Now send the Fib to the adapter
+ */
+ status = fib_send(ContainerCommand,
+ cmd_fibcontext,
+ fibsize,
+ FsaNormal,
+ 0, 1,
+ (fib_callback) read_callback,
+ (void *) scsicmd);
+ }
+
+
+ /*
+ * Check that the command was queued to the controller
+ */
+ if (status == -EINPROGRESS)
+ return 0;
+
+ printk(KERN_WARNING "aac_read: fib_send failed with status: %d.\n",
+ status);
+ /*
+ * For some reason the Fib didn't queue; return QUEUE_FULL
+ */
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | QUEUE_FULL;
+ aac_io_done(scsicmd);
+ fib_complete(cmd_fibcontext);
+ fib_free(cmd_fibcontext);
+ return -1;
+}
+
+static int aac_write(Scsi_Cmnd * scsicmd, int cid)
+{
+ u32 lba;
+ u32 count;
+ int status;
+ u16 fibsize;
+ struct aac_dev *dev;
+ struct fib * cmd_fibcontext;
+
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+ /*
+ * Get block address and transfer length
+ */
+ if (scsicmd->cmnd[0] == SS_WRITE) /* 6 byte command */
+ {
+ lba = ((scsicmd->cmnd[1] & 0x1F) << 16) | (scsicmd->cmnd[2] << 8) | scsicmd->cmnd[3];
+ count = scsicmd->cmnd[4];
+ if (count == 0)
+ count = 256;
+ } else {
+ dprintk((KERN_DEBUG "aachba: received a write(10) command on target %d.\n", cid));
+ lba = (scsicmd->cmnd[2] << 24) | (scsicmd->cmnd[3] << 16) | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
+ count = (scsicmd->cmnd[7] << 8) | scsicmd->cmnd[8];
+ }
+ dprintk((KERN_DEBUG "aac_write[cpu %d]: lba = %u, t = %ld.\n",
+ smp_processor_id(), lba, jiffies));
+ /*
+ * Allocate and initialize a Fib then setup a BlockWrite command
+ */
+ if (!(cmd_fibcontext = fib_alloc(dev))) {
+ scsicmd->result = DID_ERROR << 16;
+ aac_io_done(scsicmd);
+ return -1;
+ }
+ fib_init(cmd_fibcontext);
+
+ if(dev->pae_support == 1)
+ {
+ struct aac_write64 *writecmd;
+ writecmd = (struct aac_write64 *) fib_data(cmd_fibcontext);
+ writecmd->command = cpu_to_le32(VM_CtHostWrite64);
+ writecmd->cid = cpu_to_le16(cid);
+ writecmd->sector_count = cpu_to_le16(count);
+ writecmd->block = cpu_to_le32(lba);
+ writecmd->pad = cpu_to_le16(0);
+ writecmd->flags = cpu_to_le16(0);
+
+ aac_build_sg64(scsicmd, &writecmd->sg);
+ if(writecmd->sg.count > MAX_DRIVER_SG_SEGMENT_COUNT)
+ BUG();
+ fibsize = sizeof(struct aac_write64) +
+ ((writecmd->sg.count - 1) * sizeof (struct sgentry64));
+ /*
+ * Now send the Fib to the adapter
+ */
+ status = fib_send(ContainerCommand64,
+ cmd_fibcontext,
+ fibsize,
+ FsaNormal,
+ 0, 1,
+ (fib_callback) write_callback,
+ (void *) scsicmd);
+ }
+ else
+ {
+ struct aac_write *writecmd;
+ writecmd = (struct aac_write *) fib_data(cmd_fibcontext);
+ writecmd->command = cpu_to_le32(VM_CtBlockWrite);
+ writecmd->cid = cpu_to_le32(cid);
+ writecmd->block = cpu_to_le32(lba);
+ writecmd->count = cpu_to_le32(count * 512);
+ writecmd->sg.count = cpu_to_le32(1);
+ /* ->stable is not used - it did mean which type of write */
+
+ if (count * 512 > (64 * 1024))
+ BUG();
+ aac_build_sg(scsicmd, &writecmd->sg);
+ if(writecmd->sg.count > MAX_DRIVER_SG_SEGMENT_COUNT)
+ BUG();
+ fibsize = sizeof(struct aac_write) +
+ ((writecmd->sg.count - 1) * sizeof (struct sgentry));
+ /*
+ * Now send the Fib to the adapter
+ */
+ status = fib_send(ContainerCommand,
+ cmd_fibcontext,
+ fibsize,
+ FsaNormal,
+ 0, 1,
+ (fib_callback) write_callback,
+ (void *) scsicmd);
+ }
+
+ /*
+ * Check that the command was queued to the controller
+ */
+ if (status == -EINPROGRESS)
+ return 0;
+
+ printk(KERN_WARNING "aac_write: fib_send failed with status: %d\n", status);
+ /*
+ * For some reason the Fib didn't queue; return QUEUE_FULL
+ */
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | QUEUE_FULL;
+ aac_io_done(scsicmd);
+
+ fib_complete(cmd_fibcontext);
+ fib_free(cmd_fibcontext);
+ return -1;
+}
+
+
+/**
+ * aac_scsi_cmd() - Process SCSI command
+ * @scsicmd: SCSI command block
+ *
+ * Emulate a SCSI command and queue the required request for the
+ * aacraid firmware.
+ */
+
+int aac_scsi_cmd(Scsi_Cmnd * scsicmd)
+{
+ u32 cid = 0;
+ struct fsa_scsi_hba *fsa_dev_ptr;
+ int cardtype;
+ int ret;
+ struct aac_dev *dev = (struct aac_dev *)scsicmd->host->hostdata;
+
+ cardtype = dev->cardtype;
+
+ fsa_dev_ptr = fsa_dev[scsicmd->host->unique_id];
+
+ /*
+ * If the bus, target or lun is out of range, return fail
+ * Test does not apply to ID 16, the pseudo id for the controller
+ * itself.
+ */
+ if (scsicmd->target != scsicmd->host->this_id) {
+ if ((scsicmd->channel == 0) ){
+ if( (scsicmd->target >= AAC_MAX_TARGET) || (scsicmd->lun != 0)){
+ scsicmd->result = DID_NO_CONNECT << 16;
+ __aac_io_done(scsicmd);
+ return 0;
+ }
+ cid = TARGET_LUN_TO_CONTAINER(scsicmd->target, scsicmd->lun);
+
+ /*
+ * If the target container doesn't exist, it may have
+ * been newly created
+ */
+ if (fsa_dev_ptr->valid[cid] == 0) {
+ switch (scsicmd->cmnd[0]) {
+ case SS_INQUIR:
+ case SS_RDCAP:
+ case SS_TEST:
+ spin_unlock_irq(&io_request_lock);
+ probe_container(dev, cid);
+ spin_lock_irq(&io_request_lock);
+ if (fsa_dev_ptr->valid[cid] == 0) {
+ scsicmd->result = DID_NO_CONNECT << 16;
+ __aac_io_done(scsicmd);
+ return 0;
+ }
+ default:
+ break;
+ }
+ }
+ /*
+ * If the target container still doesn't exist,
+ * return failure
+ */
+ if (fsa_dev_ptr->valid[cid] == 0) {
+ scsicmd->result = DID_BAD_TARGET << 16;
+ __aac_io_done(scsicmd);
+ return -1;
+ }
+ } else { /* check for physical non-dasd devices */
+ if(dev->nondasd_support == 1){
+ return aac_send_srb_fib(scsicmd);
+ } else {
+ scsicmd->result = DID_NO_CONNECT << 16;
+ __aac_io_done(scsicmd);
+ return 0;
+ }
+ }
+ }
+ /*
+ * else Command for the controller itself
+ */
+ else if ((scsicmd->cmnd[0] != SS_INQUIR) &&
+ (scsicmd->cmnd[0] != SS_TEST))
+ {
+ /* only INQUIRY & TUR cmnd supported for controller */
+ dprintk((KERN_WARNING "Only INQUIRY & TUR command supported for "
+ "controller, rcvd = 0x%x.\n", scsicmd->cmnd[0]));
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 |
+ CHECK_CONDITION;
+ set_sense((u8 *) &sense_data[cid],
+ SENKEY_ILLEGAL,
+ SENCODE_INVALID_COMMAND,
+ ASENCODE_INVALID_COMMAND, 0, 0, 0, 0);
+ __aac_io_done(scsicmd);
+ return -1;
+ }
+
+
+ /* Handle commands here that don't require going out to the adapter */
+ switch (scsicmd->cmnd[0]) {
+ case SS_INQUIR:
+ {
+ struct inquiry_data *inq_data_ptr;
+
+ dprintk((KERN_DEBUG "INQUIRY command, ID: %d.\n", scsicmd->target));
+ inq_data_ptr = (struct inquiry_data *)scsicmd->request_buffer;
+ memset(inq_data_ptr, 0, sizeof (struct inquiry_data));
+
+ inq_data_ptr->inqd_ver = 2; /* claim compliance to SCSI-2 */
+ inq_data_ptr->inqd_dtq = 0x80; /* set RMB bit to one indicating that the medium is removable */
+ inq_data_ptr->inqd_rdf = 2; /* A response data format value of two indicates that the data shall be in the format specified in SCSI-2 */
+ inq_data_ptr->inqd_len = 31;
+ /*Format for "pad2" is RelAdr | WBus32 | WBus16 | Sync | Linked |Reserved| CmdQue | SftRe */
+ inq_data_ptr->inqd_pad2= 0x32 ; /*WBus16|Sync|CmdQue */
+ /*
+ * Set the Vendor, Product, and Revision Level
+ * see: <vendor>.c i.e. aac.c
+ */
+ setinqstr(cardtype, (void *) (inq_data_ptr->inqd_vid), fsa_dev_ptr->type[cid]);
+ if (scsicmd->target == scsicmd->host->this_id)
+ inq_data_ptr->inqd_pdt = INQD_PDT_PROC; /* Processor device */
+ else
+ inq_data_ptr->inqd_pdt = INQD_PDT_DA; /* Direct/random access device */
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ __aac_io_done(scsicmd);
+ return 0;
+ }
+ case SS_RDCAP:
+ {
+ int capacity;
+ char *cp;
+
+ dprintk((KERN_DEBUG "READ CAPACITY command.\n"));
+ capacity = fsa_dev_ptr->size[cid] - 1;
+ cp = scsicmd->request_buffer;
+ cp[0] = (capacity >> 24) & 0xff;
+ cp[1] = (capacity >> 16) & 0xff;
+ cp[2] = (capacity >> 8) & 0xff;
+ cp[3] = (capacity >> 0) & 0xff;
+ cp[4] = 0;
+ cp[5] = 0;
+ cp[6] = 2;
+ cp[7] = 0;
+
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ __aac_io_done(scsicmd);
+
+ return 0;
+ }
+
+ case SS_MODESEN:
+ {
+ char *mode_buf;
+
+ dprintk((KERN_DEBUG "MODE SENSE command.\n"));
+ mode_buf = scsicmd->request_buffer;
+ mode_buf[0] = 0; /* Mode data length (MSB) */
+ mode_buf[1] = 6; /* Mode data length (LSB) */
+ mode_buf[2] = 0; /* Medium type - default */
+ mode_buf[3] = 0; /* Device-specific param,
+ bit 8: 0/1 = write enabled/protected */
+ mode_buf[4] = 0; /* reserved */
+ mode_buf[5] = 0; /* reserved */
+ mode_buf[6] = 0; /* Block descriptor length (MSB) */
+ mode_buf[7] = 0; /* Block descriptor length (LSB) */
+
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ __aac_io_done(scsicmd);
+
+ return 0;
+ }
+ case SS_REQSEN:
+ dprintk((KERN_DEBUG "REQUEST SENSE command.\n"));
+ memcpy(scsicmd->sense_buffer, &sense_data[cid],
+ sizeof (struct sense_data));
+ memset(&sense_data[cid], 0, sizeof (struct sense_data));
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ __aac_io_done(scsicmd);
+ return (0);
+
+ case SS_LOCK:
+ dprintk((KERN_DEBUG "LOCK command.\n"));
+ if (scsicmd->cmnd[4])
+ fsa_dev_ptr->locked[cid] = 1;
+ else
+ fsa_dev_ptr->locked[cid] = 0;
+
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ __aac_io_done(scsicmd);
+ return 0;
+ /*
+ * These commands are all No-Ops
+ */
+ case SS_TEST:
+ case SS_RESERV:
+ case SS_RELES:
+ case SS_REZERO:
+ case SS_REASGN:
+ case SS_SEEK:
+ case SS_ST_SP:
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | GOOD;
+ __aac_io_done(scsicmd);
+ return (0);
+ }
+
+ switch (scsicmd->cmnd[0])
+ {
+ case SS_READ:
+ case SM_READ:
+ /*
+ * Hack to keep track of ordinal number of the device that
+ * corresponds to a container. Needed to convert
+ * containers to /dev/sd device names
+ */
+
+ spin_unlock_irq(&io_request_lock);
+ fsa_dev_ptr->devno[cid] = DEVICE_NR(scsicmd->request.rq_dev);
+ ret = aac_read(scsicmd, cid);
+ spin_lock_irq(&io_request_lock);
+ return ret;
+
+ case SS_WRITE:
+ case SM_WRITE:
+ spin_unlock_irq(&io_request_lock);
+ ret = aac_write(scsicmd, cid);
+ spin_lock_irq(&io_request_lock);
+ return ret;
+ default:
+ /*
+ * Unhandled commands
+ */
+ printk(KERN_WARNING "Unhandled SCSI Command: 0x%x.\n",
+ scsicmd->cmnd[0]);
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 |
+ CHECK_CONDITION;
+ set_sense((u8 *) &sense_data[cid],
+ SENKEY_ILLEGAL, SENCODE_INVALID_COMMAND,
+ ASENCODE_INVALID_COMMAND, 0, 0, 0, 0);
+ __aac_io_done(scsicmd);
+ return -1;
+ }
+}
+
+static int query_disk(struct aac_dev *dev, void *arg)
+{
+ struct aac_query_disk qd;
+ struct fsa_scsi_hba *fsa_dev_ptr;
+
+ fsa_dev_ptr = &(dev->fsa_dev);
+ if (copy_from_user(&qd, arg, sizeof (struct aac_query_disk)))
+ return -EFAULT;
+ if (qd.cnum == -1)
+ qd.cnum = TARGET_LUN_TO_CONTAINER(qd.target, qd.lun);
+ else if ((qd.bus == -1) && (qd.target == -1) && (qd.lun == -1))
+ {
+ if (qd.cnum < 0 || qd.cnum >= MAXIMUM_NUM_CONTAINERS)
+ return -EINVAL;
+ qd.instance = dev->scsi_host_ptr->host_no;
+ qd.bus = 0;
+ qd.target = CONTAINER_TO_TARGET(qd.cnum);
+ qd.lun = CONTAINER_TO_LUN(qd.cnum);
+ }
+ else return -EINVAL;
+
+ qd.valid = fsa_dev_ptr->valid[qd.cnum];
+ qd.locked = fsa_dev_ptr->locked[qd.cnum];
+ qd.deleted = fsa_dev_ptr->deleted[qd.cnum];
+
+ if (fsa_dev_ptr->devno[qd.cnum] == -1)
+ qd.unmapped = 1;
+ else
+ qd.unmapped = 0;
+
+ get_sd_devname(fsa_dev_ptr->devno[qd.cnum], qd.name);
+
+ if (copy_to_user(arg, &qd, sizeof (struct aac_query_disk)))
+ return -EFAULT;
+ return 0;
+}
+
+static void get_sd_devname(int disknum, char *buffer)
+{
+ if (disknum < 0) {
+ sprintf(buffer, "%s", "");
+ return;
+ }
+
+ if (disknum < 26)
+ sprintf(buffer, "sd%c", 'a' + disknum);
+ else {
+ unsigned int min1;
+ unsigned int min2;
+ /*
+ * For larger numbers of disks, we need to go to a new
+ * naming scheme.
+ */
+ min1 = disknum / 26;
+ min2 = disknum % 26;
+ sprintf(buffer, "sd%c%c", 'a' + min1 - 1, 'a' + min2);
+ }
+}
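+
+/*
+ * Examples: disknum 0 -> "sda", 25 -> "sdz", 26 -> "sdaa", 27 -> "sdab".
+ */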
+
+static int force_delete_disk(struct aac_dev *dev, void *arg)
+{
+ struct aac_delete_disk dd;
+ struct fsa_scsi_hba *fsa_dev_ptr;
+
+ fsa_dev_ptr = &(dev->fsa_dev);
+
+ if (copy_from_user(&dd, arg, sizeof (struct aac_delete_disk)))
+ return -EFAULT;
+
+ if (dd.cnum >= MAXIMUM_NUM_CONTAINERS)
+ return -EINVAL;
+ /*
+ * Mark this container as being deleted.
+ */
+ fsa_dev_ptr->deleted[dd.cnum] = 1;
+ /*
+ * Mark the container as no longer valid
+ */
+ fsa_dev_ptr->valid[dd.cnum] = 0;
+ return 0;
+}
+
+static int delete_disk(struct aac_dev *dev, void *arg)
+{
+ struct aac_delete_disk dd;
+ struct fsa_scsi_hba *fsa_dev_ptr;
+
+ fsa_dev_ptr = &(dev->fsa_dev);
+
+ if (copy_from_user(&dd, arg, sizeof (struct aac_delete_disk)))
+ return -EFAULT;
+
+ if (dd.cnum >= MAXIMUM_NUM_CONTAINERS)
+ return -EINVAL;
+ /*
+ * If the container is locked, it cannot be deleted by the API.
+ */
+ if (fsa_dev_ptr->locked[dd.cnum])
+ return -EBUSY;
+ else {
+ /*
+ * Mark the container as no longer being valid.
+ */
+ fsa_dev_ptr->valid[dd.cnum] = 0;
+ fsa_dev_ptr->devno[dd.cnum] = -1;
+ return 0;
+ }
+}
+
+int aac_dev_ioctl(struct aac_dev *dev, int cmd, void *arg)
+{
+ switch (cmd) {
+ case FSACTL_QUERY_DISK:
+ return query_disk(dev, arg);
+ case FSACTL_DELETE_DISK:
+ return delete_disk(dev, arg);
+ case FSACTL_FORCE_DELETE_DISK:
+ return force_delete_disk(dev, arg);
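+ /* 2131 is a raw ioctl number with no FSACTL_* name in this header;
+ * it simply forces a rescan of the containers. */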
+ case 2131:
+ return aac_get_containers(dev);
+ default:
+ return -ENOTTY;
+ }
+}
+
+/**
+ *
+ * aac_srb_callback
+ * @context: the context set in the fib - here it is scsi cmd
+ * @fibptr: pointer to the fib
+ *
+ * Handles the completion of a scsi command to a non-DASD device
+ *
+ */
+
+static void aac_srb_callback(void *context, struct fib * fibptr)
+{
+ struct aac_dev *dev;
+ struct aac_srb_reply *srbreply;
+ Scsi_Cmnd *scsicmd;
+
+ scsicmd = (Scsi_Cmnd *) context;
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+
+ if (fibptr == NULL)
+ BUG();
+
+ srbreply = (struct aac_srb_reply *) fib_data(fibptr);
+
+ scsicmd->sense_buffer[0] = '\0'; // initialize sense valid flag to false
+ // calculate resid for sg
+ scsicmd->resid = scsicmd->request_bufflen - srbreply->data_xfer_length;
+
+ if(scsicmd->use_sg)
+ pci_unmap_sg(dev->pdev,
+ (struct scatterlist *)scsicmd->buffer,
+ scsicmd->use_sg,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ else if(scsicmd->request_bufflen)
+ pci_unmap_single(dev->pdev, (ulong)scsicmd->SCp.ptr,
+ scsicmd->request_bufflen,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+
+ /*
+ * First check the fib status
+ */
+
+ if (le32_to_cpu(srbreply->status) != ST_OK){
+ int len;
+ printk(KERN_WARNING "aac_srb_callback: srb failed, status = %d\n",
+ le32_to_cpu(srbreply->status));
+ len = (srbreply->sense_data_size > sizeof(scsicmd->sense_buffer))?
+ sizeof(scsicmd->sense_buffer):srbreply->sense_data_size;
+ scsicmd->result = DID_ERROR << 16 | COMMAND_COMPLETE << 8 |
+ CHECK_CONDITION;
+ memcpy(scsicmd->sense_buffer, srbreply->sense_data, len);
+ }
+
+ /*
+ * Next check the srb status
+ */
+ switch(le32_to_cpu(srbreply->srb_status)){
+ case SRB_STATUS_ERROR_RECOVERY:
+ case SRB_STATUS_PENDING:
+ case SRB_STATUS_SUCCESS:
+ if(scsicmd->cmnd[0] == INQUIRY ){
+ u8 b;
+ /* We can't expose disk devices because we can't tell whether they
+ * are the raw container drives or standalone drives
+ */
+ b = *(u8*)scsicmd->buffer;
+ if( (b & 0x0f) == TYPE_DISK ){
+ scsicmd->result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8;
+ }
+ } else {
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8;
+ }
+ break;
+ case SRB_STATUS_DATA_OVERRUN:
+ switch(scsicmd->cmnd[0]){
+ case READ_6:
+ case WRITE_6:
+ case READ_10:
+ case WRITE_10:
+ case READ_12:
+ case WRITE_12:
+ if(le32_to_cpu(srbreply->data_xfer_length) < scsicmd->underflow ) {
+ printk(KERN_WARNING"aacraid: SCSI CMD underflow\n");
+ } else {
+ printk(KERN_WARNING"aacraid: SCSI CMD Data Overrun\n");
+ }
+ scsicmd->result = DID_ERROR << 16 | COMMAND_COMPLETE << 8;
+ break;
+ default:
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8;
+ break;
+ }
+ break;
+ case SRB_STATUS_ABORTED:
+ scsicmd->result = DID_ABORT << 16 | ABORT << 8;
+ break;
+ case SRB_STATUS_ABORT_FAILED:
+ // Not sure about this one - but assuming the hba was trying
+ // to abort for some reason
+ scsicmd->result = DID_ERROR << 16 | ABORT << 8;
+ break;
+ case SRB_STATUS_PARITY_ERROR:
+ scsicmd->result = DID_PARITY << 16 | MSG_PARITY_ERROR << 8;
+ break;
+ case SRB_STATUS_NO_DEVICE:
+ case SRB_STATUS_INVALID_PATH_ID:
+ case SRB_STATUS_INVALID_TARGET_ID:
+ case SRB_STATUS_INVALID_LUN:
+ case SRB_STATUS_SELECTION_TIMEOUT:
+ scsicmd->result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8;
+ break;
+
+ case SRB_STATUS_COMMAND_TIMEOUT:
+ case SRB_STATUS_TIMEOUT:
+ scsicmd->result = DID_TIME_OUT << 16 | COMMAND_COMPLETE << 8;
+ break;
+
+ case SRB_STATUS_BUSY:
+ scsicmd->result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8;
+ break;
+
+ case SRB_STATUS_BUS_RESET:
+ scsicmd->result = DID_RESET << 16 | COMMAND_COMPLETE << 8;
+ break;
+
+ case SRB_STATUS_MESSAGE_REJECTED:
+ scsicmd->result = DID_ERROR << 16 | MESSAGE_REJECT << 8;
+ break;
+ case SRB_STATUS_REQUEST_FLUSHED:
+ case SRB_STATUS_ERROR:
+ case SRB_STATUS_INVALID_REQUEST:
+ case SRB_STATUS_REQUEST_SENSE_FAILED:
+ case SRB_STATUS_NO_HBA:
+ case SRB_STATUS_UNEXPECTED_BUS_FREE:
+ case SRB_STATUS_PHASE_SEQUENCE_FAILURE:
+ case SRB_STATUS_BAD_SRB_BLOCK_LENGTH:
+ case SRB_STATUS_DELAYED_RETRY:
+ case SRB_STATUS_BAD_FUNCTION:
+ case SRB_STATUS_NOT_STARTED:
+ case SRB_STATUS_NOT_IN_USE:
+ case SRB_STATUS_FORCE_ABORT:
+ case SRB_STATUS_DOMAIN_VALIDATION_FAIL:
+ default:
+#ifdef AAC_DETAILED_STATUS_INFO
+ printk("aacraid: SRB ERROR (%s)\n",
+ aac_get_status_string(le32_to_cpu(srbreply->srb_status)));
+#endif
+ scsicmd->result = DID_ERROR << 16 | COMMAND_COMPLETE << 8;
+ break;
+ }
+ if (le32_to_cpu(srbreply->scsi_status) == 0x02 ){ // Check Condition
+ int len;
+ len = (srbreply->sense_data_size > sizeof(scsicmd->sense_buffer))?
+ sizeof(scsicmd->sense_buffer):srbreply->sense_data_size;
+ printk(KERN_WARNING "aac_srb_callback: check condition, "
+ "status = %d len=%d\n", le32_to_cpu(srbreply->status), len);
+ memcpy(scsicmd->sense_buffer, srbreply->sense_data, len);
+ }
+ /*
+ * OR in the scsi status (already shifted up a bit)
+ */
+ scsicmd->result |= le32_to_cpu(srbreply->scsi_status);
+
+ fib_complete(fibptr);
+ fib_free(fibptr);
+ aac_io_done(scsicmd);
+}
+
+/**
+ *
+ * aac_send_srb_fib
+ * @scsicmd: the scsi command block
+ *
+ * This routine will form a FIB and fill in the aac_srb from the
+ * scsicmd passed in.
+ */
+
+static int aac_send_srb_fib(Scsi_Cmnd* scsicmd)
+{
+ struct fib* cmd_fibcontext;
+ struct aac_dev* dev;
+ int status;
+ struct aac_srb *srbcmd;
+ u16 fibsize;
+ u32 flag;
+
+ if( scsicmd->target > 15 || scsicmd->lun > 7) {
+ scsicmd->result = DID_NO_CONNECT << 16;
+ __aac_io_done(scsicmd);
+ return 0;
+ }
+
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+ switch(scsicmd->sc_data_direction){
+ case SCSI_DATA_WRITE:
+ flag = SRB_DataOut;
+ break;
+ case SCSI_DATA_UNKNOWN:
+ flag = SRB_DataIn | SRB_DataOut;
+ break;
+ case SCSI_DATA_READ:
+ flag = SRB_DataIn;
+ break;
+ case SCSI_DATA_NONE:
+ default:
+ flag = SRB_NoDataXfer;
+ break;
+ }
+
+
+ /*
+ * Allocate and initialize a Fib then setup a BlockWrite command
+ */
+ if (!(cmd_fibcontext = fib_alloc(dev))) {
+ scsicmd->result = DID_ERROR << 16;
+ __aac_io_done(scsicmd);
+ return -1;
+ }
+ fib_init(cmd_fibcontext);
+
+ srbcmd = (struct aac_srb*) fib_data(cmd_fibcontext);
+ srbcmd->function = cpu_to_le32(SRBF_ExecuteScsi);
+ srbcmd->channel = cpu_to_le32(aac_logical_to_phys(scsicmd->channel));
+ srbcmd->target = cpu_to_le32(scsicmd->target);
+ srbcmd->lun = cpu_to_le32(scsicmd->lun);
+ srbcmd->flags = cpu_to_le32(flag);
+ srbcmd->timeout = cpu_to_le32(0); // timeout not used
+ srbcmd->retry_limit =cpu_to_le32(0); // Obsolete parameter
+ srbcmd->cdb_size = cpu_to_le32(scsicmd->cmd_len);
+
+ if( dev->pae_support ==1 ) {
+ aac_build_sg64(scsicmd, (struct sgmap64*) &srbcmd->sg);
+ srbcmd->count = cpu_to_le32(scsicmd->request_bufflen);
+
+ memset(srbcmd->cdb, 0, sizeof(srbcmd->cdb));
+ memcpy(srbcmd->cdb, scsicmd->cmnd, scsicmd->cmd_len);
+ /*
+ * Build Scatter/Gather list
+ */
+ fibsize = sizeof (struct aac_srb) + (((srbcmd->sg.count & 0xff) - 1)
+ * sizeof (struct sgentry64));
+
+ /*
+ * Now send the Fib to the adapter
+ */
+ status = fib_send(ScsiPortCommand64, cmd_fibcontext, fibsize,
+ FsaNormal, 0, 1, (fib_callback) aac_srb_callback,
+ (void *) scsicmd);
+ } else {
+ aac_build_sg(scsicmd, (struct sgmap*)&srbcmd->sg);
+ srbcmd->count = cpu_to_le32(scsicmd->request_bufflen);
+
+ memset(srbcmd->cdb, 0, sizeof(srbcmd->cdb));
+ memcpy(srbcmd->cdb, scsicmd->cmnd, scsicmd->cmd_len);
+ /*
+ * Build Scatter/Gather list
+ */
+ fibsize = sizeof (struct aac_srb) + (((srbcmd->sg.count & 0xff) - 1)
+ * sizeof (struct sgentry));
+
+ /*
+ * Now send the Fib to the adapter
+ */
+ status = fib_send(ScsiPortCommand, cmd_fibcontext, fibsize,
+ FsaNormal, 0, 1, (fib_callback) aac_srb_callback,
+ (void *) scsicmd);
+ }
+ /*
+ * Check that the command was queued to the controller
+ */
+ if (status == -EINPROGRESS){
+ return 0;
+ }
+
+ printk(KERN_WARNING "aac_srb: fib_send failed with status: %d\n", status);
+ /*
+ * For some reason the Fib didn't queue; return QUEUE_FULL
+ */
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | QUEUE_FULL;
+ __aac_io_done(scsicmd);
+
+ fib_complete(cmd_fibcontext);
+ fib_free(cmd_fibcontext);
+
+ return -1;
+}
+
+static unsigned long aac_build_sg(Scsi_Cmnd* scsicmd, struct sgmap* psg)
+{
+ struct aac_dev *dev;
+ unsigned long byte_count = 0;
+
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+ // Get rid of old data
+ psg->count = cpu_to_le32(0);
+ psg->sg[0].addr = cpu_to_le32(0);
+ psg->sg[0].count = cpu_to_le32(0);
+ if (scsicmd->use_sg) {
+ struct scatterlist *sg;
+ int i;
+ int sg_count;
+ sg = (struct scatterlist *) scsicmd->request_buffer;
+
+ sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ psg->count = cpu_to_le32(sg_count);
+
+ byte_count = 0;
+
+ for (i = 0; i < sg_count; i++) {
+ psg->sg[i].addr = cpu_to_le32(sg_dma_address(sg));
+ psg->sg[i].count = cpu_to_le32(sg_dma_len(sg));
+ byte_count += sg_dma_len(sg);
+ sg++;
+ }
+ /* hba wants the size to be exact */
+ if(byte_count > scsicmd->request_bufflen){
+ psg->sg[i-1].count -= (byte_count - scsicmd->request_bufflen);
+ byte_count = scsicmd->request_bufflen;
+ }
+ /* Check for command underflow */
+ if(scsicmd->underflow && (byte_count < scsicmd->underflow)){
+ printk(KERN_WARNING"aacraid: cmd len %08lX cmd underflow %08X\n",
+ byte_count, scsicmd->underflow);
+ }
+ }
+ else if(scsicmd->request_bufflen) {
+ dma_addr_t addr;
+ addr = pci_map_single(dev->pdev,
+ scsicmd->request_buffer,
+ scsicmd->request_bufflen,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ psg->count = cpu_to_le32(1);
+ psg->sg[0].addr = cpu_to_le32(addr);
+ psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen);
+ scsicmd->SCp.ptr = (void *)addr;
+ byte_count = scsicmd->request_bufflen;
+ }
+ return byte_count;
+}
+
+
+static unsigned long aac_build_sg64(Scsi_Cmnd* scsicmd, struct sgmap64* psg)
+{
+ struct aac_dev *dev;
+ unsigned long byte_count = 0;
+ u64 le_addr;
+
+ dev = (struct aac_dev *)scsicmd->host->hostdata;
+ // Get rid of old data
+ psg->count = cpu_to_le32(0);
+ psg->sg[0].addr[0] = cpu_to_le32(0);
+ psg->sg[0].addr[1] = cpu_to_le32(0);
+ psg->sg[0].count = cpu_to_le32(0);
+ if (scsicmd->use_sg) {
+ struct scatterlist *sg;
+ int i;
+ int sg_count;
+ sg = (struct scatterlist *) scsicmd->request_buffer;
+
+ sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ psg->count = cpu_to_le32(sg_count);
+
+ byte_count = 0;
+
+ for (i = 0; i < sg_count; i++) {
+ le_addr = cpu_to_le64(sg_dma_address(sg));
+ psg->sg[i].addr[1] = (u32)(le_addr>>32);
+ psg->sg[i].addr[0] = (u32)(le_addr & 0xffffffff);
+ psg->sg[i].count = cpu_to_le32(sg_dma_len(sg));
+ byte_count += sg_dma_len(sg);
+ sg++;
+ }
+ /* hba wants the size to be exact */
+ if(byte_count > scsicmd->request_bufflen){
+ psg->sg[i-1].count -= (byte_count - scsicmd->request_bufflen);
+ byte_count = scsicmd->request_bufflen;
+ }
+ /* Check for command underflow */
+ if(scsicmd->underflow && (byte_count < scsicmd->underflow)){
+ printk(KERN_WARNING"aacraid: cmd len %08lX cmd underflow %08X\n",
+ byte_count, scsicmd->underflow);
+ }
+ }
+ else if(scsicmd->request_bufflen) {
+ dma_addr_t addr;
+ addr = pci_map_single(dev->pdev,
+ scsicmd->request_buffer,
+ scsicmd->request_bufflen,
+ scsi_to_pci_dma_dir(scsicmd->sc_data_direction));
+ psg->count = cpu_to_le32(1);
+ le_addr = cpu_to_le64(addr);
+ psg->sg[0].addr[1] = (u32)(le_addr>>32);
+ psg->sg[0].addr[0] = (u32)(le_addr & 0xffffffff);
+ psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen);
+ scsicmd->SCp.ptr = (void *)addr;
+ byte_count = scsicmd->request_bufflen;
+ }
+ return byte_count;
+}
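+
+/* Note: addr[0] carries the low and addr[1] the high 32 bits of the
+ * 64-bit DMA address, matching the struct sgentry64 layout. */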
+
+#ifdef AAC_DETAILED_STATUS_INFO
+
+struct aac_srb_status_info {
+ u32 status;
+ char *str;
+};
+
+
+static struct aac_srb_status_info srb_status_info[] = {
+ { SRB_STATUS_PENDING, "Pending Status"},
+ { SRB_STATUS_SUCCESS, "Success"},
+ { SRB_STATUS_ABORTED, "Aborted Command"},
+ { SRB_STATUS_ABORT_FAILED, "Abort Failed"},
+ { SRB_STATUS_ERROR, "Error Event"},
+ { SRB_STATUS_BUSY, "Device Busy"},
+ { SRB_STATUS_INVALID_REQUEST, "Invalid Request"},
+ { SRB_STATUS_INVALID_PATH_ID, "Invalid Path ID"},
+ { SRB_STATUS_NO_DEVICE, "No Device"},
+ { SRB_STATUS_TIMEOUT, "Timeout"},
+ { SRB_STATUS_SELECTION_TIMEOUT, "Selection Timeout"},
+ { SRB_STATUS_COMMAND_TIMEOUT, "Command Timeout"},
+ { SRB_STATUS_MESSAGE_REJECTED, "Message Rejected"},
+ { SRB_STATUS_BUS_RESET, "Bus Reset"},
+ { SRB_STATUS_PARITY_ERROR, "Parity Error"},
+ { SRB_STATUS_REQUEST_SENSE_FAILED,"Request Sense Failed"},
+ { SRB_STATUS_NO_HBA, "No HBA"},
+ { SRB_STATUS_DATA_OVERRUN, "Data Overrun/Data Underrun"},
+ { SRB_STATUS_UNEXPECTED_BUS_FREE,"Unexpected Bus Free"},
+ { SRB_STATUS_PHASE_SEQUENCE_FAILURE,"Phase Error"},
+ { SRB_STATUS_BAD_SRB_BLOCK_LENGTH,"Bad Srb Block Length"},
+ { SRB_STATUS_REQUEST_FLUSHED, "Request Flushed"},
+ { SRB_STATUS_DELAYED_RETRY, "Delayed Retry"},
+ { SRB_STATUS_INVALID_LUN, "Invalid LUN"},
+ { SRB_STATUS_INVALID_TARGET_ID, "Invalid TARGET ID"},
+ { SRB_STATUS_BAD_FUNCTION, "Bad Function"},
+ { SRB_STATUS_ERROR_RECOVERY, "Error Recovery"},
+ { SRB_STATUS_NOT_STARTED, "Not Started"},
+ { SRB_STATUS_NOT_IN_USE, "Not In Use"},
+ { SRB_STATUS_FORCE_ABORT, "Force Abort"},
+ { SRB_STATUS_DOMAIN_VALIDATION_FAIL,"Domain Validation Failure"},
+ { 0xff, "Unknown Error"}
+};
+
+char *aac_get_status_string(u32 status)
+{
+ int i;
+
+ for(i=0; i < (sizeof(srb_status_info)/sizeof(struct aac_srb_status_info)); i++ ){
+ if(srb_status_info[i].status == status){
+ return srb_status_info[i].str;
+ }
+ }
+
+ return "Bad Status Code";
+}
+
+#endif
diff --git a/xen/drivers/scsi/aacraid/aacraid.h b/xen/drivers/scsi/aacraid/aacraid.h
new file mode 100644
index 0000000000..1f9838436d
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/aacraid.h
@@ -0,0 +1,1420 @@
+
+/* Debug printing: use the commented-out definition to enable dprintk() */
+// #define dprintk(x) printk x
+#define dprintk(x)
+
+
+#include <asm/byteorder.h>
+
+#define TRY_TASKLET
+#ifdef TRY_TASKLET
+/* XXX SMH: trying to use softirqs to trigger stuff done prev by threads */
+#include <xeno/interrupt.h> /* for tasklet/softirq stuff */
+#endif
+
+/*------------------------------------------------------------------------------
+ * D E F I N E S
+ *----------------------------------------------------------------------------*/
+
+#define MAXIMUM_NUM_CONTAINERS 31
+#define MAXIMUM_NUM_ADAPTERS 8
+
+#define AAC_NUM_FIB 578
+#define AAC_NUM_IO_FIB 512
+
+#define AAC_MAX_TARGET (MAXIMUM_NUM_CONTAINERS+1)
+//#define AAC_MAX_TARGET (16)
+#define AAC_MAX_LUN (8)
+
+/*
+ * These macros convert from physical channels to virtual channels
+ */
+#define CONTAINER_CHANNEL (0)
+#define aac_phys_to_logical(x) (x+1)
+#define aac_logical_to_phys(x) (x?x-1:0)
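+/* e.g. aac_phys_to_logical(0) == 1 and aac_logical_to_phys(1) == 0;
+ * logical channel 0 (CONTAINER_CHANNEL) is reserved for containers. */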
+
+#define AAC_DETAILED_STATUS_INFO
+
+struct diskparm
+{
+ int heads;
+ int sectors;
+ int cylinders;
+};
+
+
+/*
+ * DON'T CHANGE THE ORDER, this is set by the firmware
+ */
+
+#define CT_NONE 0
+#define CT_VOLUME 1
+#define CT_MIRROR 2
+#define CT_STRIPE 3
+#define CT_RAID5 4
+#define CT_SSRW 5
+#define CT_SSRO 6
+#define CT_MORPH 7
+#define CT_PASSTHRU 8
+#define CT_RAID4 9
+#define CT_RAID10 10 /* stripe of mirror */
+#define CT_RAID00 11 /* stripe of stripe */
+#define CT_VOLUME_OF_MIRRORS 12 /* volume of mirror */
+#define CT_PSEUDO_RAID 13 /* really raid4 */
+#define CT_LAST_VOLUME_TYPE 14
+
+/*
+ * Types of objects addressable in some fashion by the client.
+ * This is a superset of those objects handled just by the filesystem
+ * and includes "raw" objects that an administrator would use to
+ * configure containers and filesystems.
+ */
+
+#define FT_REG 1 /* regular file */
+#define FT_DIR 2 /* directory */
+#define FT_BLK 3 /* "block" device - reserved */
+#define FT_CHR 4 /* "character special" device - reserved */
+#define FT_LNK 5 /* symbolic link */
+#define FT_SOCK 6 /* socket */
+#define FT_FIFO 7 /* fifo */
+#define FT_FILESYS 8 /* ADAPTEC's "FSA"(tm) filesystem */
+#define FT_DRIVE 9 /* physical disk - addressable in scsi by bus/target/lun */
+#define FT_SLICE 10 /* virtual disk - raw volume - slice */
+#define FT_PARTITION 11 /* FSA partition - carved out of a slice - building block for containers */
+#define FT_VOLUME 12 /* Container - Volume Set */
+#define FT_STRIPE 13 /* Container - Stripe Set */
+#define FT_MIRROR 14 /* Container - Mirror Set */
+#define FT_RAID5 15 /* Container - Raid 5 Set */
+#define FT_DATABASE 16 /* Storage object with "foreign" content manager */
+
+/*
+ * Host side memory scatter gather list
+ * Used by the adapter for read, write, and readdirplus operations
+ * We have separate 32 and 64 bit versions because even
+ * on 64 bit systems not all cards support the 64 bit version
+ */
+struct sgentry {
+ u32 addr; /* 32-bit address. */
+ u32 count; /* Length. */
+};
+
+struct sgentry64 {
+ u32 addr[2]; /* 64-bit addr. 2 pieces for data alignment */
+ u32 count; /* Length. */
+};
+
+/*
+ * SGMAP
+ *
+ * This is the SGMAP structure for all commands that use
+ * 32-bit addressing.
+ */
+
+struct sgmap {
+ u32 count;
+ struct sgentry sg[1];
+};
+
+struct sgmap64 {
+ u32 count;
+ struct sgentry64 sg[1];
+};
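+
+/*
+ * sg[1] is the old C idiom for a variable-length tail: a map carrying N
+ * entries is sized as sizeof(struct sgmap) + (N - 1) * sizeof(struct
+ * sgentry), which is exactly how aachba.c computes its FIB sizes.
+ */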
+
+struct creation_info
+{
+ u8 buildnum; /* e.g., 588 */
+ u8 usec; /* e.g., 588 */
+ u8 via; /* e.g., 1 = FSU,
+ * 2 = API
+ */
+ u8 year; /* e.g., 1997 = 97 */
+ u32 date; /*
+ * unsigned Month :4; // 1 - 12
+ * unsigned Day :6; // 1 - 32
+ * unsigned Hour :6; // 0 - 23
+ * unsigned Minute :6; // 0 - 60
+ * unsigned Second :6; // 0 - 60
+ */
+ u32 serial[2]; /* e.g., 0x1DEADB0BFAFAF001 */
+};
+
+
+/*
+ * Define all the constants needed for the communication interface
+ */
+
+/*
+ * Define how many queue entries each queue will have and the total
+ * number of entries for the entire communication interface. Also define
+ * how many queues we support.
+ *
+ * This has to match the controller
+ */
+
+#define NUMBER_OF_COMM_QUEUES 8 // 4 command; 4 response
+#define HOST_HIGH_CMD_ENTRIES 4
+#define HOST_NORM_CMD_ENTRIES 8
+#define ADAP_HIGH_CMD_ENTRIES 4
+#define ADAP_NORM_CMD_ENTRIES 512
+#define HOST_HIGH_RESP_ENTRIES 4
+#define HOST_NORM_RESP_ENTRIES 512
+#define ADAP_HIGH_RESP_ENTRIES 4
+#define ADAP_NORM_RESP_ENTRIES 8
+
+#define TOTAL_QUEUE_ENTRIES \
+ (HOST_NORM_CMD_ENTRIES + HOST_HIGH_CMD_ENTRIES + ADAP_NORM_CMD_ENTRIES + ADAP_HIGH_CMD_ENTRIES + \
+ HOST_NORM_RESP_ENTRIES + HOST_HIGH_RESP_ENTRIES + ADAP_NORM_RESP_ENTRIES + ADAP_HIGH_RESP_ENTRIES)
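+/* With the entry counts above this works out to 1056 queue entries. */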
+
+
+/*
+ * Set the queues on a 16 byte alignment
+ */
+
+#define QUEUE_ALIGNMENT 16
+
+/*
+ * The queue headers define the Communication Region queues. These
+ * are physically contiguous and accessible by both the adapter and the
+ * host. Even though all queue headers are in the same contiguous block
+ * they will be represented as individual units in the data structures.
+ */
+
+struct aac_entry {
+ u32 size; /* Size in bytes of Fib which this QE points to */
+ u32 addr; /* Receiver address of the FIB */
+};
+
+/*
+ * The adapter assumes the ProducerIndex and ConsumerIndex are grouped
+ * adjacently and in that order.
+ */
+
+struct aac_qhdr {
+ u64 header_addr; /* Address to hand the adapter to access to this queue head */
+ u32 *producer; /* The producer index for this queue (host address) */
+ u32 *consumer; /* The consumer index for this queue (host address) */
+};
+
+/*
+ * Define all the events which the adapter would like to notify
+ * the host of.
+ */
+
+#define HostNormCmdQue 1 /* Change in host normal priority command queue */
+#define HostHighCmdQue 2 /* Change in host high priority command queue */
+#define HostNormRespQue 3 /* Change in host normal priority response queue */
+#define HostHighRespQue 4 /* Change in host high priority response queue */
+#define AdapNormRespNotFull 5
+#define AdapHighRespNotFull 6
+#define AdapNormCmdNotFull 7
+#define AdapHighCmdNotFull 8
+#define SynchCommandComplete 9
+#define AdapInternalError 0xfe /* The adapter detected an internal error; shutting down */
+
+/*
+ * Define all the events the host wishes to notify the
+ * adapter of. The first four values must match the Qid of the
+ * corresponding queue.
+ */
+
+#define AdapNormCmdQue 2
+#define AdapHighCmdQue 3
+#define AdapNormRespQue 6
+#define AdapHighRespQue 7
+#define HostShutdown 8
+#define HostPowerFail 9
+#define FatalCommError 10
+#define HostNormRespNotFull 11
+#define HostHighRespNotFull 12
+#define HostNormCmdNotFull 13
+#define HostHighCmdNotFull 14
+#define FastIo 15
+#define AdapPrintfDone 16
+
+/*
+ * Define all the queues that the adapter and host use to communicate
+ * Number them to match the physical queue layout.
+ */
+
+enum aac_queue_types {
+ HostNormCmdQueue = 0, /* Adapter to host normal priority command traffic */
+ HostHighCmdQueue, /* Adapter to host high priority command traffic */
+ AdapNormCmdQueue, /* Host to adapter normal priority command traffic */
+ AdapHighCmdQueue, /* Host to adapter high priority command traffic */
+ HostNormRespQueue, /* Adapter to host normal priority response traffic */
+ HostHighRespQueue, /* Adapter to host high priority response traffic */
+ AdapNormRespQueue, /* Host to adapter normal priority response traffic */
+ AdapHighRespQueue /* Host to adapter high priority response traffic */
+};
+
+/*
+ * Assign type values to the FSA communication data structures
+ */
+
+#define FIB_MAGIC 0x0001
+
+/*
+ * Define the priority levels the FSA communication routines support.
+ */
+
+#define FsaNormal 1
+#define FsaHigh 2
+
+/*
+ * Define the FIB. The FIB is where all the requested data and
+ * command information are passed to the application on the FSA adapter.
+ */
+
+struct aac_fibhdr {
+ u32 XferState; // Current transfer state for this CCB
+ u16 Command; // Routing information for the destination
+ u8 StructType; // Type FIB
+ u8 Flags; // Flags for FIB
+ u16 Size; // Size of this FIB in bytes
+ u16 SenderSize; // Size of the FIB in the sender (for
+ // response sizing)
+ u32 SenderFibAddress; // Host defined data in the FIB
+ u32 ReceiverFibAddress; // Logical address of this FIB for the adapter
+ u32 SenderData; // Place holder for the sender to store data
+ union {
+ struct {
+ u32 _ReceiverTimeStart; // Timestamp for receipt of fib
+ u32 _ReceiverTimeDone; // Timestamp for completion of fib
+ } _s;
+ struct list_head _FibLinks; // Used to link Adapter Initiated
+ // Fibs on the host
+ } _u;
+};
+
+#define FibLinks _u._FibLinks
+
+#define FIB_DATA_SIZE_IN_BYTES (512 - sizeof(struct aac_fibhdr))
+
+
+struct hw_fib {
+ struct aac_fibhdr header;
+ u8 data[FIB_DATA_SIZE_IN_BYTES]; // Command specific data
+};
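+
+/*
+ * Size check (a sketch, assuming the usual 32-bit layout in which struct
+ * aac_fibhdr packs to 32 bytes): FIB_DATA_SIZE_IN_BYTES is then
+ * 512 - 32 = 480, so a struct hw_fib is exactly the 512 bytes exchanged
+ * with the adapter.
+ */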
+
+/*
+ * FIB commands
+ */
+
+#define TestCommandResponse 1
+#define TestAdapterCommand 2
+/*
+ * Lowlevel and comm commands
+ */
+#define LastTestCommand 100
+#define ReinitHostNormCommandQueue 101
+#define ReinitHostHighCommandQueue 102
+#define ReinitHostHighRespQueue 103
+#define ReinitHostNormRespQueue 104
+#define ReinitAdapNormCommandQueue 105
+#define ReinitAdapHighCommandQueue 107
+#define ReinitAdapHighRespQueue 108
+#define ReinitAdapNormRespQueue 109
+#define InterfaceShutdown 110
+#define DmaCommandFib 120
+#define StartProfile 121
+#define TermProfile 122
+#define SpeedTest 123
+#define TakeABreakPt 124
+#define RequestPerfData 125
+#define SetInterruptDefTimer 126
+#define SetInterruptDefCount 127
+#define GetInterruptDefStatus 128
+#define LastCommCommand 129
+/*
+ * Filesystem commands
+ */
+#define NuFileSystem 300
+#define UFS 301
+#define HostFileSystem 302
+#define LastFileSystemCommand 303
+/*
+ * Container Commands
+ */
+#define ContainerCommand 500
+#define ContainerCommand64 501
+/*
+ * Cluster Commands
+ */
+#define ClusterCommand 550
+/*
+ * Scsi Port commands (scsi passthrough)
+ */
+#define ScsiPortCommand 600
+#define ScsiPortCommand64 601
+/*
+ * Misc house keeping and generic adapter initiated commands
+ */
+#define AifRequest 700
+#define CheckRevision 701
+#define FsaHostShutdown 702
+#define RequestAdapterInfo 703
+#define IsAdapterPaused 704
+#define SendHostTime 705
+#define LastMiscCommand 706
+
+//
+// Commands that will target the failover level on the FSA adapter
+//
+
+enum fib_xfer_state {
+ HostOwned = (1<<0),
+ AdapterOwned = (1<<1),
+ FibInitialized = (1<<2),
+ FibEmpty = (1<<3),
+ AllocatedFromPool = (1<<4),
+ SentFromHost = (1<<5),
+ SentFromAdapter = (1<<6),
+ ResponseExpected = (1<<7),
+ NoResponseExpected = (1<<8),
+ AdapterProcessed = (1<<9),
+ HostProcessed = (1<<10),
+ HighPriority = (1<<11),
+ NormalPriority = (1<<12),
+ Async = (1<<13),
+ AsyncIo = (1<<13), // rpbfix: remove with new regime
+ PageFileIo = (1<<14), // rpbfix: remove with new regime
+ ShutdownRequest = (1<<15),
+ LazyWrite = (1<<16), // rpbfix: remove with new regime
+ AdapterMicroFib = (1<<17),
+ BIOSFibPath = (1<<18),
+ FastResponseCapable = (1<<19),
+	ApiFib = (1<<20)	// It's an API fib.
+};
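+
+/*
+ * For reference: fib_init() in commsup.c starts a host-issued FIB with
+ * XferState = HostOwned | FibInitialized | FibEmpty | FastResponseCapable;
+ * the remaining bits are flipped as the FIB moves between host and adapter.
+ */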
+
+/*
+ * The following define needs to be updated any time there is an
+ * incompatible change made to the aac_init structure.
+ */
+
+#define ADAPTER_INIT_STRUCT_REVISION 3
+
+struct aac_init
+{
+ u32 InitStructRevision;
+ u32 MiniPortRevision;
+ u32 fsrev;
+ u32 CommHeaderAddress;
+ u32 FastIoCommAreaAddress;
+ u32 AdapterFibsPhysicalAddress;
+ u32 AdapterFibsVirtualAddress;
+ u32 AdapterFibsSize;
+ u32 AdapterFibAlign;
+ u32 printfbuf;
+ u32 printfbufsiz;
+ u32 HostPhysMemPages; // number of 4k pages of host physical memory
+ u32 HostElapsedSeconds; // number of seconds since 1970.
+};
+
+enum aac_log_level {
+ LOG_INIT = 10,
+ LOG_INFORMATIONAL = 20,
+ LOG_WARNING = 30,
+ LOG_LOW_ERROR = 40,
+ LOG_MEDIUM_ERROR = 50,
+ LOG_HIGH_ERROR = 60,
+ LOG_PANIC = 70,
+ LOG_DEBUG = 80,
+ LOG_WINDBG_PRINT = 90
+};
+
+#define FSAFS_NTC_GET_ADAPTER_FIB_CONTEXT 0x030b
+#define FSAFS_NTC_FIB_CONTEXT 0x030c
+
+struct aac_dev;
+
+struct adapter_ops
+{
+ void (*adapter_interrupt)(struct aac_dev *dev);
+ void (*adapter_notify)(struct aac_dev *dev, u32 event);
+ void (*adapter_enable_int)(struct aac_dev *dev, u32 event);
+ void (*adapter_disable_int)(struct aac_dev *dev, u32 event);
+ int (*adapter_sync_cmd)(struct aac_dev *dev, u32 command, u32 p1, u32 *status);
+};
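+
+/*
+ * These hooks are expected to be filled in by the card-specific init
+ * routines (aac_rx_init()/aac_sa_init() below) and are invoked through
+ * the aac_adapter_*() wrapper macros defined later in this header.
+ */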
+
+/*
+ * Define which interrupt handler needs to be installed
+ */
+
+struct aac_driver_ident
+{
+ u16 vendor;
+ u16 device;
+ u16 subsystem_vendor;
+ u16 subsystem_device;
+ int (*init)(struct aac_dev *dev, unsigned long num);
+ char * name;
+ char * vname;
+ char * model;
+ u16 channels;
+};
+
+/*
+ * The adapter interface specifies that all queues be located in the same
+ * physically contiguous block. The host structure that defines the
+ * communication queues assumes they are each a separate physically
+ * contiguous memory region, while still supporting them all being one big
+ * contiguous block.
+ * There is a command and response queue for each priority level and
+ * direction of communication. These regions are accessed by both the host
+ * and adapter.
+ */
+
+struct aac_queue {
+ u64 logical; /* This is the address we give the adapter */
+ struct aac_entry *base; /* This is the system virtual address */
+ struct aac_qhdr headers; /* A pointer to the producer and consumer queue headers for this queue */
+ u32 entries; /* Number of queue entries on this queue */
+#if 0
+ wait_queue_head_t qfull; /* Event to wait on if the queue is full */
+ wait_queue_head_t cmdready; /* Indicates there is a Command ready from the adapter on this queue. */
+#endif
+ /* This is only valid for adapter to host command queues. */
+	spinlock_t *lock;	/* Spinlock for this queue; must be taken before accessing the queue */
+ spinlock_t lockdata; /* Actual lock (used only on one side of the lock) */
+ unsigned long SavedIrql; /* Previous IRQL when the spin lock is taken */
+ u32 padding; /* Padding - FIXME - can remove I believe */
+	struct list_head cmdq;	/* A queue of FIBs which need to be processed by the FS thread. This is */
+ /* only valid for command queues which receive entries from the adapter. */
+ struct list_head pendingq; /* A queue of outstanding fib's to the adapter. */
+ unsigned long numpending; /* Number of entries on outstanding queue. */
+ struct aac_dev * dev; /* Back pointer to adapter structure */
+};
+
+/*
+ * Message queues. The order here is important, see also the
+ * queue type ordering
+ */
+
+struct aac_queue_block
+{
+ struct aac_queue queue[8];
+};
+
+/*
+ * SaP1 Message Unit Registers
+ */
+
+struct sa_drawbridge_CSR {
+ // Offset | Name
+ u32 reserved[10]; // 00h-27h | Reserved
+	u8	LUT_Offset;	// 28h	| Lookup Table Offset
+ u8 reserved1[3]; // 29h-2bh | Reserved
+	u32	LUT_Data;	// 2ch	| Lookup Table Data
+ u32 reserved2[26]; // 30h-97h | Reserved
+ u16 PRICLEARIRQ; // 98h | Primary Clear Irq
+ u16 SECCLEARIRQ; // 9ah | Secondary Clear Irq
+ u16 PRISETIRQ; // 9ch | Primary Set Irq
+ u16 SECSETIRQ; // 9eh | Secondary Set Irq
+ u16 PRICLEARIRQMASK; // a0h | Primary Clear Irq Mask
+ u16 SECCLEARIRQMASK; // a2h | Secondary Clear Irq Mask
+ u16 PRISETIRQMASK; // a4h | Primary Set Irq Mask
+ u16 SECSETIRQMASK; // a6h | Secondary Set Irq Mask
+ u32 MAILBOX0; // a8h | Scratchpad 0
+ u32 MAILBOX1; // ach | Scratchpad 1
+ u32 MAILBOX2; // b0h | Scratchpad 2
+ u32 MAILBOX3; // b4h | Scratchpad 3
+ u32 MAILBOX4; // b8h | Scratchpad 4
+ u32 MAILBOX5; // bch | Scratchpad 5
+ u32 MAILBOX6; // c0h | Scratchpad 6
+ u32 MAILBOX7; // c4h | Scratchpad 7
+
+ u32 ROM_Setup_Data; // c8h | Rom Setup and Data
+ u32 ROM_Control_Addr; // cch | Rom Control and Address
+
+ u32 reserved3[12]; // d0h-ffh | reserved
+ u32 LUT[64]; // 100h-1ffh| Lookup Table Entries
+
+ //
+	// TODO
+	//	need to add DMA, I2O, UART, etc. registers from 80h to 364h
+ //
+
+};
+
+#define Mailbox0 SaDbCSR.MAILBOX0
+#define Mailbox1 SaDbCSR.MAILBOX1
+#define Mailbox2 SaDbCSR.MAILBOX2
+#define Mailbox3 SaDbCSR.MAILBOX3
+#define Mailbox4 SaDbCSR.MAILBOX4
+#define Mailbox5 SaDbCSR.MAILBOX5
+#define Mailbox7 SaDbCSR.MAILBOX7
+
+#define DoorbellReg_p SaDbCSR.PRISETIRQ
+#define DoorbellReg_s SaDbCSR.SECSETIRQ
+#define DoorbellClrReg_p SaDbCSR.PRICLEARIRQ
+
+
+#define DOORBELL_0 cpu_to_le16(0x0001)
+#define DOORBELL_1 cpu_to_le16(0x0002)
+#define DOORBELL_2 cpu_to_le16(0x0004)
+#define DOORBELL_3 cpu_to_le16(0x0008)
+#define DOORBELL_4 cpu_to_le16(0x0010)
+#define DOORBELL_5 cpu_to_le16(0x0020)
+#define DOORBELL_6 cpu_to_le16(0x0040)
+
+
+#define PrintfReady DOORBELL_5
+#define PrintfDone DOORBELL_5
+
+struct sa_registers {
+ struct sa_drawbridge_CSR SaDbCSR; /* 98h - c4h */
+};
+
+
+#define Sa_MINIPORT_REVISION 1
+
+#define sa_readw(AEP, CSR) readl(&((AEP)->regs.sa->CSR))
+#define sa_readl(AEP, CSR) readl(&((AEP)->regs.sa->CSR))
+#define sa_writew(AEP, CSR, value) writew(value, &((AEP)->regs.sa->CSR))
+#define sa_writel(AEP, CSR, value) writel(value, &((AEP)->regs.sa->CSR))
+
+/*
+ * Rx Message Unit Registers
+ */
+
+struct rx_mu_registers {
+ // Local | PCI* | Name
+ // | |
+ u32 ARSR; // 1300h | 00h | APIC Register Select Register
+ u32 reserved0; // 1304h | 04h | Reserved
+ u32 AWR; // 1308h | 08h | APIC Window Register
+ u32 reserved1; // 130Ch | 0Ch | Reserved
+ u32 IMRx[2]; // 1310h | 10h | Inbound Message Registers
+ u32 OMRx[2]; // 1318h | 18h | Outbound Message Registers
+ u32 IDR; // 1320h | 20h | Inbound Doorbell Register
+ u32 IISR; // 1324h | 24h | Inbound Interrupt Status Register
+ u32 IIMR; // 1328h | 28h | Inbound Interrupt Mask Register
+ u32 ODR; // 132Ch | 2Ch | Outbound Doorbell Register
+ u32 OISR; // 1330h | 30h | Outbound Interrupt Status Register
+ u32 OIMR; // 1334h | 34h | Outbound Interrupt Mask Register
+ // * Must access through ATU Inbound Translation Window
+};
+
+struct rx_inbound {
+ u32 Mailbox[8];
+};
+
+#define InboundMailbox0 IndexRegs.Mailbox[0]
+#define InboundMailbox1 IndexRegs.Mailbox[1]
+#define InboundMailbox2 IndexRegs.Mailbox[2]
+#define InboundMailbox3 IndexRegs.Mailbox[3]
+#define InboundMailbox4 IndexRegs.Mailbox[4]
+
+#define INBOUNDDOORBELL_0 cpu_to_le32(0x00000001)
+#define INBOUNDDOORBELL_1 cpu_to_le32(0x00000002)
+#define INBOUNDDOORBELL_2 cpu_to_le32(0x00000004)
+#define INBOUNDDOORBELL_3 cpu_to_le32(0x00000008)
+#define INBOUNDDOORBELL_4 cpu_to_le32(0x00000010)
+#define INBOUNDDOORBELL_5 cpu_to_le32(0x00000020)
+#define INBOUNDDOORBELL_6 cpu_to_le32(0x00000040)
+
+#define OUTBOUNDDOORBELL_0 cpu_to_le32(0x00000001)
+#define OUTBOUNDDOORBELL_1 cpu_to_le32(0x00000002)
+#define OUTBOUNDDOORBELL_2 cpu_to_le32(0x00000004)
+#define OUTBOUNDDOORBELL_3 cpu_to_le32(0x00000008)
+#define OUTBOUNDDOORBELL_4 cpu_to_le32(0x00000010)
+
+#define InboundDoorbellReg MUnit.IDR
+#define OutboundDoorbellReg MUnit.ODR
+
+struct rx_registers {
+ struct rx_mu_registers MUnit; // 1300h - 1334h
+ u32 reserved1[6]; // 1338h - 134ch
+ struct rx_inbound IndexRegs;
+};
+
+#define rx_readb(AEP, CSR) readb(&((AEP)->regs.rx->CSR))
+#define rx_readl(AEP, CSR) readl(&((AEP)->regs.rx->CSR))
+#define rx_writeb(AEP, CSR, value) writeb(value, &((AEP)->regs.rx->CSR))
+#define rx_writel(AEP, CSR, value) writel(value, &((AEP)->regs.rx->CSR))
+
+struct fib;
+
+typedef void (*fib_callback)(void *ctxt, struct fib *fibctx);
+
+struct aac_fib_context {
+ s16 type; // used for verification of structure
+ s16 size;
+ ulong jiffies; // used for cleanup - dmb changed to ulong
+ struct list_head next; // used to link context's into a linked list
+#if 0
+ struct semaphore wait_sem; // this is used to wait for the next fib to arrive.
+#endif
+ int wait; // Set to true when thread is in WaitForSingleObject
+ unsigned long count; // total number of FIBs on FibList
+ struct list_head fibs;
+};
+
+struct fsa_scsi_hba {
+ u32 size[MAXIMUM_NUM_CONTAINERS];
+ u32 type[MAXIMUM_NUM_CONTAINERS];
+ u8 valid[MAXIMUM_NUM_CONTAINERS];
+ u8 ro[MAXIMUM_NUM_CONTAINERS];
+ u8 locked[MAXIMUM_NUM_CONTAINERS];
+ u8 deleted[MAXIMUM_NUM_CONTAINERS];
+ u32 devno[MAXIMUM_NUM_CONTAINERS];
+};
+
+struct fib {
+ void *next; /* this is used by the allocator */
+ s16 type;
+ s16 size;
+ /*
+ * The Adapter that this I/O is destined for.
+ */
+ struct aac_dev *dev;
+ u64 logicaladdr; /* 64 bit */
+#if 0
+ /*
+ * This is the event the sendfib routine will wait on if the
+ * caller did not pass one and this is synch io.
+ */
+ struct semaphore event_wait;
+#endif
+ spinlock_t event_lock;
+
+ u32 done; /* gets set to 1 when fib is complete */
+ fib_callback callback;
+ void *callback_data;
+ u32 flags; // u32 dmb was ulong
+ /*
+ * The following is used to put this fib context onto the
+ * Outstanding I/O queue.
+ */
+ struct list_head queue;
+
+ void *data;
+ struct hw_fib *fib; /* Actual shared object */
+};
+
+/*
+ * Adapter Information Block
+ *
+ * This is returned by the RequestAdapterInfo block
+ */
+
+struct aac_adapter_info
+{
+ u32 platform;
+ u32 cpu;
+ u32 subcpu;
+ u32 clock;
+ u32 execmem;
+ u32 buffermem;
+ u32 totalmem;
+ u32 kernelrev;
+ u32 kernelbuild;
+ u32 monitorrev;
+ u32 monitorbuild;
+ u32 hwrev;
+ u32 hwbuild;
+ u32 biosrev;
+ u32 biosbuild;
+ u32 cluster;
+ u32 serial[2];
+ u32 battery;
+ u32 options;
+ u32 OEM;
+};
+
+/*
+ * Battery platforms
+ */
+#define AAC_BAT_REQ_PRESENT (1)
+#define AAC_BAT_REQ_NOTPRESENT (2)
+#define AAC_BAT_OPT_PRESENT (3)
+#define AAC_BAT_OPT_NOTPRESENT (4)
+#define AAC_BAT_NOT_SUPPORTED (5)
+/*
+ * cpu types
+ */
+#define AAC_CPU_SIMULATOR (1)
+#define AAC_CPU_I960 (2)
+#define AAC_CPU_STRONGARM (3)
+
+/*
+ * Supported Options
+ */
+#define AAC_OPT_SNAPSHOT cpu_to_le32(1)
+#define AAC_OPT_CLUSTERS cpu_to_le32(1<<1)
+#define AAC_OPT_WRITE_CACHE cpu_to_le32(1<<2)
+#define AAC_OPT_64BIT_DATA cpu_to_le32(1<<3)
+#define AAC_OPT_HOST_TIME_FIB cpu_to_le32(1<<4)
+#define AAC_OPT_RAID50 cpu_to_le32(1<<5)
+#define AAC_OPT_4GB_WINDOW cpu_to_le32(1<<6)
+#define AAC_OPT_SCSI_UPGRADEABLE cpu_to_le32(1<<7)
+#define AAC_OPT_SOFT_ERR_REPORT cpu_to_le32(1<<8)
+#define AAC_OPT_SUPPORTED_RECONDITION cpu_to_le32(1<<9)
+#define AAC_OPT_SGMAP_HOST64 cpu_to_le32(1<<10)
+#define AAC_OPT_ALARM cpu_to_le32(1<<11)
+#define AAC_OPT_NONDASD cpu_to_le32(1<<12)
+
+struct aac_dev
+{
+ struct aac_dev *next;
+ const char *name;
+ int id;
+
+ u16 irq_mask;
+ /*
+ * Map for 128 fib objects (64k)
+ */
+ dma_addr_t hw_fib_pa;
+ struct hw_fib *hw_fib_va;
+#if BITS_PER_LONG >= 64
+ ulong fib_base_va;
+#endif
+ /*
+ * Fib Headers
+ */
+ struct fib fibs[AAC_NUM_FIB];
+ struct fib *free_fib;
+ struct fib *timeout_fib;
+ spinlock_t fib_lock;
+
+ struct aac_queue_block *queues;
+ /*
+ * The user API will use an IOCTL to register itself to receive
+ * FIBs from the adapter. The following list is used to keep
+ * track of all the threads that have requested these FIBs. The
+ * mutex is used to synchronize access to all data associated
+ * with the adapter fibs.
+ */
+ struct list_head fib_list;
+
+ struct adapter_ops a_ops;
+ unsigned long fsrev; /* Main driver's revision number */
+
+ struct aac_init *init; /* Holds initialization info to communicate with adapter */
+ dma_addr_t init_pa; /* Holds physical address of the init struct */
+
+ struct pci_dev *pdev; /* Our PCI interface */
+ void * printfbuf; /* pointer to buffer used for printf's from the adapter */
+ void * comm_addr; /* Base address of Comm area */
+ dma_addr_t comm_phys; /* Physical Address of Comm area */
+ size_t comm_size;
+
+ struct Scsi_Host *scsi_host_ptr;
+ struct fsa_scsi_hba fsa_dev;
+ int thread_pid;
+ int cardtype;
+
+ /*
+ * The following is the device specific extension.
+ */
+ union
+ {
+ struct sa_registers *sa;
+ struct rx_registers *rx;
+ } regs;
+ /*
+ * The following is the number of the individual adapter
+ */
+ u32 devnum;
+ u32 aif_thread;
+#if 0
+ struct completion aif_completion;
+#endif
+ struct aac_adapter_info adapter_info;
+	/* These are in adapter info, but they are in the I/O flow, so
+	 * let's break them out so we don't have to do an AND to check them
+	 */
+ */
+ u8 nondasd_support;
+ u8 pae_support;
+};
+
+#define aac_adapter_interrupt(dev) \
+ dev->a_ops.adapter_interrupt(dev)
+
+#define aac_adapter_notify(dev, event) \
+ dev->a_ops.adapter_notify(dev, event)
+
+#define aac_adapter_enable_int(dev, event) \
+ dev->a_ops.adapter_enable_int(dev, event)
+
+#define aac_adapter_disable_int(dev, event) \
+ dev->a_ops.adapter_disable_int(dev, event)
+
+
+
+#define FIB_CONTEXT_FLAG_TIMED_OUT (0x00000001)
+
+/*
+ * Define the command values
+ */
+
+#define Null 0
+#define GetAttributes 1
+#define SetAttributes 2
+#define Lookup 3
+#define ReadLink 4
+#define Read 5
+#define Write 6
+#define Create 7
+#define MakeDirectory 8
+#define SymbolicLink 9
+#define MakeNode 10
+#define Removex 11
+#define RemoveDirectoryx 12
+#define Rename 13
+#define Link 14
+#define ReadDirectory 15
+#define ReadDirectoryPlus 16
+#define FileSystemStatus 17
+#define FileSystemInfo 18
+#define PathConfigure 19
+#define Commit 20
+#define Mount 21
+#define UnMount 22
+#define Newfs 23
+#define FsCheck 24
+#define FsSync 25
+#define SimReadWrite 26
+#define SetFileSystemStatus 27
+#define BlockRead 28
+#define BlockWrite 29
+#define NvramIoctl 30
+#define FsSyncWait 31
+#define ClearArchiveBit 32
+#define SetAcl 33
+#define GetAcl 34
+#define AssignAcl 35
+#define FaultInsertion 36 /* Fault Insertion Command */
+#define CrazyCache 37 /* Crazycache */
+
+#define MAX_FSACOMMAND_NUM 38
+
+
+/*
+ * Define the status returns. These are very Unix-like, although
+ * most are not in fact used.
+ */
+
+#define ST_OK 0
+#define ST_PERM 1
+#define ST_NOENT 2
+#define ST_IO 5
+#define ST_NXIO 6
+#define ST_E2BIG 7
+#define ST_ACCES 13
+#define ST_EXIST 17
+#define ST_XDEV 18
+#define ST_NODEV 19
+#define ST_NOTDIR 20
+#define ST_ISDIR 21
+#define ST_INVAL 22
+#define ST_FBIG 27
+#define ST_NOSPC 28
+#define ST_ROFS 30
+#define ST_MLINK 31
+#define ST_WOULDBLOCK 35
+#define ST_NAMETOOLONG 63
+#define ST_NOTEMPTY 66
+#define ST_DQUOT 69
+#define ST_STALE 70
+#define ST_REMOTE 71
+#define ST_BADHANDLE 10001
+#define ST_NOT_SYNC 10002
+#define ST_BAD_COOKIE 10003
+#define ST_NOTSUPP 10004
+#define ST_TOOSMALL 10005
+#define ST_SERVERFAULT 10006
+#define ST_BADTYPE 10007
+#define ST_JUKEBOX 10008
+#define ST_NOTMOUNTED 10009
+#define ST_MAINTMODE 10010
+#define ST_STALEACL 10011
+
+/*
+ * On writes, how the client wants the data written.
+ */
+
+#define CACHE_CSTABLE 1
+#define CACHE_UNSTABLE 2
+
+/*
+ * Lets the client know at which level the data was committed on
+ * a write request.
+ */
+
+#define CMFILE_SYNCH_NVRAM 1
+#define CMDATA_SYNCH_NVRAM 2
+#define CMFILE_SYNCH 3
+#define CMDATA_SYNCH 4
+#define CMUNSTABLE 5
+
+struct aac_read
+{
+ u32 command;
+ u32 cid;
+ u32 block;
+ u32 count;
+ struct sgmap sg; // Must be last in struct because it is variable
+};
+
+struct aac_read64
+{
+ u32 command;
+ u16 cid;
+ u16 sector_count;
+ u32 block;
+ u16 pad;
+ u16 flags;
+ struct sgmap64 sg; // Must be last in struct because it is variable
+};
+
+struct aac_read_reply
+{
+ u32 status;
+ u32 count;
+};
+
+struct aac_write
+{
+ u32 command;
+ u32 cid;
+ u32 block;
+ u32 count;
+ u32 stable; // Not used
+ struct sgmap sg; // Must be last in struct because it is variable
+};
+
+struct aac_write64
+{
+ u32 command;
+ u16 cid;
+ u16 sector_count;
+ u32 block;
+ u16 pad;
+ u16 flags;
+ struct sgmap64 sg; // Must be last in struct because it is variable
+};
+struct aac_write_reply
+{
+ u32 status;
+ u32 count;
+ u32 committed;
+};
+
+struct aac_srb
+{
+ u32 function;
+ u32 channel;
+ u32 target;
+ u32 lun;
+ u32 timeout;
+ u32 flags;
+ u32 count; // Data xfer size
+ u32 retry_limit;
+ u32 cdb_size;
+ u8 cdb[16];
+ struct sgmap sg;
+};
+
+
+
+#define AAC_SENSE_BUFFERSIZE 30
+
+struct aac_srb_reply
+{
+ u32 status;
+ u32 srb_status;
+ u32 scsi_status;
+ u32 data_xfer_length;
+ u32 sense_data_size;
+ u8 sense_data[AAC_SENSE_BUFFERSIZE]; // Can this be SCSI_SENSE_BUFFERSIZE
+};
+/*
+ * SRB Flags
+ */
+#define SRB_NoDataXfer 0x0000
+#define SRB_DisableDisconnect 0x0004
+#define SRB_DisableSynchTransfer 0x0008
+#define SRB_BypassFrozenQueue 0x0010
+#define SRB_DisableAutosense 0x0020
+#define SRB_DataIn 0x0040
+#define SRB_DataOut 0x0080
+
+/*
+ * SRB Functions - set in aac_srb->function
+ */
+#define SRBF_ExecuteScsi 0x0000
+#define SRBF_ClaimDevice 0x0001
+#define SRBF_IO_Control 0x0002
+#define SRBF_ReceiveEvent 0x0003
+#define SRBF_ReleaseQueue 0x0004
+#define SRBF_AttachDevice 0x0005
+#define SRBF_ReleaseDevice 0x0006
+#define SRBF_Shutdown 0x0007
+#define SRBF_Flush 0x0008
+#define SRBF_AbortCommand 0x0010
+#define SRBF_ReleaseRecovery 0x0011
+#define SRBF_ResetBus 0x0012
+#define SRBF_ResetDevice 0x0013
+#define SRBF_TerminateIO 0x0014
+#define SRBF_FlushQueue 0x0015
+#define SRBF_RemoveDevice 0x0016
+#define SRBF_DomainValidation 0x0017
+
+/*
+ * SRB SCSI Status - set in aac_srb->scsi_status
+ */
+#define SRB_STATUS_PENDING 0x00
+#define SRB_STATUS_SUCCESS 0x01
+#define SRB_STATUS_ABORTED 0x02
+#define SRB_STATUS_ABORT_FAILED 0x03
+#define SRB_STATUS_ERROR 0x04
+#define SRB_STATUS_BUSY 0x05
+#define SRB_STATUS_INVALID_REQUEST 0x06
+#define SRB_STATUS_INVALID_PATH_ID 0x07
+#define SRB_STATUS_NO_DEVICE 0x08
+#define SRB_STATUS_TIMEOUT 0x09
+#define SRB_STATUS_SELECTION_TIMEOUT 0x0A
+#define SRB_STATUS_COMMAND_TIMEOUT 0x0B
+#define SRB_STATUS_MESSAGE_REJECTED 0x0D
+#define SRB_STATUS_BUS_RESET 0x0E
+#define SRB_STATUS_PARITY_ERROR 0x0F
+#define SRB_STATUS_REQUEST_SENSE_FAILED 0x10
+#define SRB_STATUS_NO_HBA 0x11
+#define SRB_STATUS_DATA_OVERRUN 0x12
+#define SRB_STATUS_UNEXPECTED_BUS_FREE 0x13
+#define SRB_STATUS_PHASE_SEQUENCE_FAILURE 0x14
+#define SRB_STATUS_BAD_SRB_BLOCK_LENGTH 0x15
+#define SRB_STATUS_REQUEST_FLUSHED 0x16
+#define SRB_STATUS_DELAYED_RETRY 0x17
+#define SRB_STATUS_INVALID_LUN 0x20
+#define SRB_STATUS_INVALID_TARGET_ID 0x21
+#define SRB_STATUS_BAD_FUNCTION 0x22
+#define SRB_STATUS_ERROR_RECOVERY 0x23
+#define SRB_STATUS_NOT_STARTED 0x24
+#define SRB_STATUS_NOT_IN_USE 0x30
+#define SRB_STATUS_FORCE_ABORT 0x31
+#define SRB_STATUS_DOMAIN_VALIDATION_FAIL 0x32
+
+/*
+ * Object-Server / Volume-Manager Dispatch Classes
+ */
+
+#define VM_Null 0
+#define VM_NameServe 1
+#define VM_ContainerConfig 2
+#define VM_Ioctl 3
+#define VM_FilesystemIoctl 4
+#define VM_CloseAll 5
+#define VM_CtBlockRead 6
+#define VM_CtBlockWrite 7
+#define VM_SliceBlockRead 8 /* raw access to configured "storage objects" */
+#define VM_SliceBlockWrite 9
+#define VM_DriveBlockRead 10 /* raw access to physical devices */
+#define VM_DriveBlockWrite 11
+#define VM_EnclosureMgt 12 /* enclosure management */
+#define VM_Unused 13 /* used to be diskset management */
+#define VM_CtBlockVerify 14
+#define VM_CtPerf 15 /* performance test */
+#define VM_CtBlockRead64 16
+#define VM_CtBlockWrite64 17
+#define VM_CtBlockVerify64 18
+#define VM_CtHostRead64 19
+#define VM_CtHostWrite64 20
+
+#define MAX_VMCOMMAND_NUM 21 /* used for sizing stats array - leave last */
+
+/*
+ * Descriptive information (eg, vital stats)
+ * that a content manager might report. The
+ * FileArray filesystem component is one example
+ * of a content manager. Raw mode might be
+ * another.
+ */
+
+struct aac_fsinfo {
+ u32 fsTotalSize; /* Consumed by fs, incl. metadata */
+ u32 fsBlockSize;
+ u32 fsFragSize;
+ u32 fsMaxExtendSize;
+ u32 fsSpaceUnits;
+ u32 fsMaxNumFiles;
+ u32 fsNumFreeFiles;
+ u32 fsInodeDensity;
+}; /* valid iff ObjType == FT_FILESYS && !(ContentState & FSCS_NOTCLEAN) */
+
+union aac_contentinfo {
+ struct aac_fsinfo filesys; /* valid iff ObjType == FT_FILESYS && !(ContentState & FSCS_NOTCLEAN) */
+};
+
+/*
+ * Query for "mountable" objects, ie, objects that are typically
+ * associated with a drive letter on the client (host) side.
+ */
+
+struct aac_mntent {
+ u32 oid;
+ u8 name[16]; // if applicable
+ struct creation_info create_info; // if applicable
+ u32 capacity;
+ u32 vol; // substrate structure
+ u32 obj; // FT_FILESYS, FT_DATABASE, etc.
+ u32 state; // unready for mounting, readonly, etc.
+ union aac_contentinfo fileinfo; // Info specific to content manager (eg, filesystem)
+ u32 altoid; // != oid <==> snapshot or broken mirror exists
+};
+
+#define FSCS_READONLY 0x0002 /* possible result of broken mirror */
+
+struct aac_query_mount {
+ u32 command;
+ u32 type;
+ u32 count;
+};
+
+struct aac_mount {
+ u32 status;
+ u32 type; /* should be same as that requested */
+ u32 count;
+ struct aac_mntent mnt[1];
+};
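+
+/*
+ * Note: mnt[1] is the old C idiom for a variable-length trailing array;
+ * the reply presumably carries `count` aac_mntent entries in place of the
+ * single declared element.
+ */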
+
+/*
+ * The following command is sent to shut down each container.
+ */
+
+struct aac_close {
+ u32 command;
+ u32 cid;
+};
+
+struct aac_query_disk
+{
+ s32 cnum;
+ s32 bus;
+ s32 target;
+ s32 lun;
+ u32 valid;
+ u32 locked;
+ u32 deleted;
+ s32 instance;
+ s8 name[10];
+ u32 unmapped;
+};
+
+struct aac_delete_disk {
+ u32 disknum;
+ u32 cnum;
+};
+
+struct fib_ioctl
+{
+ char *fibctx;
+ int wait;
+ char *fib;
+};
+
+struct revision
+{
+ u32 compat;
+ u32 version;
+ u32 build;
+};
+
+/*
+ * Ugly, non-Linux-like ioctl coding kept for backward compatibility.
+ */
+
+#define CTL_CODE(function, method) ( \
+ (4<< 16) | ((function) << 2) | (method) \
+)
+
+/*
+ * Define the method codes for how buffers are passed for I/O and FS
+ * controls
+ */
+
+#define METHOD_BUFFERED 0
+#define METHOD_NEITHER 3
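+
+/*
+ * Worked example: FSACTL_SENDFIB below expands to
+ * CTL_CODE(2050, METHOD_BUFFERED) = (4 << 16) | (2050 << 2) | 0
+ *                                 = 0x40000 | 0x2008 = 0x42008.
+ */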
+
+/*
+ * Filesystem ioctls
+ */
+
+#define FSACTL_SENDFIB CTL_CODE(2050, METHOD_BUFFERED)
+#define FSACTL_SEND_RAW_SRB CTL_CODE(2067, METHOD_BUFFERED)
+#define FSACTL_DELETE_DISK 0x163
+#define FSACTL_QUERY_DISK 0x173
+#define FSACTL_OPEN_GET_ADAPTER_FIB CTL_CODE(2100, METHOD_BUFFERED)
+#define FSACTL_GET_NEXT_ADAPTER_FIB CTL_CODE(2101, METHOD_BUFFERED)
+#define FSACTL_CLOSE_GET_ADAPTER_FIB CTL_CODE(2102, METHOD_BUFFERED)
+#define FSACTL_MINIPORT_REV_CHECK CTL_CODE(2107, METHOD_BUFFERED)
+#define FSACTL_GET_PCI_INFO CTL_CODE(2119, METHOD_BUFFERED)
+#define FSACTL_FORCE_DELETE_DISK CTL_CODE(2120, METHOD_NEITHER)
+
+
+struct aac_common
+{
+ /*
+ * If this value is set to 1 then interrupt moderation will occur
+	 * in the base communication support.
+ */
+ u32 irq_mod;
+ u32 peak_fibs;
+ u32 zero_fibs;
+ u32 fib_timeouts;
+ /*
+ * Statistical counters in debug mode
+ */
+#ifdef DBG
+ u32 FibsSent;
+ u32 FibRecved;
+ u32 NoResponseSent;
+ u32 NoResponseRecved;
+ u32 AsyncSent;
+ u32 AsyncRecved;
+ u32 NormalSent;
+ u32 NormalRecved;
+#endif
+};
+
+extern struct aac_common aac_config;
+
+
+/*
+ * The following macro is used when sending and receiving FIBs. It is
+ * only used for debugging.
+ */
+
+#if DBG
+#define FIB_COUNTER_INCREMENT(counter) (counter)++
+#else
+#define FIB_COUNTER_INCREMENT(counter)
+#endif
+
+/*
+ * Adapter direct commands
+ * Monitor/Kernel API
+ */
+
+#define BREAKPOINT_REQUEST cpu_to_le32(0x00000004)
+#define INIT_STRUCT_BASE_ADDRESS cpu_to_le32(0x00000005)
+#define READ_PERMANENT_PARAMETERS cpu_to_le32(0x0000000a)
+#define WRITE_PERMANENT_PARAMETERS cpu_to_le32(0x0000000b)
+#define HOST_CRASHING cpu_to_le32(0x0000000d)
+#define SEND_SYNCHRONOUS_FIB cpu_to_le32(0x0000000c)
+#define GET_ADAPTER_PROPERTIES cpu_to_le32(0x00000019)
+#define RE_INIT_ADAPTER cpu_to_le32(0x000000ee)
+
+/*
+ * Adapter Status Register
+ *
+ * The Phase Status mailbox is 32 bits:
+ * <31:16> = Phase Status
+ * <15:0>  = Phase
+ *
+ * The adapter reports its present state through the phase. Only
+ * a single phase should ever be set. Each phase can have multiple
+ * phase status bits to provide more detailed information about the
+ * state of the board. Care should be taken to ensure that any phase
+ * status bits that are set when changing the phase are also valid
+ * for the new phase or are cleared out. Adapter software (monitor,
+ * iflash, kernel) is responsible for properly maintaining the phase
+ * status mailbox when it is running.
+ *
+ * MONKER_API Phases
+ *
+ * Phases are bit oriented. It is NOT valid to have multiple bits set
+ */
+
+#define SELF_TEST_FAILED cpu_to_le32(0x00000004)
+#define KERNEL_UP_AND_RUNNING cpu_to_le32(0x00000080)
+#define KERNEL_PANIC cpu_to_le32(0x00000100)
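+
+/*
+ * Example reading: a mailbox value of 0x00000080 carries the phase
+ * KERNEL_UP_AND_RUNNING in <15:0> with no phase-status bits set in <31:16>.
+ */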
+
+/*
+ * Doorbell bit defines
+ */
+
+#define DoorBellPrintfDone cpu_to_le32(1<<5) // Host -> Adapter
+#define DoorBellAdapterNormCmdReady cpu_to_le32(1<<1) // Adapter -> Host
+#define DoorBellAdapterNormRespReady cpu_to_le32(1<<2) // Adapter -> Host
+#define DoorBellAdapterNormCmdNotFull cpu_to_le32(1<<3) // Adapter -> Host
+#define DoorBellAdapterNormRespNotFull cpu_to_le32(1<<4) // Adapter -> Host
+#define DoorBellPrintfReady cpu_to_le32(1<<5) // Adapter -> Host
+
+/*
+ * For FIB communication, we need all of the following things
+ * to send back to the user.
+ */
+
+#define AifCmdEventNotify 1 /* Notify of event */
+#define AifCmdJobProgress 2 /* Progress report */
+#define AifCmdAPIReport 3 /* Report from other user of API */
+#define AifCmdDriverNotify 4 /* Notify host driver of event */
+#define AifReqJobList 100 /* Gets back complete job list */
+#define AifReqJobsForCtr 101 /* Gets back jobs for specific container */
+#define AifReqJobsForScsi 102 /* Gets back jobs for specific SCSI device */
+#define AifReqJobReport 103 /* Gets back a specific job report or list of them */
+#define AifReqTerminateJob 104 /* Terminates job */
+#define AifReqSuspendJob 105 /* Suspends a job */
+#define AifReqResumeJob 106 /* Resumes a job */
+#define AifReqSendAPIReport 107 /* API generic report requests */
+#define AifReqAPIJobStart 108 /* Start a job from the API */
+#define AifReqAPIJobUpdate 109 /* Update a job report from the API */
+#define AifReqAPIJobFinish 110 /* Finish a job from the API */
+
+/*
+ * Adapter Initiated FIB command structures. Start with the adapter
+ * initiated FIBs that really come from the adapter, and get responded
+ * to by the host.
+ */
+
+struct aac_aifcmd {
+ u32 command; /* Tell host what type of notify this is */
+ u32 seqnum; /* To allow ordering of reports (if necessary) */
+ u8 data[1]; /* Undefined length (from kernel viewpoint) */
+};
+
+static inline u32 fib2addr(struct hw_fib *hw)
+{
+ return (u32)hw;
+}
+
+static inline struct hw_fib *addr2fib(u32 addr)
+{
+ return (struct hw_fib *)addr;
+}
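+
+/*
+ * Note: fib2addr()/addr2fib() simply cast between a host virtual address
+ * and the 32-bit FIB address handed to the adapter, so they rely on kernel
+ * virtual addresses fitting in 32 bits (compare the BITS_PER_LONG >= 64
+ * fib_base_va handling in struct aac_dev above).
+ */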
+
+const char *aac_driverinfo(struct Scsi_Host *);
+struct fib *fib_alloc(struct aac_dev *dev);
+int fib_setup(struct aac_dev *dev);
+void fib_map_free(struct aac_dev *dev);
+void fib_free(struct fib * context);
+void fib_init(struct fib * context);
+void fib_dealloc(struct fib * context);
+void aac_printf(struct aac_dev *dev, u32 val);
+int fib_send(u16 command, struct fib * context, unsigned long size, int priority, int wait, int reply, fib_callback callback, void *ctxt);
+int aac_consumer_get(struct aac_dev * dev, struct aac_queue * q, struct aac_entry **entry);
+int aac_consumer_avail(struct aac_dev * dev, struct aac_queue * q);
+void aac_consumer_free(struct aac_dev * dev, struct aac_queue * q, u32 qnum);
+int fib_complete(struct fib * context);
+#define fib_data(fibctx) ((void *)(fibctx)->fib->data)
+int aac_detach(struct aac_dev *dev);
+struct aac_dev *aac_init_adapter(struct aac_dev *dev);
+int aac_get_containers(struct aac_dev *dev);
+int aac_scsi_cmd(Scsi_Cmnd *scsi_cmnd_ptr);
+int aac_dev_ioctl(struct aac_dev *dev, int cmd, void *arg);
+int aac_do_ioctl(struct aac_dev * dev, int cmd, void *arg);
+int aac_rx_init(struct aac_dev *dev, unsigned long devNumber);
+int aac_sa_init(struct aac_dev *dev, unsigned long devNumber);
+unsigned int aac_response_normal(struct aac_queue * q);
+unsigned int aac_command_normal(struct aac_queue * q);
+#ifdef TRY_TASKLET
+extern struct tasklet_struct aac_command_tasklet;
+int aac_command_thread(unsigned long data);
+#else
+int aac_command_thread(struct aac_dev * dev);
+#endif
+int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context *fibctx);
+int fib_adapter_complete(struct fib * fibptr, unsigned short size);
+struct aac_driver_ident* aac_get_driver_ident(int devtype);
+int aac_get_adapter_info(struct aac_dev* dev);
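+
+/*
+ * Typical synchronous FIB lifecycle, sketched from aac_send_shutdown() in
+ * comminit.c (error handling omitted):
+ *
+ *	struct fib *fibptr = fib_alloc(dev);
+ *	fib_init(fibptr);
+ *	cmd = fib_data(fibptr);		// fill in command-specific data
+ *	fib_send(command, fibptr, size, FsaNormal, 1, 1, NULL, NULL);
+ *	fib_complete(fibptr);
+ *	fib_free(fibptr);
+ */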
diff --git a/xen/drivers/scsi/aacraid/commctrl.c b/xen/drivers/scsi/aacraid/commctrl.c
new file mode 100644
index 0000000000..15b6a62c6f
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/commctrl.c
@@ -0,0 +1,438 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ * commctrl.c
+ *
+ * Abstract: Contains all routines for control of the AFA comm layer
+ *
+ */
+
+#include <xeno/config.h>
+/* #include <xeno/kernel.h> */
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+/* #include <xeno/spinlock.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/completion.h> */
+#include <xeno/blk.h>
+/* #include <asm/semaphore.h> */
+#include <asm/uaccess.h>
+#include "scsi.h"
+#include "hosts.h"
+
+#include "aacraid.h"
+
+/**
+ * ioctl_send_fib - send a FIB from userspace
+ * @dev: adapter being processed
+ * @arg: arguments to the ioctl call
+ *
+ * This routine sends a fib to the adapter on behalf of a user level
+ * program.
+ */
+
+static int ioctl_send_fib(struct aac_dev * dev, void *arg)
+{
+ struct hw_fib * kfib;
+ struct fib *fibptr;
+
+ fibptr = fib_alloc(dev);
+ if(fibptr == NULL)
+ return -ENOMEM;
+
+ kfib = fibptr->fib;
+ /*
+ * First copy in the header so that we can check the size field.
+ */
+ if (copy_from_user((void *)kfib, arg, sizeof(struct aac_fibhdr))) {
+ fib_free(fibptr);
+ return -EFAULT;
+ }
+ /*
+ * Since we copy based on the fib header size, make sure that we
+ * will not overrun the buffer when we copy the memory. Return
+ * an error if we would.
+ */
+ if(le32_to_cpu(kfib->header.Size) > sizeof(struct hw_fib) - sizeof(struct aac_fibhdr)) {
+ fib_free(fibptr);
+ return -EINVAL;
+ }
+
+ if (copy_from_user((void *) kfib, arg, le32_to_cpu(kfib->header.Size) + sizeof(struct aac_fibhdr))) {
+ fib_free(fibptr);
+ return -EFAULT;
+ }
+
+ if (kfib->header.Command == cpu_to_le32(TakeABreakPt)) {
+ aac_adapter_interrupt(dev);
+ /*
+ * Since we didn't really send a fib, zero out the state to allow
+ * cleanup code not to assert.
+ */
+ kfib->header.XferState = 0;
+ } else {
+ if (fib_send(kfib->header.Command, fibptr, le32_to_cpu(kfib->header.Size) , FsaNormal,
+ 1, 1, NULL, NULL) != 0)
+ {
+ fib_free(fibptr);
+ return -EINVAL;
+ }
+ if (fib_complete(fibptr) != 0) {
+ fib_free(fibptr);
+ return -EINVAL;
+ }
+ }
+ /*
+ * Make sure that the size returned by the adapter (which includes
+ * the header) is less than or equal to the size of a fib, so we
+ * don't corrupt application data. Then copy that size to the user
+ * buffer. (Don't try to add the header information again, since it
+ * was already included by the adapter.)
+ */
+
+ if (copy_to_user(arg, (void *)kfib, kfib->header.Size)) {
+ fib_free(fibptr);
+ return -EFAULT;
+ }
+ fib_free(fibptr);
+ return 0;
+}
+
+/**
+ *	open_getadapter_fib	-	open an adapter fib context
+ *	@dev: adapter to use
+ *	@arg: ioctl argument
+ *
+ *	This routine allocates a new AdapterFibContext, initializes it, and
+ *	adds it to the adapter's list of contexts registered to receive FIBs.
+ */
+
+static int open_getadapter_fib(struct aac_dev * dev, void *arg)
+{
+ struct aac_fib_context * fibctx;
+ int status;
+ unsigned long flags;
+
+ fibctx = kmalloc(sizeof(struct aac_fib_context), GFP_KERNEL);
+ if (fibctx == NULL) {
+ status = -ENOMEM;
+ } else {
+ fibctx->type = FSAFS_NTC_GET_ADAPTER_FIB_CONTEXT;
+ fibctx->size = sizeof(struct aac_fib_context);
+#if 0
+ /*
+ * Initialize the mutex used to wait for the next AIF.
+ */
+ init_MUTEX_LOCKED(&fibctx->wait_sem);
+#endif
+ fibctx->wait = 0;
+ /*
+ * Initialize the fibs and set the count of fibs on
+ * the list to 0.
+ */
+ fibctx->count = 0;
+ INIT_LIST_HEAD(&fibctx->fibs);
+ fibctx->jiffies = jiffies/HZ;
+ /*
+ * Now add this context onto the adapter's
+ * AdapterFibContext list.
+ */
+ spin_lock_irqsave(&dev->fib_lock, flags);
+ list_add_tail(&fibctx->next, &dev->fib_list);
+ spin_unlock_irqrestore(&dev->fib_lock, flags);
+ if (copy_to_user(arg, &fibctx, sizeof(struct aac_fib_context *))) {
+ status = -EFAULT;
+ } else {
+ status = 0;
+ }
+ }
+ return status;
+}
+
+/**
+ * next_getadapter_fib - get the next fib
+ * @dev: adapter to use
+ * @arg: ioctl argument
+ *
+ * This routine will get the next Fib, if available, from the AdapterFibContext
+ * passed in from the user.
+ */
+
+static int next_getadapter_fib(struct aac_dev * dev, void *arg)
+{
+ struct fib_ioctl f;
+ struct aac_fib_context *fibctx, *aifcp;
+ struct hw_fib * fib;
+ int status;
+ struct list_head * entry;
+ int found;
+ unsigned long flags;
+
+ if(copy_from_user((void *)&f, arg, sizeof(struct fib_ioctl)))
+ return -EFAULT;
+ /*
+ * Extract the AdapterFibContext from the Input parameters.
+ */
+ fibctx = (struct aac_fib_context *) f.fibctx;
+
+ /*
+ * Verify that the HANDLE passed in was a valid AdapterFibContext
+ *
+ * Search the list of AdapterFibContext addresses on the adapter
+ * to be sure this is a valid address
+ */
+ found = 0;
+ entry = dev->fib_list.next;
+
+ while(entry != &dev->fib_list) {
+ aifcp = list_entry(entry, struct aac_fib_context, next);
+ if(fibctx == aifcp) { /* We found a winner */
+ found = 1;
+ break;
+ }
+ entry = entry->next;
+ }
+ if (found == 0)
+ return -EINVAL;
+
+ if((fibctx->type != FSAFS_NTC_GET_ADAPTER_FIB_CONTEXT) ||
+ (fibctx->size != sizeof(struct aac_fib_context)))
+ return -EINVAL;
+ status = 0;
+ spin_lock_irqsave(&dev->fib_lock, flags);
+ /*
+ * If there are no fibs to send back, then either wait or return
+ * -EAGAIN
+ */
+return_fib:
+ if (!list_empty(&fibctx->fibs)) {
+ struct list_head * entry;
+ /*
+ * Pull the next fib from the fibs
+ */
+ entry = fibctx->fibs.next;
+ list_del(entry);
+
+ fib = list_entry(entry, struct hw_fib, header.FibLinks);
+ fibctx->count--;
+ spin_unlock_irqrestore(&dev->fib_lock, flags);
+ if (copy_to_user(f.fib, fib, sizeof(struct hw_fib))) {
+ kfree(fib);
+ return -EFAULT;
+ }
+ /*
+ * Free the space occupied by this copy of the fib.
+ */
+ kfree(fib);
+ status = 0;
+ fibctx->jiffies = jiffies/HZ;
+ } else {
+ spin_unlock_irqrestore(&dev->fib_lock, flags);
+ if (f.wait) {
+#if 0
+ if(down_interruptible(&fibctx->wait_sem) < 0) {
+ status = -EINTR;
+ } else {
+#else
+ {
+#endif
+ /* Lock again and retry */
+ spin_lock_irqsave(&dev->fib_lock, flags);
+ goto return_fib;
+ }
+ } else {
+ status = -EAGAIN;
+ }
+ }
+ return status;
+}
+
+int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context * fibctx)
+{
+ struct hw_fib *fib;
+
+ /*
+ * First free any FIBs that have not been consumed.
+ */
+ while (!list_empty(&fibctx->fibs)) {
+ struct list_head * entry;
+ /*
+ * Pull the next fib from the fibs
+ */
+ entry = fibctx->fibs.next;
+ list_del(entry);
+ fib = list_entry(entry, struct hw_fib, header.FibLinks);
+ fibctx->count--;
+ /*
+ * Free the space occupied by this copy of the fib.
+ */
+ kfree(fib);
+ }
+ /*
+ * Remove the Context from the AdapterFibContext List
+ */
+ list_del(&fibctx->next);
+ /*
+ * Invalidate context
+ */
+ fibctx->type = 0;
+ /*
+ * Free the space occupied by the Context
+ */
+ kfree(fibctx);
+ return 0;
+}
+
+/**
+ * close_getadapter_fib - close down user fib context
+ * @dev: adapter
+ * @arg: ioctl arguments
+ *
+ * This routine will close down the fibctx passed in from the user.
+ */
+
+static int close_getadapter_fib(struct aac_dev * dev, void *arg)
+{
+ struct aac_fib_context *fibctx, *aifcp;
+ int status;
+ unsigned long flags;
+ struct list_head * entry;
+ int found;
+
+ /*
+ * Extract the fibctx from the input parameters
+ */
+ fibctx = arg;
+
+ /*
+ * Verify that the HANDLE passed in was a valid AdapterFibContext
+ *
+ * Search the list of AdapterFibContext addresses on the adapter
+ * to be sure this is a valid address
+ */
+
+ found = 0;
+ entry = dev->fib_list.next;
+
+ while(entry != &dev->fib_list) {
+ aifcp = list_entry(entry, struct aac_fib_context, next);
+ if(fibctx == aifcp) { /* We found a winner */
+ found = 1;
+ break;
+ }
+ entry = entry->next;
+ }
+
+ if(found == 0)
+ return 0; /* Already gone */
+
+ if((fibctx->type != FSAFS_NTC_GET_ADAPTER_FIB_CONTEXT) ||
+ (fibctx->size != sizeof(struct aac_fib_context)))
+ return -EINVAL;
+ spin_lock_irqsave(&dev->fib_lock, flags);
+ status = aac_close_fib_context(dev, fibctx);
+ spin_unlock_irqrestore(&dev->fib_lock, flags);
+ return status;
+}
+
+/**
+ * check_revision - check the firmware revision
+ * @dev: adapter
+ * @arg: ioctl arguments
+ *
+ * This routine returns the firmware version.
+ * Under Linux, there have been no version incompatibilities, so this is simple!
+ */
+
+static int check_revision(struct aac_dev *dev, void *arg)
+{
+ struct revision response;
+
+ response.compat = 1;
+ response.version = dev->adapter_info.kernelrev;
+ response.build = dev->adapter_info.kernelbuild;
+
+ if (copy_to_user(arg, &response, sizeof(response)))
+ return -EFAULT;
+ return 0;
+}
+
+
+struct aac_pci_info {
+ u32 bus;
+ u32 slot;
+};
+
+
+int aac_get_pci_info(struct aac_dev* dev, void* arg)
+{
+ struct aac_pci_info pci_info;
+
+ pci_info.bus = dev->pdev->bus->number;
+ pci_info.slot = PCI_SLOT(dev->pdev->devfn);
+
+ if(copy_to_user( arg, (void*)&pci_info, sizeof(struct aac_pci_info)))
+ return -EFAULT;
+ return 0;
+ }
+
+
+int aac_do_ioctl(struct aac_dev * dev, int cmd, void *arg)
+{
+ int status;
+
+ /*
+ * HBA gets first crack
+ */
+
+ status = aac_dev_ioctl(dev, cmd, arg);
+ if(status != -ENOTTY)
+ return status;
+
+ switch (cmd) {
+ case FSACTL_MINIPORT_REV_CHECK:
+ status = check_revision(dev, arg);
+ break;
+ case FSACTL_SENDFIB:
+ status = ioctl_send_fib(dev, arg);
+ break;
+ case FSACTL_OPEN_GET_ADAPTER_FIB:
+ status = open_getadapter_fib(dev, arg);
+ break;
+ case FSACTL_GET_NEXT_ADAPTER_FIB:
+ status = next_getadapter_fib(dev, arg);
+ break;
+ case FSACTL_CLOSE_GET_ADAPTER_FIB:
+ status = close_getadapter_fib(dev, arg);
+ break;
+ case FSACTL_GET_PCI_INFO:
+ status = aac_get_pci_info(dev,arg);
+ break;
+ default:
+ status = -ENOTTY;
+ break;
+ }
+ return status;
+}
+
diff --git a/xen/drivers/scsi/aacraid/comminit.c b/xen/drivers/scsi/aacraid/comminit.c
new file mode 100644
index 0000000000..29a3dba28e
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/comminit.c
@@ -0,0 +1,350 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ * comminit.c
+ *
+ * Abstract: This supports the initialization of the host adapter communication interface.
+ *    This is a platform-dependent module for the PCI cyclone board.
+ *
+ */
+
+#include <xeno/config.h>
+/* #include <xeno/kernel.h> */
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+#include <xeno/spinlock.h>
+/* #include <xeno/slab.h> */
+#include <xeno/blk.h>
+/* #include <xeno/completion.h> */
+/* #include <asm/semaphore.h> */
+#include "scsi.h"
+#include "hosts.h"
+
+#include "aacraid.h"
+
+struct aac_common aac_config;
+
+static struct aac_dev *devices;
+
+static int aac_alloc_comm(struct aac_dev *dev, void **commaddr, unsigned long commsize, unsigned long commalign)
+{
+ unsigned char *base;
+ unsigned long size, align;
+ unsigned long fibsize = 4096;
+ unsigned long printfbufsiz = 256;
+ struct aac_init *init;
+ dma_addr_t phys;
+
+ /* FIXME: Adaptec add 128 bytes to this value - WHY ?? */
+ size = fibsize + sizeof(struct aac_init) + commsize + commalign + printfbufsiz;
+
+ base = pci_alloc_consistent(dev->pdev, size, &phys);
+ if(base == NULL)
+ {
+ printk(KERN_ERR "aacraid: unable to create mapping.\n");
+ return 0;
+ }
+ dev->comm_addr = (void *)base;
+ dev->comm_phys = phys;
+ dev->comm_size = size;
+
+ dev->init = (struct aac_init *)(base + fibsize);
+ dev->init_pa = phys + fibsize;
+
+ /*
+ * Cache the upper bits of the virtual mapping for 64bit boxes
+ * FIXME: this crap should be rewritten
+ */
+#if BITS_PER_LONG >= 64
+ dev->fib_base_va = ((ulong)base & 0xffffffff00000000);
+#endif
+
+ init = dev->init;
+
+ init->InitStructRevision = cpu_to_le32(ADAPTER_INIT_STRUCT_REVISION);
+ init->MiniPortRevision = cpu_to_le32(Sa_MINIPORT_REVISION);
+ init->fsrev = cpu_to_le32(dev->fsrev);
+
+ /*
+ * Adapter Fibs are the first thing allocated so that they
+ * start page aligned
+ */
+ init->AdapterFibsVirtualAddress = cpu_to_le32((u32)base);
+ init->AdapterFibsPhysicalAddress = cpu_to_le32(phys);
+ init->AdapterFibsSize = cpu_to_le32(fibsize);
+ init->AdapterFibAlign = cpu_to_le32(sizeof(struct hw_fib));
+
+ /*
+ * Increment the base address by the amount already used
+ */
+ base = base + fibsize + sizeof(struct aac_init);
+ phys = phys + fibsize + sizeof(struct aac_init);
+ /*
+ * Align the beginning of Headers to commalign
+ */
+ align = (commalign - ((unsigned long)(base) & (commalign - 1)));
+ base = base + align;
+ phys = phys + align;
+ /*
+ * Fill in addresses of the Comm Area Headers and Queues
+ */
+ *commaddr = (unsigned long *)base;
+ init->CommHeaderAddress = cpu_to_le32(phys);
+ /*
+ * Increment the base address by the size of the CommArea
+ */
+ base = base + commsize;
+ phys = phys + commsize;
+ /*
+ * Place the Printf buffer area after the Fast I/O comm area.
+ */
+ dev->printfbuf = (void *)base;
+ init->printfbuf = cpu_to_le32(phys);
+ init->printfbufsiz = cpu_to_le32(printfbufsiz);
+ memset(base, 0, printfbufsiz);
+ return 1;
+}
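+
+/*
+ * Resulting layout of the consistent allocation (a sketch):
+ *
+ *	base/phys -> +------------------------------+
+ *	             | adapter fibs (fibsize)       |
+ *	             +------------------------------+
+ *	             | struct aac_init              |
+ *	             +------------------------------+
+ *	             | padding up to commalign      |
+ *	             +------------------------------+
+ *	             | comm area (commsize)         |
+ *	             +------------------------------+
+ *	             | printf buffer (printfbufsiz) |
+ *	             +------------------------------+
+ */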
+
+static void aac_queue_init(struct aac_dev * dev, struct aac_queue * q, u32 *mem, int qsize)
+{
+ q->numpending = 0;
+ q->dev = dev;
+ INIT_LIST_HEAD(&q->pendingq);
+#if 0
+ init_waitqueue_head(&q->cmdready);
+#endif
+ INIT_LIST_HEAD(&q->cmdq);
+#if 0
+ init_waitqueue_head(&q->qfull);
+#endif
+ spin_lock_init(&q->lockdata);
+ q->lock = &q->lockdata;
+ q->headers.producer = mem;
+ q->headers.consumer = mem+1;
+ *q->headers.producer = cpu_to_le32(qsize);
+ *q->headers.consumer = cpu_to_le32(qsize);
+ q->entries = qsize;
+}
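+
+/*
+ * Note: both indices start at qsize; aac_get_entry() in commsup.c treats
+ * an index >= the queue size as a wrap back to slot 0, so each queue
+ * begins logically empty (producer == consumer).
+ */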
+
+/**
+ * aac_send_shutdown - shutdown an adapter
+ * @dev: Adapter to shutdown
+ *
+ * This routine will send a VM_CloseAll (shutdown) request to the adapter.
+ */
+
+static int aac_send_shutdown(struct aac_dev * dev)
+{
+ struct fib * fibctx;
+ struct aac_close *cmd;
+ int status;
+
+ fibctx = fib_alloc(dev);
+ fib_init(fibctx);
+
+ cmd = (struct aac_close *) fib_data(fibctx);
+
+ cmd->command = cpu_to_le32(VM_CloseAll);
+ cmd->cid = cpu_to_le32(0xffffffff);
+
+ status = fib_send(ContainerCommand,
+ fibctx,
+ sizeof(struct aac_close),
+ FsaNormal,
+ 1, 1,
+ NULL, NULL);
+
+ if (status == 0)
+ fib_complete(fibctx);
+ fib_free(fibctx);
+ return status;
+}
+
+/**
+ * aac_detach - detach adapter
+ * @detach: adapter to disconnect
+ *
+ * Disconnect and shutdown an AAC based adapter, freeing resources
+ * as we go.
+ */
+
+int aac_detach(struct aac_dev *detach)
+{
+ struct aac_dev **dev = &devices;
+
+ while(*dev)
+ {
+ if(*dev == detach)
+ {
+ *dev = detach->next;
+ aac_send_shutdown(detach);
+ fib_map_free(detach);
+ pci_free_consistent(detach->pdev, detach->comm_size, detach->comm_addr, detach->comm_phys);
+ kfree(detach->queues);
+ return 1;
+ }
+ dev=&((*dev)->next);
+ }
+ BUG();
+ return 0;
+}
+
+/**
+ * aac_comm_init - Initialise FSA data structures
+ * @dev: Adapter to initialise
+ *
+ * Initializes the data structures that are required for the FSA communication
+ * interface to operate.
+ * Returns
+ *	0 - if we were able to init the communication interface.
+ *	-ENOMEM - if the communication area could not be allocated.
+ */
+
+int aac_comm_init(struct aac_dev * dev)
+{
+ unsigned long hdrsize = (sizeof(u32) * NUMBER_OF_COMM_QUEUES) * 2;
+ unsigned long queuesize = sizeof(struct aac_entry) * TOTAL_QUEUE_ENTRIES;
+ u32 *headers;
+ struct aac_entry * queues;
+ unsigned long size;
+ struct aac_queue_block * comm = dev->queues;
+
+ /*
+	 * Initialize the spinlock used to protect the adapter's fib
+	 * structures (the fib pool and the adapter fib context list).
+ */
+ spin_lock_init(&dev->fib_lock);
+
+ /*
+	 * Allocate the physically contiguous space for the communication
+ * queue headers.
+ */
+
+ size = hdrsize + queuesize;
+
+ if (!aac_alloc_comm(dev, (void * *)&headers, size, QUEUE_ALIGNMENT))
+ return -ENOMEM;
+
+ queues = (struct aac_entry *)((unsigned char *)headers + hdrsize);
+
+	/* Adapter to Host normal priority Command queue */
+ comm->queue[HostNormCmdQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[HostNormCmdQueue], headers, HOST_NORM_CMD_ENTRIES);
+ queues += HOST_NORM_CMD_ENTRIES;
+ headers += 2;
+
+ /* Adapter to Host high priority command queue */
+ comm->queue[HostHighCmdQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[HostHighCmdQueue], headers, HOST_HIGH_CMD_ENTRIES);
+
+ queues += HOST_HIGH_CMD_ENTRIES;
+ headers +=2;
+
+ /* Host to adapter normal priority command queue */
+ comm->queue[AdapNormCmdQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[AdapNormCmdQueue], headers, ADAP_NORM_CMD_ENTRIES);
+
+ queues += ADAP_NORM_CMD_ENTRIES;
+ headers += 2;
+
+ /* host to adapter high priority command queue */
+ comm->queue[AdapHighCmdQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[AdapHighCmdQueue], headers, ADAP_HIGH_CMD_ENTRIES);
+
+ queues += ADAP_HIGH_CMD_ENTRIES;
+ headers += 2;
+
+ /* adapter to host normal priority response queue */
+ comm->queue[HostNormRespQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[HostNormRespQueue], headers, HOST_NORM_RESP_ENTRIES);
+
+ queues += HOST_NORM_RESP_ENTRIES;
+ headers += 2;
+
+ /* adapter to host high priority response queue */
+ comm->queue[HostHighRespQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[HostHighRespQueue], headers, HOST_HIGH_RESP_ENTRIES);
+
+ queues += HOST_HIGH_RESP_ENTRIES;
+ headers += 2;
+
+ /* host to adapter normal priority response queue */
+ comm->queue[AdapNormRespQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[AdapNormRespQueue], headers, ADAP_NORM_RESP_ENTRIES);
+
+ queues += ADAP_NORM_RESP_ENTRIES;
+ headers += 2;
+
+ /* host to adapter high priority response queue */
+ comm->queue[AdapHighRespQueue].base = queues;
+ aac_queue_init(dev, &comm->queue[AdapHighRespQueue], headers, ADAP_HIGH_RESP_ENTRIES);
+
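+	/*
+	 * The host-to-adapter queues presumably share locks with their
+	 * paired adapter-to-host queues: responses to commands sent on
+	 * AdapNormCmdQueue arrive on HostNormRespQueue, so both sides
+	 * serialize on the same lock.
+	 */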
+ comm->queue[AdapNormCmdQueue].lock = comm->queue[HostNormRespQueue].lock;
+ comm->queue[AdapHighCmdQueue].lock = comm->queue[HostHighRespQueue].lock;
+ comm->queue[AdapNormRespQueue].lock = comm->queue[HostNormCmdQueue].lock;
+ comm->queue[AdapHighRespQueue].lock = comm->queue[HostHighCmdQueue].lock;
+
+ return 0;
+}
+
+struct aac_dev *aac_init_adapter(struct aac_dev *dev)
+{
+ /*
+ * Ok now init the communication subsystem
+ */
+ dev->queues = (struct aac_queue_block *)
+ kmalloc(sizeof(struct aac_queue_block), GFP_KERNEL);
+ if (dev->queues == NULL) {
+ printk(KERN_ERR "Error could not allocate comm region.\n");
+ return NULL;
+ }
+ memset(dev->queues, 0, sizeof(struct aac_queue_block));
+
+	printk("aac_init_adapter, dev is %p\n", dev);
+ if (aac_comm_init(dev)<0)
+ return NULL;
+	printk("aac_init_adapter, dev->init is %p\n", dev->init);
+ /*
+ * Initialize the list of fibs
+ */
+ if(fib_setup(dev)<0)
+ return NULL;
+
+ INIT_LIST_HEAD(&dev->fib_list);
+#if 0
+ init_completion(&dev->aif_completion);
+#endif
+ /*
+ * Add this adapter in to our dev List.
+ */
+ dev->next = devices;
+ devices = dev;
+ return dev;
+}
+
+
diff --git a/xen/drivers/scsi/aacraid/commsup.c b/xen/drivers/scsi/aacraid/commsup.c
new file mode 100644
index 0000000000..7d84ad241c
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/commsup.c
@@ -0,0 +1,1028 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ *
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ * commsup.c
+ *
+ * Abstract: Contains all routines that are required for FSA host/adapter
+ *    communication.
+ *
+ *
+ */
+
+#include <xeno/config.h>
+/* #include <xeno/kernel.h> */
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+#include <xeno/spinlock.h>
+
+#include <xeno/interrupt.h> /* tasklet stuff */
+
+/* #include <xeno/slab.h> */
+/* #include <xeno/completion.h> */
+/* #include <asm/semaphore.h> */
+#include <xeno/blk.h>
+#include <xeno/delay.h>
+#include "scsi.h"
+#include "hosts.h"
+
+#include "aacraid.h"
+
+/**
+ * fib_map_alloc - allocate the fib objects
+ * @dev: Adapter to allocate for
+ *
+ * Allocate and map the shared PCI space for the FIB blocks used to
+ * talk to the Adaptec firmware.
+ */
+
+static int fib_map_alloc(struct aac_dev *dev)
+{
+ if((dev->hw_fib_va =
+ pci_alloc_consistent(dev->pdev, sizeof(struct hw_fib) * AAC_NUM_FIB,
+ &dev->hw_fib_pa))==NULL)
+ return -ENOMEM;
+ return 0;
+}
+
+/**
+ * fib_map_free - free the fib objects
+ * @dev: Adapter to free
+ *
+ * Free the PCI mappings and the memory allocated for FIB blocks
+ * on this adapter.
+ */
+
+void fib_map_free(struct aac_dev *dev)
+{
+ pci_free_consistent(dev->pdev, sizeof(struct hw_fib) * AAC_NUM_FIB,
+ dev->hw_fib_va, dev->hw_fib_pa);
+}
+
+/**
+ * fib_setup - setup the fibs
+ * @dev: Adapter to set up
+ *
+ *	Allocate the PCI space for the fibs, map it and then initialise the
+ * fib area, the unmapped fib data and also the free list
+ */
+
+int fib_setup(struct aac_dev * dev)
+{
+ struct fib *fibptr;
+ struct hw_fib *fib;
+ dma_addr_t fibpa;
+ int i;
+
+ if(fib_map_alloc(dev)<0)
+ return -ENOMEM;
+
+ fib = dev->hw_fib_va;
+ fibpa = dev->hw_fib_pa;
+ memset(fib, 0, sizeof(struct hw_fib) * AAC_NUM_FIB);
+ /*
+ * Initialise the fibs
+ */
+ for (i = 0, fibptr = &dev->fibs[i]; i < AAC_NUM_FIB; i++, fibptr++)
+ {
+ fibptr->dev = dev;
+ fibptr->fib = fib;
+ fibptr->data = (void *) fibptr->fib->data;
+ fibptr->next = fibptr+1; /* Forward chain the fibs */
+#if 0
+ init_MUTEX_LOCKED(&fibptr->event_wait);
+#endif
+ spin_lock_init(&fibptr->event_lock);
+ fib->header.XferState = cpu_to_le32(0xffffffff);
+ fib->header.SenderSize = cpu_to_le16(sizeof(struct hw_fib));
+ fibptr->logicaladdr = (unsigned long) fibpa;
+ fib = (struct hw_fib *)((unsigned char *)fib + sizeof(struct hw_fib));
+ fibpa = fibpa + sizeof(struct hw_fib);
+ }
+ /*
+ * Add the fib chain to the free list
+ */
+ dev->fibs[AAC_NUM_FIB-1].next = NULL;
+ /*
+ * Enable this to debug out of queue space
+ */
+ dev->free_fib = &dev->fibs[0];
+ return 0;
+}
+
+/**
+ * fib_alloc - allocate a fib
+ * @dev: Adapter to allocate the fib for
+ *
+ *	Allocate a fib from the adapter fib pool. The pool is expected to
+ *	have a free fib; we BUG() if it is empty.
+ */
+
+struct fib * fib_alloc(struct aac_dev *dev)
+{
+ struct fib * fibptr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->fib_lock, flags);
+ fibptr = dev->free_fib;
+ if(!fibptr)
+ BUG();
+ dev->free_fib = fibptr->next;
+ spin_unlock_irqrestore(&dev->fib_lock, flags);
+ /*
+ * Set the proper node type code and node byte size
+ */
+ fibptr->type = FSAFS_NTC_FIB_CONTEXT;
+ fibptr->size = sizeof(struct fib);
+ /*
+ * Null out fields that depend on being zero at the start of
+ * each I/O
+ */
+ fibptr->fib->header.XferState = cpu_to_le32(0);
+ fibptr->callback = NULL;
+ fibptr->callback_data = NULL;
+
+ return fibptr;
+}
+
+/**
+ * fib_free - free a fib
+ * @fibptr: fib to free up
+ *
+ * Frees up a fib and places it on the appropriate queue
+ * (either free or timed out)
+ */
+
+void fib_free(struct fib * fibptr)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&fibptr->dev->fib_lock, flags);
+
+ if (fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT) {
+ aac_config.fib_timeouts++;
+ fibptr->next = fibptr->dev->timeout_fib;
+ fibptr->dev->timeout_fib = fibptr;
+ } else {
+ if (fibptr->fib->header.XferState != 0) {
+ printk(KERN_WARNING "fib_free, XferState != 0, "
+ "fibptr = 0x%p, XferState = 0x%x\n",
+ (void *)fibptr, fibptr->fib->header.XferState);
+ }
+ fibptr->next = fibptr->dev->free_fib;
+ fibptr->dev->free_fib = fibptr;
+ }
+ spin_unlock_irqrestore(&fibptr->dev->fib_lock, flags);
+}
+
+/**
+ * fib_init - initialise a fib
+ * @fibptr: The fib to initialize
+ *
+ * Set up the generic fib fields ready for use
+ */
+
+void fib_init(struct fib *fibptr)
+{
+ struct hw_fib *fib = fibptr->fib;
+
+ fib->header.StructType = FIB_MAGIC;
+ fib->header.Size = cpu_to_le16(sizeof(struct hw_fib));
+ fib->header.XferState = cpu_to_le32(HostOwned | FibInitialized |
+ FibEmpty | FastResponseCapable);
+ fib->header.SenderFibAddress = cpu_to_le32(0);
+ fib->header.ReceiverFibAddress = cpu_to_le32(0);
+ fib->header.SenderSize = cpu_to_le16(sizeof(struct hw_fib));
+}
+
+/**
+ * fib_dealloc - deallocate a fib
+ * @fibptr: fib to deallocate
+ *
+ * Will deallocate and return to the free pool the FIB pointed to by the
+ * caller.
+ */
+
+void fib_dealloc(struct fib * fibptr)
+{
+ struct hw_fib *fib = fibptr->fib;
+ if(fib->header.StructType != FIB_MAGIC)
+ BUG();
+ fib->header.XferState = cpu_to_le32(0);
+}
+
+/*
+ * Communication primitives define and support the queuing method we use to
+ * support host to adapter communication. All queue accesses happen through
+ * these routines, which are the only routines with knowledge of how the
+ * queues are implemented.
+ */
+
+/**
+ * aac_get_entry - get a queue entry
+ * @dev: Adapter
+ * @qid: Queue Number
+ * @entry: Entry return
+ * @index: Index return
+ * @nonotify: notification control
+ *
+ * For the given queue the routine returns a queue entry if the queue has
+ * free entries. If the queue is full (no free entries) no entry is
+ * returned and the function returns 0; otherwise 1 is returned.
+ */
+
+static int aac_get_entry (struct aac_dev * dev, u32 qid, struct aac_entry **entry, u32 * index, unsigned long *nonotify)
+{
+ struct aac_queue * q;
+
+ /*
+ * All of the queues wrap when they reach the end, so we check
+ * to see if they have reached the end and if they have we just
+ * set the index back to zero. This is a wrap. You could or off
+ * the high bits in all updates but this is a bit faster I think.
+ */
+
+ q = &dev->queues->queue[qid];
+
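+ /*
+ * The check below appears to be an interrupt-moderation hint: if
+ * the producer has run ahead of the consumer the adapter already
+ * has work outstanding, so the caller may skip the doorbell when
+ * aac_config.irq_mod enables it -- see fib_send()/aac_insert_entry().
+ */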
+ *index = le32_to_cpu(*(q->headers.producer));
+ if (*index - 2 == le32_to_cpu(*(q->headers.consumer)))
+ *nonotify = 1;
+
+ if (qid == AdapHighCmdQueue) {
+ if (*index >= ADAP_HIGH_CMD_ENTRIES)
+ *index = 0;
+ } else if (qid == AdapNormCmdQueue) {
+ if (*index >= ADAP_NORM_CMD_ENTRIES)
+ *index = 0; /* Wrap to front of the Producer Queue. */
+ }
+ else if (qid == AdapHighRespQueue)
+ {
+ if (*index >= ADAP_HIGH_RESP_ENTRIES)
+ *index = 0;
+ }
+ else if (qid == AdapNormRespQueue)
+ {
+ if (*index >= ADAP_NORM_RESP_ENTRIES)
+ *index = 0; /* Wrap to front of the Producer Queue. */
+ }
+ else BUG();
+
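+ /*
+ * One slot is kept in reserve: if advancing the producer would put
+ * it on top of the consumer, the ring is full (this is how the ring
+ * distinguishes full from empty).
+ */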
+ if (*index + 1 == le32_to_cpu(*(q->headers.consumer))) { /* Queue full */
+ printk(KERN_WARNING "Queue %d full, %ld outstanding.\n",
+ qid, q->numpending);
+ return 0;
+ } else {
+ *entry = q->base + *index;
+ return 1;
+ }
+}
+
+/**
+ * aac_queue_get - get the next free QE
+ * @dev: Adapter
+ * @index: Returned index
+ * @qid: Queue number to use
+ * @fib: Fib to associate with the queue entry
+ * @wait: Wait if queue full
+ * @fibptr: Driver fib object to go with fib
+ * @nonotify: Don't notify the adapter
+ *
+ * Gets the next free QE off the requested priority adapter command
+ * queue and associates the Fib with the QE. The QE represented by
+ * index is ready to insert on the queue when this routine returns
+ * success.
+ */
+
+static int aac_queue_get(struct aac_dev * dev, u32 * index, u32 qid, struct hw_fib * fib, int wait, struct fib * fibptr, unsigned long *nonotify)
+{
+ struct aac_entry * entry = NULL;
+ int map = 0;
+ struct aac_queue * q = &dev->queues->queue[qid];
+
+ spin_lock_irqsave(q->lock, q->SavedIrql);
+
+ if (qid == AdapHighCmdQueue || qid == AdapNormCmdQueue)
+ {
+ /* if there are no entries, wait for some if the caller wants to */
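+ /*
+ * Note: no sleeping path exists here; if the adapter queue is
+ * full we busy-spin, printing, with the queue lock held until
+ * the adapter frees an entry.
+ */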
+ while (!aac_get_entry(dev, qid, &entry, index, nonotify))
+ {
+ printk(KERN_ERR "GetEntries failed\n");
+ }
+ /*
+ * Setup queue entry with a command, status and fib mapped
+ */
+ entry->size = cpu_to_le32(le16_to_cpu(fib->header.Size));
+ map = 1;
+ }
+ else if (qid == AdapHighRespQueue || qid == AdapNormRespQueue)
+ {
+ while(!aac_get_entry(dev, qid, &entry, index, nonotify))
+ {
+ /* if no entries wait for some if caller wants to */
+ }
+ /*
+ * Setup queue entry with command, status and fib mapped
+ */
+ entry->size = cpu_to_le32(le16_to_cpu(fib->header.Size));
+ entry->addr = cpu_to_le32(fib->header.SenderFibAddress); /* Restore the adapter's pointer to the FIB */
+ fib->header.ReceiverFibAddress = fib->header.SenderFibAddress; /* Let the adapter know where to find its data */
+ map = 0;
+ }
+ /*
+ * If MapFib is true then we need to map the Fib and put pointers
+ * in the queue entry.
+ */
+ if (map)
+ entry->addr = cpu_to_le32((unsigned long)(fibptr->logicaladdr));
+ return 0;
+}
+
+
+/**
+ * aac_insert_entry - insert a queue entry
+ * @dev: Adapter
+ * @index: Index of entry to insert
+ * @qid: Queue number
+ * @nonotify: Suppress adapter notification
+ *
+ * Publishes a previously prepared queue entry: advances the producer
+ * index past the entry at @index and, unless @nonotify suppresses it,
+ * notifies the adapter that a new entry is on the queue.
+ */
+
+static int aac_insert_entry(struct aac_dev * dev, u32 index, u32 qid, unsigned long nonotify)
+{
+ struct aac_queue * q = &dev->queues->queue[qid];
+
+ if(q == NULL)
+ BUG();
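+ /*
+ * Publish the entry prepared in aac_queue_get(): advancing the
+ * shared producer index past @index makes it visible to the
+ * adapter.
+ */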
+ *(q->headers.producer) = cpu_to_le32(index + 1);
+ spin_unlock_irqrestore(q->lock, q->SavedIrql);
+
+ if (qid == AdapHighCmdQueue ||
+ qid == AdapNormCmdQueue ||
+ qid == AdapHighRespQueue ||
+ qid == AdapNormRespQueue)
+ {
+ if (!nonotify)
+ aac_adapter_notify(dev, qid);
+ }
+ else
+ printk("Suprise insert!\n");
+ return 0;
+}
+
+/*
+ * Define the highest level of host to adapter communication routines.
+ * These routines will support host to adapter FS communication. These
+ * routines have no knowledge of the communication method used. This level
+ * sends and receives FIBs. This level has no knowledge of how these FIBs
+ * get passed back and forth.
+ */
+
+/**
+ * fib_send - send a fib to the adapter
+ * @command: Command to send
+ * @fibptr: The fib
+ * @size: Size of fib data area
+ * @priority: Priority of Fib
+ * @wait: Async/sync select
+ * @reply: True if a reply is wanted
+ * @callback: Called with reply
+ * @callback_data: Passed to callback
+ *
+ * Sends the requested FIB to the adapter and optionally will wait for a
+ * response FIB. If the caller does not wish to wait for a response then
+ * an event to wait on must be supplied. This event will be set when a
+ * response FIB is received from the adapter.
+ */
+
+int fib_send(u16 command, struct fib * fibptr, unsigned long size, int priority, int wait, int reply, fib_callback callback, void * callback_data)
+{
+ u32 index;
+ u32 qid;
+ struct aac_dev * dev = fibptr->dev;
+ unsigned long nointr = 0;
+ struct hw_fib * fib = fibptr->fib;
+ struct aac_queue * q;
+ unsigned long flags = 0;
+
+ if (!(le32_to_cpu(fib->header.XferState) & HostOwned))
+ return -EBUSY;
+ /*
+ * There are 5 cases with the wait and response requested flags.
+ * The only invalid cases are if the caller requests to wait and
+ * does not request a response and if the caller does not want a
+ * response and the Fib is not allocated from pool. If a response
+ * is not requested the Fib will just be deallocated by the DPC
+ * routine when the response comes back from the adapter. No
+ * further processing will be done besides deleting the Fib. We
+ * will have a debug mode where the adapter can notify the host
+ * it had a problem and the host can log that fact.
+ */
+ if (wait && !reply) {
+ return -EINVAL;
+ } else if (!wait && reply) {
+ fib->header.XferState |= cpu_to_le32(Async | ResponseExpected);
+ FIB_COUNTER_INCREMENT(aac_config.AsyncSent);
+ } else if (!wait && !reply) {
+ fib->header.XferState |= cpu_to_le32(NoResponseExpected);
+ FIB_COUNTER_INCREMENT(aac_config.NoResponseSent);
+ } else if (wait && reply) {
+ fib->header.XferState |= cpu_to_le32(ResponseExpected);
+ FIB_COUNTER_INCREMENT(aac_config.NormalSent);
+ }
+ /*
+ * Map the fib into 32bits by using the fib number
+ */
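+ /*
+ * The fib index stored in SenderData is echoed back by the adapter
+ * in the response, letting aac_response_normal() recover the driver
+ * fib without a lookup.
+ */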
+ fib->header.SenderData = fibptr-&dev->fibs[0]; /* for callback */
+ /*
+ * Set FIB state to indicate where it came from and if we want a
+ * response from the adapter. Also load the command from the
+ * caller.
+ *
+ * Map the hw fib pointer as a 32bit value
+ */
+ fib->header.SenderFibAddress = fib2addr(fib);
+ fib->header.Command = cpu_to_le16(command);
+ fib->header.XferState |= cpu_to_le32(SentFromHost);
+ fibptr->fib->header.Flags = 0; /* Zero flags field - it's internal only */
+ /*
+ * Set the size of the Fib we want to send to the adapter
+ */
+ fib->header.Size = cpu_to_le16(sizeof(struct aac_fibhdr) + size);
+ if (le16_to_cpu(fib->header.Size) > le16_to_cpu(fib->header.SenderSize)) {
+ return -EMSGSIZE;
+ }
+ /*
+ * Get a queue entry, connect the FIB to it, and then notify
+ * the adapter that a command is ready.
+ */
+ if (priority == FsaHigh) {
+ fib->header.XferState |= cpu_to_le32(HighPriority);
+ qid = AdapHighCmdQueue;
+ } else {
+ fib->header.XferState |= cpu_to_le32(NormalPriority);
+ qid = AdapNormCmdQueue;
+ }
+ q = &dev->queues->queue[qid];
+
+ if(wait)
+ spin_lock_irqsave(&fibptr->event_lock, flags);
+
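+ /*
+ * Note: aac_queue_get() as written always returns 0, so the error
+ * path below (which would leak event_lock when wait is set) appears
+ * to be dead code.
+ */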
+ if(aac_queue_get( dev, &index, qid, fib, 1, fibptr, &nointr)<0)
+ return -EWOULDBLOCK;
+ dprintk((KERN_DEBUG "fib_send: inserting a queue entry at index %d.\n",
+ index));
+ dprintk((KERN_DEBUG "Fib contents:.\n"));
+ dprintk((KERN_DEBUG " Command = %d.\n",
+ fib->header.Command));
+ dprintk((KERN_DEBUG " XferState = %x.\n",
+ fib->header.XferState));
+ /*
+ * Fill in the Callback and CallbackContext if we are not
+ * going to wait.
+ */
+ if (!wait) {
+ fibptr->callback = callback;
+ fibptr->callback_data = callback_data;
+ }
+ FIB_COUNTER_INCREMENT(aac_config.FibsSent);
+ list_add_tail(&fibptr->queue, &q->pendingq);
+ q->numpending++;
+
+ fibptr->done = 0;
+
+ if(aac_insert_entry(dev, index, qid,
+ (nointr & aac_config.irq_mod)) < 0)
+ return -EWOULDBLOCK;
+ /*
+ * If the caller wanted us to wait for response wait now.
+ */
+
+ if (wait) {
+ spin_unlock_irqrestore(&fibptr->event_lock, flags);
+#if 0
+ down(&fibptr->event_wait);
+#endif
+#ifdef TRY_TASKLET
+ /*
+ * XXX KAF: Well, this is pretty gross. We should probably
+ * do_softirq() after scheduling the tasklet, as long as we
+ * are _sure_ we hold no locks here...
+ */
+ printk("about to softirq aac_command_thread...\n");
+ while (!fibptr->done) {
+ tasklet_schedule(&aac_command_tasklet);
+ mdelay(100);
+ }
+ printk("back from softirq cmd thread and fibptr->done!\n");
+#else
+ printk("about to bail at aac_command_thread...\n");
+ while (!fibptr->done) {
+ mdelay(100);
+ aac_command_thread(dev);
+ }
+ printk("back from command thread and fibptr->done!\n");
+#endif
+/* if(fibptr->done == 0) */
+/* BUG(); */
+
+ if((fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT))
+ return -ETIMEDOUT;
+ else
+ return 0;
+ }
+ /*
+ * If the user does not want a response then return success, otherwise
+ * return pending
+ */
+ if (reply)
+ return -EINPROGRESS;
+ else
+ return 0;
+}
+
+/**
+ * aac_consumer_get - get the top of the queue
+ * @dev: Adapter
+ * @q: Queue
+ * @entry: Return entry
+ *
+ * Will return a pointer to the entry on the top of the requested
+ * queue that we are a consumer of, storing the address of the
+ * queue entry in @entry. It does not change the state of the queue.
+ */
+
+int aac_consumer_get(struct aac_dev * dev, struct aac_queue * q, struct aac_entry **entry)
+{
+ u32 index;
+ int status;
+
+ if (*q->headers.producer == *q->headers.consumer) {
+ status = 0;
+ } else {
+ /*
+ * The consumer index must be wrapped if we have reached
+ * the end of the queue, else we just use the entry
+ * pointed to by the header index
+ */
+ if (le32_to_cpu(*q->headers.consumer) >= q->entries)
+ index = 0;
+ else
+ index = le32_to_cpu(*q->headers.consumer);
+ *entry = q->base + index;
+ status = 1;
+ }
+ return(status);
+}
+
+int aac_consumer_avail(struct aac_dev *dev, struct aac_queue * q)
+{
+ return (*q->headers.producer != *q->headers.consumer);
+}
+
+
+/**
+ * aac_consumer_free - free consumer entry
+ * @dev: Adapter
+ * @q: Queue
+ * @qid: Queue ident
+ *
+ * Frees up the current top of the queue we are a consumer of. If the
+ * queue was full notify the producer that the queue is no longer full.
+ */
+
+void aac_consumer_free(struct aac_dev * dev, struct aac_queue *q, u32 qid)
+{
+ int wasfull = 0;
+ u32 notify;
+
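+ /*
+ * Producer one slot behind the consumer means the ring was full.
+ * The raw little-endian values are compared directly, which is
+ * fine on the x86 systems this port targets.
+ */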
+ if (*q->headers.producer+1 == *q->headers.consumer)
+ wasfull = 1;
+
+ if (le32_to_cpu(*q->headers.consumer) >= q->entries)
+ *q->headers.consumer = cpu_to_le32(1);
+ else
+ *q->headers.consumer =
+ cpu_to_le32(le32_to_cpu(*q->headers.consumer)+1);
+
+ if (wasfull) {
+ switch (qid) {
+
+ case HostNormCmdQueue:
+ notify = HostNormCmdNotFull;
+ break;
+ case HostHighCmdQueue:
+ notify = HostHighCmdNotFull;
+ break;
+ case HostNormRespQueue:
+ notify = HostNormRespNotFull;
+ break;
+ case HostHighRespQueue:
+ notify = HostHighRespNotFull;
+ break;
+ default:
+ BUG();
+ return;
+ }
+ aac_adapter_notify(dev, notify);
+ }
+}
+
+/**
+ * fib_adapter_complete - complete adapter issued fib
+ * @fibptr: fib to complete
+ * @size: size of fib
+ *
+ * Will do all necessary work to complete a FIB that was sent from
+ * the adapter.
+ */
+
+int fib_adapter_complete(struct fib * fibptr, unsigned short size)
+{
+ struct hw_fib * fib = fibptr->fib;
+ struct aac_dev * dev = fibptr->dev;
+ unsigned long nointr = 0;
+
+ if (le32_to_cpu(fib->header.XferState) == 0)
+ return 0;
+ /*
+ * If we plan to do anything check the structure type first.
+ */
+ if ( fib->header.StructType != FIB_MAGIC ) {
+ return -EINVAL;
+ }
+ /*
+ * This block handles the case where the adapter had sent us a
+ * command and we have finished processing the command. We
+ * call completeFib when we are done processing the command
+ * and want to send a response back to the adapter. This will
+ * send the completed cdb to the adapter.
+ */
+ if (fib->header.XferState & cpu_to_le32(SentFromAdapter)) {
+ fib->header.XferState |= cpu_to_le32(HostProcessed);
+ if (fib->header.XferState & cpu_to_le32(HighPriority)) {
+ u32 index;
+ if (size)
+ {
+ size += sizeof(struct aac_fibhdr);
+ if (size > le16_to_cpu(fib->header.SenderSize))
+ return -EMSGSIZE;
+ fib->header.Size = cpu_to_le16(size);
+ }
+ if(aac_queue_get(dev, &index, AdapHighRespQueue,
+ fib, 1, NULL, &nointr) < 0) {
+ return -EWOULDBLOCK;
+ }
+ if (aac_insert_entry(dev, index, AdapHighRespQueue,
+ (nointr & (int)aac_config.irq_mod)) != 0) {
+ }
+ }
+ else if (fib->header.XferState & NormalPriority)
+ {
+ u32 index;
+
+ if (size) {
+ size += sizeof(struct aac_fibhdr);
+ if (size > le16_to_cpu(fib->header.SenderSize))
+ return -EMSGSIZE;
+ fib->header.Size = cpu_to_le16(size);
+ }
+ if (aac_queue_get(dev, &index, AdapNormRespQueue,
+ fib, 1, NULL, &nointr) < 0)
+ return -EWOULDBLOCK;
+ if (aac_insert_entry(dev, index, AdapNormRespQueue,
+ (nointr & (int)aac_config.irq_mod)) != 0)
+ {
+ }
+ }
+ }
+ else
+ {
+ printk(KERN_WARNING
+ "fib_adapter_complete: Unknown xferstate detected.\n");
+ BUG();
+ }
+ return 0;
+}
+
+/**
+ * fib_complete - fib completion handler
+ * @fibptr: FIB to complete
+ *
+ * Will do all necessary work to complete a FIB.
+ */
+
+int fib_complete(struct fib * fibptr)
+{
+ struct hw_fib * fib = fibptr->fib;
+
+ /*
+ * Check for a fib which has already been completed
+ */
+
+ if (fib->header.XferState == cpu_to_le32(0))
+ return 0;
+ /*
+ * If we plan to do anything check the structure type first.
+ */
+
+ if (fib->header.StructType != FIB_MAGIC)
+ return -EINVAL;
+ /*
+ * This block completes a cdb which originated on the host and we
+ * just need to deallocate the cdb or reinit it. At this point the
+ * command is complete that we had sent to the adapter and this
+ * cdb could be reused.
+ */
+ if((fib->header.XferState & cpu_to_le32(SentFromHost)) &&
+ (fib->header.XferState & cpu_to_le32(AdapterProcessed)))
+ {
+ fib_dealloc(fibptr);
+ }
+ else if(fib->header.XferState & cpu_to_le32(SentFromHost))
+ {
+ /*
+ * This handles the case when the host has aborted the I/O
+ * to the adapter because the adapter is not responding
+ */
+ fib_dealloc(fibptr);
+ } else if(fib->header.XferState & cpu_to_le32(HostOwned)) {
+ fib_dealloc(fibptr);
+ } else {
+ BUG();
+ }
+ return 0;
+}
+
+/**
+ * aac_printf - handle printf from firmware
+ * @dev: Adapter
+ * @val: Message info
+ *
+ * Print a message passed to us by the controller firmware on the
+ * Adaptec board
+ */
+
+void aac_printf(struct aac_dev *dev, u32 val)
+{
+ int length = val & 0xffff;
+ int level = (val >> 16) & 0xffff;
+ char *cp = dev->printfbuf;
+
+ /*
+ * The size of the printfbuf is set in port.c
+ * There is no variable or define for it
+ */
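+ /*
+ * Clamp the firmware-supplied length to the 256-byte buffer and
+ * force NUL termination before handing the string to printk().
+ */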
+ if (length > 255)
+ length = 255;
+ if (cp[length] != 0)
+ cp[length] = 0;
+ if (level == LOG_HIGH_ERROR)
+ printk(KERN_WARNING "aacraid:%s", cp);
+ else
+ printk(KERN_INFO "aacraid:%s", cp);
+ memset(cp, 0, 256);
+}
+
+
+/**
+ * aac_handle_aif - Handle a message from the firmware
+ * @dev: Which adapter this fib is from
+ * @fibptr: Pointer to fibptr from adapter
+ *
+ * This routine handles a driver notify fib from the adapter; in this
+ * port it simply acknowledges the fib back to the adapter.
+ */
+
+static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr)
+{
+ struct hw_fib * fib = fibptr->fib;
+ /*
+ * Acknowledge the AIF with an ST_OK status. (To reject it instead,
+ * the status would be set to Invalid parameter:
+ * *(u32 *)fib->data = ST_INVAL;)
+ */
+ *(u32 *)fib->data = cpu_to_le32(ST_OK);
+ fib_adapter_complete(fibptr, sizeof(u32));
+}
+
+/**
+ * aac_command_thread - command processing thread
+ * @dev: Adapter to monitor
+ *
+ * Waits on the commandready event in its queue. When the event gets set
+ * it will pull FIBs off its queue. It will continue to pull FIBs off
+ * until the queue is empty. When the queue is empty it will wait for
+ * more FIBs.
+ */
+
+#ifndef TRY_TASKLET
+DECLARE_TASKLET_DISABLED(aac_command_tasklet, aac_command_thread, 0);
+int aac_command_thread(struct aac_dev * dev)
+{
+#else
+int aac_command_thread(unsigned long data)
+{
+ struct aac_dev *dev = (struct aac_dev *)data;
+#endif
+ struct hw_fib *fib, *newfib;
+ struct fib fibptr; /* for error logging */
+ struct aac_queue_block *queues = dev->queues;
+ struct aac_fib_context *fibctx;
+ unsigned long flags;
+#if 0
+ DECLARE_WAITQUEUE(wait, current);
+#endif
+
+ /*
+ * We can only have one thread per adapter for AIF's.
+ */
+ printk("aac_command_'thread': entered.\n");
+ if (dev->aif_thread)
+ return -EINVAL;
+
+#if 0
+ /*
+ * Set up the name that will appear in 'ps'
+ * stored in task_struct.comm[16].
+ */
+ sprintf(current->comm, "aacraid");
+ daemonize();
+#endif
+
+ /*
+ * Let the DPC know it has a place to send the AIF's to.
+ */
+ dev->aif_thread = 1;
+ memset(&fibptr, 0, sizeof(struct fib));
+#if 0
+ add_wait_queue(&queues->queue[HostNormCmdQueue].cmdready, &wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+#endif
+// while(1)
+ {
+
+ printk("aac_command_thread: in 'loop'\n");
+ spin_lock_irqsave(queues->queue[HostNormCmdQueue].lock, flags);
+ printk("flags = %x\n", flags);
+ while(!list_empty(&(queues->queue[HostNormCmdQueue].cmdq))) {
+ struct list_head *entry;
+ struct aac_aifcmd * aifcmd;
+
+#if 0
+ set_current_state(TASK_RUNNING);
+#endif
+
+
+ entry = queues->queue[HostNormCmdQueue].cmdq.next;
+ list_del(entry);
+
+ spin_unlock_irqrestore(queues->queue[HostNormCmdQueue].lock,flags);
+ fib = list_entry(entry, struct hw_fib, header.FibLinks);
+ printk("aac_command_thread: got fib \n");
+ /*
+ * We will process the FIB here or pass it to a
+ * worker thread that is TBD. We really can't
+ * do anything at this point since we don't have
+ * anything defined for this thread to do.
+ */
+ memset(&fibptr, 0, sizeof(struct fib));
+ fibptr.type = FSAFS_NTC_FIB_CONTEXT;
+ fibptr.size = sizeof( struct fib );
+ fibptr.fib = fib;
+ fibptr.data = fib->data;
+ fibptr.dev = dev;
+ /*
+ * We only handle AifRequest fibs from the adapter.
+ */
+ aifcmd = (struct aac_aifcmd *) fib->data;
+ if (aifcmd->command == le16_to_cpu(AifCmdDriverNotify)) {
+ printk("aac_command_thread: handling aif... :-( \n");
+ aac_handle_aif(dev, &fibptr);
+ } else {
+ /* The u32 here is important and intended. We are using
+ 32bit wrapping time to fit the adapter field */
+ u32 time_now, time_last;
+ unsigned long flagv;
+
+ time_now = jiffies/HZ;
+
+ spin_lock_irqsave(&dev->fib_lock, flagv);
+ entry = dev->fib_list.next;
+ /*
+ * For each Context that is on the
+ * fibctxList, make a copy of the
+ * fib, and then set the event to wake up the
+ * thread that is waiting for it.
+ */
+ while (entry != &dev->fib_list) {
+ /*
+ * Extract the fibctx
+ */
+ fibctx = list_entry(entry, struct aac_fib_context, next);
+ /*
+ * Check if the queue is getting
+ * backlogged
+ */
+ if (fibctx->count > 20)
+ {
+ time_last = fibctx->jiffies;
+ /*
+ * Has it been > 2 minutes
+ * since the last read off
+ * the queue?
+ */
+ if ((time_now - time_last) > 120) {
+ entry = entry->next;
+ aac_close_fib_context(dev, fibctx);
+ continue;
+ }
+ }
+ /*
+ * Warning: no sleep allowed while
+ * holding spinlock
+ */
+ newfib = kmalloc(sizeof(struct hw_fib), GFP_ATOMIC);
+ if (newfib) {
+ /*
+ * Make the copy of the FIB
+ */
+ memcpy(newfib, fib, sizeof(struct hw_fib));
+ /*
+ * Put the FIB onto the
+ * fibctx's fibs
+ */
+ list_add_tail(&newfib->header.FibLinks, &fibctx->fibs);
+ fibctx->count++;
+#if 0
+ /*
+ * Set the event to wake up the
+ * thread that is waiting.
+ */
+ up(&fibctx->wait_sem);
+#endif
+ } else {
+ printk(KERN_WARNING "aifd: didn't allocate NewFib.\n");
+ }
+ entry = entry->next;
+ }
+ /*
+ * Set the status of this FIB
+ */
+ *(u32 *)fib->data = cpu_to_le32(ST_OK);
+ fib_adapter_complete(&fibptr, sizeof(u32));
+ spin_unlock_irqrestore(&dev->fib_lock, flagv);
+ }
+ spin_lock_irqsave(queues->queue[HostNormCmdQueue].lock, flags);
+ }
+ /*
+ * There are no more AIF's
+ */
+ spin_unlock_irqrestore(queues->queue[HostNormCmdQueue].lock, flags);
+#if 0
+ schedule();
+
+ if(signal_pending(current))
+ break;
+ set_current_state(TASK_INTERRUPTIBLE);
+#endif
+
+ }
+
+#if 0
+ remove_wait_queue(&queues->queue[HostNormCmdQueue].cmdready, &wait);
+ dev->aif_thread = 0;
+ complete_and_exit(&dev->aif_completion, 0);
+#else
+ mdelay(50);
+ dev->aif_thread = 0;
+
+#endif
+ return 0;
+}
diff --git a/xen/drivers/scsi/aacraid/dpcsup.c b/xen/drivers/scsi/aacraid/dpcsup.c
new file mode 100644
index 0000000000..c9b4dfe123
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/dpcsup.c
@@ -0,0 +1,207 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ * dpcsup.c
+ *
+ * Abstract: All DPC processing routines for the cyclone board occur here.
+ *
+ *
+ */
+
+#include <xeno/config.h>
+/* #include <xeno/kernel.h> */
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+/* #include <xeno/spinlock.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/completion.h> */
+#include <xeno/blk.h>
+/* #include <asm/semaphore.h> */
+#include "scsi.h"
+#include "hosts.h"
+
+#include "aacraid.h"
+
+/**
+ * aac_response_normal - Handle command replies
+ * @q: Queue to read from
+ *
+ * This DPC routine will be run when the adapter interrupts us to let us
+ * know there is a response on our normal priority queue. We will pull off
+ * all QE there are and wake up all the waiters before exiting. We will
+ * take a spinlock out on the queue before operating on it.
+ */
+
+unsigned int aac_response_normal(struct aac_queue * q)
+{
+ struct aac_dev * dev = q->dev;
+ struct aac_entry *entry;
+ struct hw_fib * hwfib;
+ struct fib * fib;
+ int consumed = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(q->lock, flags);
+
+ /*
+ * Keep pulling response QEs off the response queue and waking
+ * up the waiters until there are no more QEs. We then return
+ * back to the system. If no response was requested we just
+ * deallocate the Fib here and continue.
+ */
+ while(aac_consumer_get(dev, q, &entry))
+ {
+ int fast;
+
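+ /*
+ * The bottom bit of the entry address appears to flag a "fast"
+ * completion (cf. FastResponseCapable in fib_init()): the adapter
+ * returned no status, so the host fills in ST_OK itself below.
+ */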
+ fast = (int) (entry->addr & 0x01);
+ hwfib = addr2fib(entry->addr & ~0x01);
+ aac_consumer_free(dev, q, HostNormRespQueue);
+ fib = &dev->fibs[hwfib->header.SenderData];
+ /*
+ * Remove this fib from the Outstanding I/O queue.
+ * But only if it has not already been timed out.
+ *
+ * If the fib has been timed out already, then just
+ * continue. The caller has already been notified that
+ * the fib timed out.
+ */
+ if (!(fib->flags & FIB_CONTEXT_FLAG_TIMED_OUT)) {
+ list_del(&fib->queue);
+ dev->queues->queue[AdapNormCmdQueue].numpending--;
+ } else {
+ printk(KERN_WARNING "aacraid: FIB timeout (%x).\n", fib->flags);
+ continue;
+ }
+ spin_unlock_irqrestore(q->lock, flags);
+
+ if (fast) {
+ /*
+ * Doctor the fib
+ */
+ *(u32 *)hwfib->data = cpu_to_le32(ST_OK);
+ hwfib->header.XferState |= cpu_to_le32(AdapterProcessed);
+ }
+
+ FIB_COUNTER_INCREMENT(aac_config.FibRecved);
+
+ if (hwfib->header.Command == cpu_to_le16(NuFileSystem))
+ {
+ u32 *pstatus = (u32 *)hwfib->data;
+ if (*pstatus & cpu_to_le32(0xffff0000))
+ *pstatus = cpu_to_le32(ST_OK);
+ }
+ if (hwfib->header.XferState & cpu_to_le32(NoResponseExpected | Async))
+ {
+ if (hwfib->header.XferState & cpu_to_le32(NoResponseExpected))
+ FIB_COUNTER_INCREMENT(aac_config.NoResponseRecved);
+ else
+ FIB_COUNTER_INCREMENT(aac_config.AsyncRecved);
+ /*
+ * NOTE: we cannot touch the fib after this
+ * call, because it may have been deallocated.
+ */
+ fib->callback(fib->callback_data, fib);
+ } else {
+#if 0
+ unsigned long flagv;
+ spin_lock_irqsave(&fib->event_lock, flagv);
+#endif
+ fib->done = 1;
+#if 0
+ up(&fib->event_wait);
+ spin_unlock_irqrestore(&fib->event_lock, flagv);
+#endif
+ FIB_COUNTER_INCREMENT(aac_config.NormalRecved);
+ }
+ consumed++;
+ spin_lock_irqsave(q->lock, flags);
+ }
+
+ if (consumed > aac_config.peak_fibs)
+ aac_config.peak_fibs = consumed;
+ if (consumed == 0)
+ aac_config.zero_fibs++;
+
+ spin_unlock_irqrestore(q->lock, flags);
+ return 0;
+}
+
+
+/**
+ * aac_command_normal - handle commands
+ * @q: queue to process
+ *
+ * This DPC routine will be queued when the adapter interrupts us to
+ * let us know there is a command on our normal priority queue. We will
+ * pull off all the QEs there are and wake up all the waiters before exiting.
+ * We will take a spinlock out on the queue before operating on it.
+ */
+
+unsigned int aac_command_normal(struct aac_queue *q)
+{
+ struct aac_dev * dev = q->dev;
+ struct aac_entry *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(q->lock, flags);
+
+ /*
+ * Keep pulling command QEs off the command queue until there are
+ * no more QEs. We then return back to the system.
+ */
+ while(aac_consumer_get(dev, q, &entry))
+ {
+ struct hw_fib * fib;
+ fib = addr2fib(entry->addr);
+
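+ /*
+ * If the AIF handler is registered, queue the fib on cmdq for it;
+ * otherwise acknowledge the command inline with an ST_OK status.
+ */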
+ if (dev->aif_thread) {
+ list_add_tail(&fib->header.FibLinks, &q->cmdq);
+ aac_consumer_free(dev, q, HostNormCmdQueue);
+#if 0
+ wake_up_interruptible(&q->cmdready);
+#endif
+ } else {
+ struct fib fibctx;
+ aac_consumer_free(dev, q, HostNormCmdQueue);
+ spin_unlock_irqrestore(q->lock, flags);
+ memset(&fibctx, 0, sizeof(struct fib));
+ fibctx.type = FSAFS_NTC_FIB_CONTEXT;
+ fibctx.size = sizeof(struct fib);
+ fibctx.fib = fib;
+ fibctx.data = fib->data;
+ fibctx.dev = dev;
+ /*
+ * Set the status of this FIB
+ */
+ *(u32 *)fib->data = cpu_to_le32(ST_OK);
+ fib_adapter_complete(&fibctx, sizeof(u32));
+ spin_lock_irqsave(q->lock, flags);
+ }
+ }
+ spin_unlock_irqrestore(q->lock, flags);
+ return 0;
+}
diff --git a/xen/drivers/scsi/aacraid/linit.c b/xen/drivers/scsi/aacraid/linit.c
new file mode 100644
index 0000000000..b5026d9065
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/linit.c
@@ -0,0 +1,794 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ * linit.c
+ *
+ * Abstract: Linux Driver entry module for Adaptec RAID Array Controller
+ *
+ * Provides the following driver entry points:
+ * aac_detect()
+ * aac_release()
+ * aac_queuecommand()
+ * aac_resetcommand()
+ * aac_biosparm()
+ *
+ */
+
+#define AAC_DRIVER_VERSION "0.9.9ac6-TEST"
+#define AAC_DRIVER_BUILD_DATE __DATE__
+
+#include <xeno/module.h>
+#include <xeno/config.h>
+#include <xeno/kernel.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+#include <xeno/spinlock.h>
+/* #include <xeno/slab.h> */
+/* #include <xeno/completion.h> */
+/* #include <asm/semaphore.h> */
+#include <xeno/blk.h>
+#include "scsi.h"
+#include "hosts.h"
+
+#include "aacraid.h"
+#include "sd.h"
+
+#define AAC_DRIVERNAME "aacraid"
+
+MODULE_AUTHOR("Red Hat Inc and Adaptec");
+MODULE_DESCRIPTION("Supports Dell PERC2, 2/Si, 3/Si, 3/Di, PERC 320/DC, Adaptec 2120S, 2200S, 5400S, and HP NetRAID-4M devices. http://domsch.com/xeno/ or http://linux.adaptec.com");
+MODULE_LICENSE("GPL");
+MODULE_PARM(nondasd, "i");
+MODULE_PARM_DESC(nondasd, "Control scanning of hba for nondasd devices. 0=off, 1=on");
+
+static int nondasd=-1;
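+/* -1 means "not set on the module command line"; see aac_detect() */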
+
+struct aac_dev *aac_devices[MAXIMUM_NUM_ADAPTERS];
+
+static unsigned aac_count = 0;
+static int aac_cfg_major = -1;
+
+/*
+ * Because of the way Linux names scsi devices, the order in this table has
+ * become important. Check for on-board Raid first, add-in cards second.
+ *
+ * dmb - For now we add the number of channels to this structure.
+ * In the future we should add a fib that reports the number of channels
+ * for the card. At that time we can remove the channels from here
+ */
+
+static struct aac_driver_ident aac_drivers[] = {
+ { 0x1028, 0x0001, 0x1028, 0x0001, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 2/Si */
+ { 0x1028, 0x0002, 0x1028, 0x0002, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Di */
+ { 0x1028, 0x0003, 0x1028, 0x0003, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Si */
+ { 0x1028, 0x0004, 0x1028, 0x00d0, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Si */
+ { 0x1028, 0x0002, 0x1028, 0x00d1, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Di */
+ { 0x1028, 0x0002, 0x1028, 0x00d9, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Di */
+ { 0x1028, 0x000a, 0x1028, 0x0106, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Di */
+ { 0x1028, 0x000a, 0x1028, 0x011b, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Di */
+ { 0x1028, 0x000a, 0x1028, 0x0121, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* PERC 3/Di */
+ { 0x9005, 0x0283, 0x9005, 0x0283, aac_rx_init, "aacraid", "ADAPTEC ", "catapult ", 2 }, /* catapult*/
+ { 0x9005, 0x0284, 0x9005, 0x0284, aac_rx_init, "aacraid", "ADAPTEC ", "tomcat ", 2 }, /* tomcat*/
+ { 0x9005, 0x0285, 0x9005, 0x0286, aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2120S ", 1 }, /* Adaptec 2120S (Crusader)*/
+ { 0x9005, 0x0285, 0x9005, 0x0285, aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2200S ", 2 }, /* Adaptec 2200S (Vulcan)*/
+ { 0x9005, 0x0285, 0x9005, 0x0287, aac_rx_init, "aacraid", "ADAPTEC ", "Adaptec 2200S ", 2 }, /* Adaptec 2200S (Vulcan-2m)*/
+ { 0x9005, 0x0285, 0x1028, 0x0287, aac_rx_init, "percraid", "DELL ", "PERCRAID ", 2 }, /* Dell PERC 320/DC */
+ { 0x1011, 0x0046, 0x9005, 0x0365, aac_sa_init, "aacraid", "ADAPTEC ", "Adaptec 5400S ", 4 }, /* Adaptec 5400S (Mustang)*/
+ { 0x1011, 0x0046, 0x9005, 0x0364, aac_sa_init, "aacraid", "ADAPTEC ", "AAC-364 ", 4 }, /* Adaptec 5400S (Mustang)*/
+ { 0x1011, 0x0046, 0x9005, 0x1364, aac_sa_init, "percraid", "DELL ", "PERCRAID ", 4 }, /* Dell PERC2 "Quad Channel" */
+ { 0x1011, 0x0046, 0x103c, 0x10c2, aac_sa_init, "hpnraid", "HP ", "NetRAID-4M ", 4 } /* HP NetRAID-4M */
+};
+
+#define NUM_AACTYPES (sizeof(aac_drivers) / sizeof(struct aac_driver_ident))
+static int num_aacdrivers = NUM_AACTYPES;
+
+#if 0
+static int aac_cfg_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg);
+static int aac_cfg_open(struct inode * inode, struct file * file);
+static int aac_cfg_release(struct inode * inode,struct file * file);
+
+static struct file_operations aac_cfg_fops = {
+/* owner: THIS_MODULE, */
+ ioctl: aac_cfg_ioctl,
+ open: aac_cfg_open,
+ release: aac_cfg_release
+};
+#endif
+
+static int aac_detect(Scsi_Host_Template *);
+static int aac_release(struct Scsi_Host *);
+static int aac_queuecommand(Scsi_Cmnd *, void (*CompletionRoutine)(Scsi_Cmnd *));
+static int aac_biosparm(Scsi_Disk *, kdev_t, int *);
+#ifdef CONFIG_PROC_FS
+static int aac_procinfo(char *, char **, off_t, int, int, int);
+#endif
+static int aac_ioctl(Scsi_Device *, int, void *);
+static int aac_eh_abort(Scsi_Cmnd * cmd);
+static int aac_eh_device_reset(Scsi_Cmnd* cmd);
+static int aac_eh_bus_reset(Scsi_Cmnd* cmd);
+static int aac_eh_reset(Scsi_Cmnd* cmd);
+
+static void aac_queuedepth(struct Scsi_Host *, Scsi_Device *);
+
+/**
+ * aac_detect - Probe for aacraid cards
+ * @template: SCSI driver template
+ *
+ * Probe for AAC Host Adapters initialize, register, and report the
+ * configuration of each AAC Host Adapter found.
+ * Returns the number of adapters successfully initialized and
+ * registered.
+ * Initializes all data necessary for this particular SCSI driver.
+ * Notes:
+ * The detect routine must not call any of the mid level functions
+ * to queue commands because things are not guaranteed to be set
+ * up yet. The detect routine can send commands to the host adapter
+ * as long as the program control will not be passed to scsi.c in
+ * the processing of the command. Note especially that
+ * scsi_malloc/scsi_free must not be called.
+ *
+ */
+static int aac_detect(Scsi_Host_Template *template)
+{
+ int index;
+ int container;
+ u16 vendor_id, device_id;
+ struct Scsi_Host *host_ptr;
+ struct pci_dev *dev = NULL;
+ struct aac_dev *aac;
+ struct fsa_scsi_hba *fsa_dev_ptr;
+ char *name = NULL;
+
+ printk(KERN_INFO "Red Hat/Adaptec aacraid driver, %s\n",
+ AAC_DRIVER_BUILD_DATE);
+
+
+ /*
+ ** XXX SMH: we need to take interrupts during detect, but the SCSI
+ ** layer is holding this lock with interrupts disabled. I don't
+ ** know how this works on vanilla linux (we 'down' on a semaphore
+ ** at one point during the process -- how do we wake?)
+ */
+ spin_unlock_irq(&io_request_lock);
+
+
+ /* setting up the proc directory structure */
+ template->proc_name = "aacraid";
+
+ for( index = 0; index != num_aacdrivers; index++ )
+ {
+ device_id = aac_drivers[index].device;
+ vendor_id = aac_drivers[index].vendor;
+ name = aac_drivers[index].name;
+ dprintk((KERN_DEBUG "Checking %s %x/%x/%x/%x.\n",
+ name, vendor_id, device_id,
+ aac_drivers[index].subsystem_vendor,
+ aac_drivers[index].subsystem_device));
+
+ dev = NULL;
+ while((dev = pci_find_device(vendor_id, device_id, dev))) {
+ if (pci_enable_device(dev))
+ continue;
+ pci_set_master(dev);
+ pci_set_dma_mask(dev, 0xFFFFFFFFULL);
+
+ if((dev->subsystem_vendor != aac_drivers[index].subsystem_vendor) ||
+ (dev->subsystem_device != aac_drivers[index].subsystem_device))
+ continue;
+
+ dprintk((KERN_DEBUG "%s device detected.\n", name));
+ dprintk((KERN_DEBUG "%x/%x/%x/%x.\n", vendor_id, device_id,
+ aac_drivers[index].subsystem_vendor,
+ aac_drivers[index].subsystem_device));
+ /* Increment the host adapter count */
+ aac_count++;
+ /*
+ * scsi_register() allocates memory for a Scsi_Hosts
+ * structure and links it into the linked list of host
+ * adapters. This linked list contains the data for all
+ * possible <supported> scsi hosts. This is similar to
+ * the Scsi_Host_Template, except that we have one entry
+ * for each actual physical host adapter on the system,
+ * stored as a linked list. If there are two AAC boards,
+ * then we will need to make two Scsi_Host entries, but
+ * there will be only one Scsi_Host_Template entry. The
+ * second argument to scsi_register() specifies the size
+ * of the extra memory we want to hold any device specific
+ * information. */
+ host_ptr = scsi_register( template, sizeof(struct aac_dev) );
+ /*
+ * These three parameters can be used to allow for wide SCSI
+ * and for host adapters that support multiple buses.
+ */
+ host_ptr->max_id = 17;
+ host_ptr->max_lun = 8;
+ host_ptr->max_channel = 1;
+ host_ptr->irq = dev->irq; /* Adapter IRQ number */
+ /* host_ptr->base = ( char * )(dev->resource[0].start & ~0xff); */
+ host_ptr->base = dev->resource[0].start;
+ scsi_set_pci_device(host_ptr, dev);
+ dprintk((KERN_DEBUG "Device base address = 0x%lx [0x%lx].\n",
+ host_ptr->base, dev->resource[0].start));
+ dprintk((KERN_DEBUG "Device irq = 0x%x.\n", dev->irq));
+ /*
+ * The unique_id field is a unique identifier that must
+ * be assigned so that we have some way of identifying
+ * each host adapter properly and uniquely. For hosts
+ * that do not support more than one card in the
+ * system, this does not need to be set. It is
+ * initialized to zero in scsi_register(). This is the
+ * value returned as aac->id.
+ */
+ host_ptr->unique_id = aac_count - 1;
+ /*
+ * This function is called after the device list has
+ * been built to find the tagged queueing depth
+ * supported for each device.
+ */
+ host_ptr->select_queue_depths = aac_queuedepth;
+ aac = (struct aac_dev *)host_ptr->hostdata;
+ /* attach a pointer back to Scsi_Host */
+ aac->scsi_host_ptr = host_ptr;
+ aac->pdev = dev;
+ aac->cardtype = index;
+ aac->name = aac->scsi_host_ptr->hostt->name;
+ aac->id = aac->scsi_host_ptr->unique_id;
+ /* Initialize the ordinal number of the device to -1 */
+ fsa_dev_ptr = &(aac->fsa_dev);
+ for( container=0; container < MAXIMUM_NUM_CONTAINERS; container++)
+ fsa_dev_ptr->devno[container] = -1;
+
+ dprintk((KERN_DEBUG "Initializing Hardware...\n"));
+
+ if((*aac_drivers[index].init)(aac , host_ptr->unique_id) != 0)
+ {
+ /* device initialization failed */
+ printk(KERN_WARNING
+ "aacraid: device initialization failed.\n");
+ scsi_unregister(host_ptr);
+ aac_count--;
+ continue;
+ }
+ dprintk((KERN_DEBUG "%s:%d device initialization successful.\n",
+ name, host_ptr->unique_id));
+ aac_get_adapter_info(aac);
+
+ dprintk((KERN_DEBUG "%s got adapter info.\n", name));
+
+ if(nondasd != -1)
+ {
+ /* someone told us how to set this on the cmdline */
+ aac->nondasd_support = (nondasd!=0);
+ }
+ if(aac->nondasd_support != 0){
+ printk(KERN_INFO "%s%d: Non-DASD support enabled\n",
+ aac->name, aac->id);
+ }
+ dprintk((KERN_DEBUG "%s:%d options flag %04x.\n", name,
+ host_ptr->unique_id, aac->adapter_info.options));
+ if(aac->nondasd_support == 1)
+ {
+ /*
+ * max_channel is the number of physical
+ * channels plus one virtual channel; all
+ * containers live on virtual channel 0,
+ * and physical channels are addressed by
+ * their actual physical number plus one.
+ */
+ host_ptr->max_channel = aac_drivers[index].channels+1;
+ } else {
+ host_ptr->max_channel = 1;
+ }
+ dprintk((KERN_DEBUG "Device has %d logical channels\n",
+ host_ptr->max_channel));
+ aac_get_containers(aac);
+ aac_devices[aac_count-1] = aac;
+
+ /*
+ * dmb - we may need to move these 3 parms somewhere else once
+ * we get a fib that can report the actual numbers
+ */
+ host_ptr->max_id = AAC_MAX_TARGET;
+ host_ptr->max_lun = AAC_MAX_LUN;
+
+ /*
+ * If we are PAE capable then our future DMA mappings
+ * (for read/write commands) are 64bit clean and don't
+ * need bouncing. This assumes we do no other 32bit only
+ * allocations (eg fib table expands) after this point.
+ */
+
+ if(aac->pae_support)
+ pci_set_dma_mask(dev, 0xFFFFFFFFFFFFFFFFUL);
+ }
+ }
+
+ /* XXX SMH: restore lock and IPL for SCSI layer */
+ spin_lock_irq(&io_request_lock);
+
+
+#if 0
+ if( aac_count ){
+ if((aac_cfg_major = register_chrdev( 0, "aac", &aac_cfg_fops))<0)
+ printk(KERN_WARNING "aacraid: unable to register 'aac' device.\n");
+ }
+#endif
+
+ template->present = aac_count; /* # of cards of this type found */
+ printk(KERN_DEBUG "aac_detect: returning %d\n", aac_count);
+ return aac_count;
+}
+
+/**
+ * aac_release - release SCSI host resources
+ * @host_ptr: SCSI host to clean up
+ *
+ * Release all resources previously acquired to support a specific Host
+ * Adapter and unregister the AAC Host Adapter.
+ *
+ * BUGS: Does not wait for the thread it kills to die.
+ */
+
+static int aac_release(struct Scsi_Host *host_ptr)
+{
+ struct aac_dev *dev;
+ dprintk((KERN_DEBUG "aac_release.\n"));
+ dev = (struct aac_dev *)host_ptr->hostdata;
+
+#if 0
+ /*
+ * kill any threads we started
+ */
+ kill_proc(dev->thread_pid, SIGKILL, 0);
+ wait_for_completion(&dev->aif_completion);
+#endif
+ /*
+ * Call the comm layer to detach from this adapter
+ */
+ aac_detach(dev);
+ /* Check free orderings... */
+ /* remove interrupt binding */
+ free_irq(host_ptr->irq, dev);
+ iounmap((void * )dev->regs.sa);
+ /* unregister adapter */
+ scsi_unregister(host_ptr);
+ /*
+ * FIXME: This assumes no hot plugging is going on...
+ */
+ if( aac_cfg_major >= 0 )
+ {
+#if 0
+ unregister_chrdev(aac_cfg_major, "aac");
+#endif
+ aac_cfg_major = -1;
+ }
+ return 0;
+}
+
+/**
+ * aac_queuecommand - queue a SCSI command
+ * @scsi_cmnd_ptr: SCSI command to queue
+ * @CompletionRoutine: Function to call on command completion
+ *
+ * Queues a command for execution by the associated Host Adapter.
+ */
+
+static int aac_queuecommand(Scsi_Cmnd *scsi_cmnd_ptr, void (*complete)(Scsi_Cmnd *))
+{
+ int ret;
+
+ scsi_cmnd_ptr->scsi_done = complete;
+ /*
+ * aac_scsi_cmd() handles command processing, setting the
+ * result code and calling completion routine.
+ */
+ if((ret = aac_scsi_cmd(scsi_cmnd_ptr)) != 0)
+ dprintk((KERN_DEBUG "aac_scsi_cmd failed.\n"));
+ return ret;
+}
+
+/**
+ * aac_driverinfo - Returns the host adapter name
+ * @host_ptr: Scsi host to report on
+ *
+ * Returns a static string describing the device in question
+ */
+
+const char *aac_driverinfo(struct Scsi_Host *host_ptr)
+{
+ struct aac_dev *dev = (struct aac_dev *)host_ptr->hostdata;
+ return aac_drivers[dev->cardtype].name;
+}
+
+/**
+ * aac_get_driver_ident
+ * @devtype: index into lookup table
+ *
+ * Returns a pointer to the entry in the driver lookup table.
+ */
+struct aac_driver_ident* aac_get_driver_ident(int devtype)
+{
+ return &aac_drivers[devtype];
+}
+
+/**
+ * aac_biosparm - return BIOS parameters for disk
+ * @disk: SCSI disk object to process
+ * @device: kdev_t of the disk in question
+ * @geom: geometry block to fill in
+ *
+ * Return the Heads/Sectors/Cylinders BIOS Disk Parameters for Disk.
+ * The default disk geometry is 64 heads, 32 sectors, and the appropriate
+ * number of cylinders so as not to exceed drive capacity. In order for
+ * disks equal to or larger than 1 GB to be addressable by the BIOS
+ * without exceeding the BIOS limitation of 1024 cylinders, Extended
+ * Translation should be enabled. With Extended Translation enabled,
+ * drives between 1 GB inclusive and 2 GB exclusive are given a disk
+ * geometry of 128 heads and 32 sectors, and drives above 2 GB inclusive
+ * are given a disk geometry of 255 heads and 63 sectors. However, if
+ * the BIOS detects that the Extended Translation setting does not match
+ * the geometry in the partition table, then the translation inferred
+ * from the partition table will be used by the BIOS, and a warning may
+ * be displayed.
+ */
+
+static int aac_biosparm(Scsi_Disk *disk, kdev_t dev, int *geom)
+{
+ struct diskparm *param = (struct diskparm *)geom;
+ struct buffer_head * buf = NULL;
+
+ dprintk((KERN_DEBUG "aac_biosparm.\n"));
+
+ /*
+ * Assuming extended translation is enabled - #REVISIT#
+ */
+ if( disk->capacity >= 2 * 1024 * 1024 ) /* 1 GB in 512 byte sectors */
+ {
+ if( disk->capacity >= 4 * 1024 * 1024 ) /* 2 GB in 512 byte sectors */
+ {
+ param->heads = 255;
+ param->sectors = 63;
+ }
+ else
+ {
+ param->heads = 128;
+ param->sectors = 32;
+ }
+ }
+ else
+ {
+ param->heads = 64;
+ param->sectors = 32;
+ }
+
+ param->cylinders = disk->capacity/(param->heads * param->sectors);
+
+#if 0
+ /*
+ * Read the first 1024 bytes from the disk device
+ */
+
+ buf = bread(MKDEV(MAJOR(dev), MINOR(dev)&~0xf), 0, block_size(dev));
+ if(buf == NULL)
+ return 0;
+ /*
+ * If the boot sector partition table is valid, search for a partition
+ * table entry whose end_head matches one of the standard geometry
+ * translations ( 64/32, 128/32, 255/63 ).
+ */
+#endif
+
+
+ /*
+ * NB: with bread() compiled out above, buf stays NULL and the
+ * partition-table inspection below is skipped; the guard avoids
+ * dereferencing an uninitialised pointer.
+ */
+ if(buf != NULL &&
+ *(unsigned short *)(buf->b_data + 0x1fe) == cpu_to_le16(0xaa55))
+ {
+ struct partition *first = (struct partition * )(buf->b_data + 0x1be);
+ struct partition *entry = first;
+ int saved_cylinders = param->cylinders;
+ int num;
+ unsigned char end_head, end_sec;
+
+ for(num = 0; num < 4; num++)
+ {
+ end_head = entry->end_head;
+ end_sec = entry->end_sector & 0x3f;
+
+ if(end_head == 63)
+ {
+ param->heads = 64;
+ param->sectors = 32;
+ break;
+ }
+ else if(end_head == 127)
+ {
+ param->heads = 128;
+ param->sectors = 32;
+ break;
+ }
+ else if(end_head == 254)
+ {
+ param->heads = 255;
+ param->sectors = 63;
+ break;
+ }
+ entry++;
+ }
+
+ if(num == 4)
+ {
+ end_head = first->end_head;
+ end_sec = first->end_sector & 0x3f;
+ }
+
+ param->cylinders = disk->capacity / (param->heads * param->sectors);
+
+ if(num < 4 && end_sec == param->sectors)
+ {
+ if(param->cylinders != saved_cylinders)
+ dprintk((KERN_DEBUG "Adopting geometry: heads=%d, "
+ "sectors=%d from partition table %d.\n",
+ param->heads, param->sectors, num));
+ }
+ else if(end_head > 0 || end_sec > 0)
+ {
+ dprintk((KERN_DEBUG "Strange geometry: heads=%d, "
+ "sectors=%d in partition table %d.\n",
+ end_head + 1, end_sec, num));
+ dprintk((KERN_DEBUG "Using geometry: heads=%d, sectors=%d.\n",
+ param->heads, param->sectors));
+ }
+ }
+#if 0
+ brelse(buf);
+#endif
+ return 0;
+}
+
+/**
+ * aac_queuedepth - compute queue depths
+ * @host: SCSI host in question
+ * @dev: SCSI device we are considering
+ *
+ * Selects queue depths for each target device based on the host adapter's
+ * total capacity and the queue depth supported by the target device.
+ * A queue depth of one automatically disables tagged queueing.
+ */
+
+static void aac_queuedepth(struct Scsi_Host * host, Scsi_Device * dev )
+{
+ Scsi_Device * dptr;
+
+ dprintk((KERN_DEBUG "aac_queuedepth.\n"));
+ dprintk((KERN_DEBUG "Device # Q Depth Online\n"));
+ dprintk((KERN_DEBUG "---------------------------\n"));
+ for(dptr = dev; dptr != NULL; dptr = dptr->next)
+ {
+ if(dptr->host == host)
+ {
+ dptr->queue_depth = 10;
+ dprintk((KERN_DEBUG " %2d %d %d\n",
+ dptr->id, dptr->queue_depth, dptr->online));
+ }
+ }
+}
+
+
+/**
+ * aac_eh_abort - Abort command if possible.
+ * @cmd: SCSI command block to abort
+ *
+ * Called when the midlayer wishes to abort a command. We don't support
+ * this facility, and our firmware looks after life for us. We just
+ * report this as failing
+ */
+
+static int aac_eh_abort(Scsi_Cmnd *cmd)
+{
+ return FAILED;
+}
+
+/**
+ * aac_eh_device_reset - Reset command handling
+ * @cmd: SCSI command block causing the reset
+ *
+ * Issue a reset of a SCSI device. We are ourselves not truly a SCSI
+ * controller and our firmware will do the work for us anyway. Thus this
+ * is a no-op. We just return FAILED.
+ */
+
+static int aac_eh_device_reset(Scsi_Cmnd *cmd)
+{
+ return FAILED;
+}
+
+/**
+ * aac_eh_bus_reset - Reset command handling
+ * @cmd: SCSI command block causing the reset
+ *
+ * Issue a reset of a SCSI bus. We are ourselves not truly a SCSI
+ * controller and our firmware will do the work for us anyway. Thus this
+ * is a no-op. We just return FAILED.
+ */
+
+static int aac_eh_bus_reset(Scsi_Cmnd* cmd)
+{
+ return FAILED;
+}
+
+/**
+ * aac_eh_reset - Reset command handling
+ * @cmd: SCSI command block causing the reset
+ *
+ * Issue a reset of a SCSI host. If things get this bad then arguably we should
+ * go take a look at what the host adapter is doing and see if something really
+ * broke (as can occur at least on my Dell QC card if a drive keeps failing spinup)
+ */
+
+static int aac_eh_reset(Scsi_Cmnd* cmd)
+{
+ printk(KERN_ERR "aacraid: Host adapter reset request. SCSI hang ?\n");
+ return FAILED;
+}
+
+/**
+ * aac_ioctl - Handle SCSI ioctls
+ * @scsi_dev_ptr: scsi device to operate upon
+ * @cmd: ioctl command to issue
+ * @arg: ioctl data pointer
+ *
+ * Issue an ioctl on an aacraid device. Returns a standard unix error code or
+ * zero for success
+ */
+
+static int aac_ioctl(Scsi_Device * scsi_dev_ptr, int cmd, void * arg)
+{
+ struct aac_dev *dev;
+ dprintk((KERN_DEBUG "aac_ioctl.\n"));
+ dev = (struct aac_dev *)scsi_dev_ptr->host->hostdata;
+ return aac_do_ioctl(dev, cmd, arg);
+}
+
+/**
+ * aac_cfg_open - open a configuration file
+ * @inode: inode being opened
+ * @file: file handle attached
+ *
+ * Called when the configuration device is opened. Does the needed
+ * set up on the handle and then returns
+ *
+ * Bugs: This needs extending to check a given adapter is present
+ * so we can support hot plugging, and to ref count adapters.
+ */
+
+static int aac_cfg_open(struct inode * inode, struct file * file )
+{
+ unsigned minor_number = MINOR(inode->i_rdev);
+ if(minor_number >= aac_count)
+ return -ENODEV;
+ return 0;
+}
+
+/**
+ * aac_cfg_release - close down an AAC config device
+ * @inode: inode of configuration file
+ * @file: file handle of configuration file
+ *
+ * Called when the last close of the configuration file handle
+ * is performed.
+ */
+
+static int aac_cfg_release(struct inode * inode, struct file * file )
+{
+ return 0;
+}
+
+/**
+ * aac_cfg_ioctl - AAC configuration request
+ * @inode: inode of device
+ * @file: file handle
+ * @cmd: ioctl command code
+ * @arg: argument
+ *
+ * Handles a configuration ioctl. Currently this involves wrapping it
+ * up and feeding it into the nasty windowsalike glue layer.
+ *
+ * Bugs: Needs locking against parallel ioctls lower down
+ * Bugs: Needs to handle hot plugging
+ */
+
+static int aac_cfg_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg )
+{
+ struct aac_dev *dev = aac_devices[MINOR(inode->i_rdev)];
+ return aac_do_ioctl(dev, cmd, (void *)arg);
+}
+
+/*
+ * To use the low level SCSI driver support using the linux kernel loadable
+ * module interface we should initialize the global variable driver_interface
+ * (datatype Scsi_Host_Template) and then include the file scsi_module.c.
+ */
+
+static Scsi_Host_Template driver_template = {
+/* module: THIS_MODULE, */
+ name: "AAC",
+/* proc_info: aac_procinfo, */
+ detect: aac_detect,
+ release: aac_release,
+ info: aac_driverinfo,
+ ioctl: aac_ioctl,
+ queuecommand: aac_queuecommand,
+ bios_param: aac_biosparm,
+ can_queue: AAC_NUM_IO_FIB,
+ this_id: 16,
+ sg_tablesize: 16,
+ max_sectors: 128,
+ cmd_per_lun: AAC_NUM_IO_FIB,
+ eh_abort_handler: aac_eh_abort,
+ eh_device_reset_handler:aac_eh_device_reset,
+ eh_bus_reset_handler: aac_eh_bus_reset,
+ eh_host_reset_handler: aac_eh_reset,
+ use_new_eh_code: 1,
+
+ use_clustering: ENABLE_CLUSTERING,
+};
+
+#include "../scsi_module.c.inc"
+
+#ifdef CONFIG_PROC_FS
+/**
+ * aac_procinfo - Implement /proc/scsi/<drivername>/<n>
+ * @proc_buffer: memory buffer for I/O
+ * @start_ptr: pointer to first valid data
+ * @offset: offset into file
+ * @bytes_available: space left
+ * @host_no: scsi host ident
+ * @write: direction of I/O
+ *
+ * Used to export driver statistics and other information to the world outside
+ * the kernel using the proc file system. Also provides an interface to
+ * feed the driver with information.
+ *
+ * For reads
+ * - if offset > 0 return 0
+ * - if offset == 0 write data to proc_buffer and set the start_ptr to
+ * beginning of proc_buffer, return the number of characters written.
+ * For writes
+ * - writes currently not supported, return 0
+ *
+ * Bugs: Only offset zero is handled
+ */
+
+static int aac_procinfo(char *proc_buffer, char **start_ptr,off_t offset,
+ int bytes_available, int host_no, int write)
+{
+ if(write || offset > 0)
+ return 0;
+ *start_ptr = proc_buffer;
+ return sprintf(proc_buffer, "%s %d\n",
+ "Raid Controller, scsi hba number", host_no);
+}
+#endif
+
+EXPORT_NO_SYMBOLS;
diff --git a/xen/drivers/scsi/aacraid/rx.c b/xen/drivers/scsi/aacraid/rx.c
new file mode 100644
index 0000000000..e79ad49e74
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/rx.c
@@ -0,0 +1,457 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ * rx.c
+ *
+ * Abstract: Hardware miniport for Drawbridge specific hardware functions.
+ *
+ */
+
+#include <xeno/config.h>
+#include <xeno/kernel.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/sched.h>
+#include <xeno/pci.h>
+/* #include <xeno/spinlock.h> */
+/* #include <xeno/slab.h> */
+#include <xeno/blk.h>
+#include <xeno/delay.h>
+/* #include <xeno/completion.h> */
+/* #include <asm/semaphore.h> */
+#include "scsi.h"
+#include "hosts.h"
+
+#include "aacraid.h"
+
+static void aac_rx_intr(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct aac_dev *dev = dev_id;
+ unsigned long bellbits;
+ u8 intstat, mask;
+ intstat = rx_readb(dev, MUnit.OISR);
+ /*
+ * Read mask and invert because drawbridge is reversed.
+ * This allows us to only service interrupts that have
+ * been enabled.
+ */
+ mask = ~(rx_readb(dev, MUnit.OIMR));
+ /* Check to see if this is our interrupt. If it isn't just return */
+
+ if (intstat & mask)
+ {
+ bellbits = rx_readl(dev, OutboundDoorbellReg);
+ if (bellbits & DoorBellPrintfReady) {
+ aac_printf(dev, le32_to_cpu(rx_readl (dev, IndexRegs.Mailbox[5])));
+ rx_writel(dev, MUnit.ODR,DoorBellPrintfReady);
+ rx_writel(dev, InboundDoorbellReg,DoorBellPrintfDone);
+ }
+ else if (bellbits & DoorBellAdapterNormCmdReady) {
+ aac_command_normal(&dev->queues->queue[HostNormCmdQueue]);
+ rx_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdReady);
+ }
+ else if (bellbits & DoorBellAdapterNormRespReady) {
+ aac_response_normal(&dev->queues->queue[HostNormRespQueue]);
+ rx_writel(dev, MUnit.ODR,DoorBellAdapterNormRespReady);
+ }
+ else if (bellbits & DoorBellAdapterNormCmdNotFull) {
+ rx_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
+ }
+ else if (bellbits & DoorBellAdapterNormRespNotFull) {
+ rx_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
+ rx_writel(dev, MUnit.ODR, DoorBellAdapterNormRespNotFull);
+ }
+ }
+}
+
+/**
+ * aac_rx_enable_interrupt - Enable event reporting
+ * @dev: Adapter
+ * @event: Event to enable
+ *
+ * Enable event reporting from the i960 for a given event.
+ */
+
+static void aac_rx_enable_interrupt(struct aac_dev * dev, u32 event)
+{
+ switch (event) {
+
+ case HostNormCmdQue:
+ dev->irq_mask &= ~(OUTBOUNDDOORBELL_1);
+ break;
+
+ case HostNormRespQue:
+ dev->irq_mask &= ~(OUTBOUNDDOORBELL_2);
+ break;
+
+ case AdapNormCmdNotFull:
+ dev->irq_mask &= ~(OUTBOUNDDOORBELL_3);
+ break;
+
+ case AdapNormRespNotFull:
+ dev->irq_mask &= ~(OUTBOUNDDOORBELL_4);
+ break;
+ }
+}
+
+/**
+ * aac_rx_disable_interrupt - Disable event reporting
+ * @dev: Adapter
+ * @event: Event to disable
+ *
+ * Disable event reporting from the i960 for a given event.
+ */
+
+static void aac_rx_disable_interrupt(struct aac_dev *dev, u32 event)
+{
+ switch (event) {
+
+ case HostNormCmdQue:
+ dev->irq_mask |= (OUTBOUNDDOORBELL_1);
+ break;
+
+ case HostNormRespQue:
+ dev->irq_mask |= (OUTBOUNDDOORBELL_2);
+ break;
+
+ case AdapNormCmdNotFull:
+ dev->irq_mask |= (OUTBOUNDDOORBELL_3);
+ break;
+
+ case AdapNormRespNotFull:
+ dev->irq_mask |= (OUTBOUNDDOORBELL_4);
+ break;
+ }
+}
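+
+/*
+ * Note that the two helpers above only update the software mask in
+ * dev->irq_mask; neither touches the hardware OIMR register, which is
+ * programmed in aac_rx_start_adapter() and rx_sync_cmd() below.
+ */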
+
+/**
+ * rx_sync_cmd - send a command and wait
+ * @dev: Adapter
+ * @command: Command to execute
+ * @p1: first parameter
+ * @status: adapter status
+ *
+ * This routine will send a synchronous command to the adapter and wait
+ * for its completion.
+ */
+
+static int rx_sync_cmd(struct aac_dev *dev, u32 command, u32 p1, u32 *status)
+{
+ unsigned long start;
+ int ok;
+ /*
+ * Write the command into Mailbox 0
+ */
+ rx_writel(dev, InboundMailbox0, cpu_to_le32(command));
+ /*
+ * Write the parameters into Mailboxes 1 - 4
+ */
+ rx_writel(dev, InboundMailbox1, cpu_to_le32(p1));
+ rx_writel(dev, InboundMailbox2, 0);
+ rx_writel(dev, InboundMailbox3, 0);
+ rx_writel(dev, InboundMailbox4, 0);
+ /*
+ * Clear the synch command doorbell to start on a clean slate.
+ */
+ rx_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0);
+ /*
+ * Disable doorbell interrupts
+ */
+ rx_writeb(dev, MUnit.OIMR, rx_readb(dev, MUnit.OIMR) | 0x04);
+ /*
+ * Force the completion of the mask register write before issuing
+ * the interrupt.
+ */
+ rx_readb (dev, MUnit.OIMR);
+ /*
+ * Signal that there is a new synch command
+ */
+ rx_writel(dev, InboundDoorbellReg, INBOUNDDOORBELL_0);
+
+ ok = 0;
+ start = jiffies;
+
+ /*
+ * Wait up to 30 seconds
+ */
+ while (time_before(jiffies, start+30*HZ))
+ {
+ /* Delay 5 microseconds to let Mon960 get info. */
+ udelay(5);
+ /*
+ * Mon960 will set doorbell0 bit when its completed the command.
+ */
+ if (rx_readl(dev, OutboundDoorbellReg) & OUTBOUNDDOORBELL_0) {
+ /*
+ * Clear the doorbell.
+ */
+ rx_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0);
+ ok = 1;
+ break;
+ }
+#if 0
+ /*
+ * Yield the processor in case we are slow
+ */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(1);
+#else
+ /* XXX SMH: not in xen we don't */
+ mdelay(50);
+#endif
+
+ }
+ if (ok != 1) {
+ /*
+ * Restore interrupt mask even though we timed out
+ */
+ rx_writeb(dev, MUnit.OIMR, rx_readb(dev, MUnit.OIMR) & 0xfb);
+ return -ETIMEDOUT;
+ }
+ /*
+ * Pull the synch status from Mailbox 0.
+ */
+ *status = le32_to_cpu(rx_readl(dev, IndexRegs.Mailbox[0]));
+ /*
+ * Clear the synch command doorbell.
+ */
+ rx_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0);
+ /*
+ * Restore interrupt mask
+ */
+ rx_writeb(dev, MUnit.OIMR, rx_readb(dev, MUnit.OIMR) & 0xfb);
+ return 0;
+
+}
+
+/**
+ * aac_rx_interrupt_adapter - interrupt adapter
+ * @dev: Adapter
+ *
+ * Send an interrupt to the i960 and breakpoint it.
+ */
+
+static void aac_rx_interrupt_adapter(struct aac_dev *dev)
+{
+ u32 ret;
+ rx_sync_cmd(dev, BREAKPOINT_REQUEST, 0, &ret);
+}
+
+/**
+ * aac_rx_notify_adapter - send an event to the adapter
+ * @dev: Adapter
+ * @event: Event to send
+ *
+ * Notify the i960 that something it probably cares about has
+ * happened.
+ */
+
+static void aac_rx_notify_adapter(struct aac_dev *dev, u32 event)
+{
+ switch (event) {
+
+ case AdapNormCmdQue:
+ rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_1);
+ break;
+ case HostNormRespNotFull:
+ rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_4);
+ break;
+ case AdapNormRespQue:
+ rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_2);
+ break;
+ case HostNormCmdNotFull:
+ rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_3);
+ break;
+ case HostShutdown:
+// rx_sync_cmd(dev, HOST_CRASHING, 0, &ret);
+ break;
+ case FastIo:
+ rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_6);
+ break;
+ case AdapPrintfDone:
+ rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_5);
+ break;
+ default:
+ BUG();
+ break;
+ }
+}
+
+/**
+ * aac_rx_start_adapter - activate adapter
+ * @dev: Adapter
+ *
+ * Start up processing on an i960 based AAC adapter
+ */
+
+static void aac_rx_start_adapter(struct aac_dev *dev)
+{
+ u32 status;
+ struct aac_init *init;
+
+ init = dev->init;
+ printk("aac_rx_start: dev is %p, init is %p\n", dev, init);
+ init->HostElapsedSeconds = cpu_to_le32(jiffies/HZ);
+ /*
+ * Tell the adapter we are back and up and running so it will scan
+ * its command queues and enable our interrupts
+ */
+ dev->irq_mask = (DoorBellPrintfReady | OUTBOUNDDOORBELL_1 |
+ OUTBOUNDDOORBELL_2 | OUTBOUNDDOORBELL_3 |
+ OUTBOUNDDOORBELL_4);
+ /*
+ * First clear out all interrupts. Then enable the ones that we
+ * can handle.
+ */
+ rx_writeb(dev, MUnit.OIMR, 0xff);
+ rx_writel(dev, MUnit.ODR, 0xffffffff);
+// rx_writeb(dev, MUnit.OIMR, ~(u8)OUTBOUND_DOORBELL_INTERRUPT_MASK);
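+ /*
+ * 0xfb == ~0x04: everything stays masked except the doorbell
+ * interrupt, the same 0x04 bit rx_sync_cmd() toggles.
+ */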
+ rx_writeb(dev, MUnit.OIMR, 0xfb);
+
+ // We can only use a 32 bit address here
+ rx_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS,
+ (u32)(ulong)dev->init_pa, &status);
+}
+
+/**
+ * aac_rx_init - initialize an i960 based AAC card
+ * @dev: device to configure
+ * @num: adapter number
+ *
+ * Allocate and set up resources for the i960 based AAC variants. The
+ * device_interface in the commregion will be allocated and linked
+ * to the comm region.
+ */
+
+int aac_rx_init(struct aac_dev *dev, unsigned long num)
+{
+ unsigned long start;
+ unsigned long status;
+ int instance;
+ const char * name;
+
+ dev->devnum = num;
+ instance = dev->id;
+ name = dev->name;
+
+ dprintk((KERN_ERR "aac_rx_init called, num %ld, scsi host ptr = %p\n",
+ num, (void *)(dev->scsi_host_ptr)));
+
+ dprintk((KERN_ERR "scsi_host_ptr->base is %p\n",
+ (void *)dev->scsi_host_ptr->base));
+ /*
+ * Map in the registers from the adapter.
+ */
+ if((dev->regs.rx = (struct rx_registers *)
+ ioremap((unsigned long)dev->scsi_host_ptr->base, 8192))==NULL)
+ {
+ printk(KERN_WARNING "aacraid: unable to map i960.\n" );
+ return -1;
+ }
+
+// dprintk((KERN_ERR "aac_rx_init: AAA\n"));
+ /*
+ * Check to see if the board failed any self tests.
+ */
+ if (rx_readl(dev, IndexRegs.Mailbox[7]) & SELF_TEST_FAILED) {
+ printk(KERN_ERR "%s%d: adapter self-test failed.\n",
+ dev->name, instance);
+ return -1;
+ }
+
+
+// dprintk((KERN_ERR "aac_rx_init: BBB\n"));
+ /*
+ * Check to see if the board panic'd while booting.
+ */
+ if (rx_readl(dev, IndexRegs.Mailbox[7]) & KERNEL_PANIC) {
+ printk(KERN_ERR "%s%d: adapter kernel panic'd.\n",
+ dev->name, instance);
+ return -1;
+ }
+ start = jiffies;
+
+// dprintk((KERN_ERR "aac_rx_init: DDD\n"));
+ /*
+ * Wait for the adapter to be up and running. Wait up to 3 minutes
+ */
+ while (!(rx_readl(dev, IndexRegs.Mailbox[7]) & KERNEL_UP_AND_RUNNING))
+ {
+ if(time_after(jiffies, start+180*HZ))
+ {
+ status = rx_readl(dev, IndexRegs.Mailbox[7]) >> 16;
+ printk(KERN_ERR "%s%d: adapter kernel failed to start,"
+ "init status = %ld.\n", dev->name,
+ instance, status);
+ return -1;
+ }
+// dprintk((KERN_ERR "aac_rx_init: XXX\n"));
+
+#if 0
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(1);
+#else
+ /* XXX SMH: no sleeping for us (we're the xen idle task) */
+ mdelay(50);
+#endif
+
+ }
+
+// dprintk((KERN_ERR "aac_rx_init: ZZZ!\n"));
+ if (request_irq(dev->scsi_host_ptr->irq, aac_rx_intr,
+ SA_SHIRQ|SA_INTERRUPT, "aacraid", (void *)dev) < 0)
+ {
+ printk(KERN_ERR "%s%d: Interrupt unavailable.\n",
+ name, instance);
+ return -1;
+ }
+ /*
+ * Fill in the function dispatch table.
+ */
+ dev->a_ops.adapter_interrupt = aac_rx_interrupt_adapter;
+ dev->a_ops.adapter_enable_int = aac_rx_enable_interrupt;
+ dev->a_ops.adapter_disable_int = aac_rx_disable_interrupt;
+ dev->a_ops.adapter_notify = aac_rx_notify_adapter;
+ dev->a_ops.adapter_sync_cmd = rx_sync_cmd;
+
+ if (aac_init_adapter(dev) == NULL)
+ return -1;
+#ifdef TRY_TASKLET
+ aac_command_tasklet.data = (unsigned long)dev;
+ tasklet_enable(&aac_command_tasklet);
+#else
+ /*
+ * Start any kernel threads needed
+ */
+ dev->thread_pid = kernel_thread((int (*)(void *))aac_command_thread,
+ dev, 0);
+#endif
+
+ /*
+ * Tell the adapter that all is configured, and it can start
+ * accepting requests
+ */
+ aac_rx_start_adapter(dev);
+ return 0;
+}
diff --git a/xen/drivers/scsi/aacraid/sa.c b/xen/drivers/scsi/aacraid/sa.c
new file mode 100644
index 0000000000..edb5679d41
--- /dev/null
+++ b/xen/drivers/scsi/aacraid/sa.c
@@ -0,0 +1,406 @@
+/*
+ * Adaptec AAC series RAID controller driver
+ * (c) Copyright 2001 Red Hat Inc. <alan@redhat.com>
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ * sa.c
+ *
+ * Abstract: Drawbridge specific support functions
+ *
+ */
+
+#include <xeno/config.h>
+#include <xeno/kernel.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/sched.h>
+/* #include <xeno/pci.h> */
+/* #include <xeno/spinlock.h> */
+/* #include <xeno/slab.h> */
+#include <xeno/blk.h>
+#include <xeno/delay.h>
+/* #include <xeno/completion.h> */
+/* #include <asm/semaphore.h> */
+#include "scsi.h"
+#include "hosts.h"
+
+#include "aacraid.h"
+
+static void aac_sa_intr(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct aac_dev *dev = dev_id;
+ unsigned short intstat, mask;
+
+ intstat = sa_readw(dev, DoorbellReg_p);
+ /*
+ * Read mask and invert because drawbridge is reversed.
+ * This allows us to only service interrupts that have been enabled.
+ */
+ mask = ~(sa_readw(dev, SaDbCSR.PRISETIRQMASK));
+
+ /* Check to see if this is our interrupt. If it isn't just return */
+
+ if (intstat & mask) {
+ if (intstat & PrintfReady) {
+ aac_printf(dev, le32_to_cpu(sa_readl(dev, Mailbox5)));
+ sa_writew(dev, DoorbellClrReg_p, PrintfReady); /* clear PrintfReady */
+ sa_writew(dev, DoorbellReg_s, PrintfDone);
+ } else if (intstat & DOORBELL_1) { // dev -> Host Normal Command Ready
+ aac_command_normal(&dev->queues->queue[HostNormCmdQueue]);
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_1);
+ } else if (intstat & DOORBELL_2) { // dev -> Host Normal Response Ready
+ aac_response_normal(&dev->queues->queue[HostNormRespQueue]);
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_2);
+ } else if (intstat & DOORBELL_3) { // dev -> Host Normal Command Not Full
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_3);
+ } else if (intstat & DOORBELL_4) { // dev -> Host Normal Response Not Full
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_4);
+ }
+ }
+}
+
+/**
+ * aac_sa_enable_interrupt - enable an interrupt event
+ * @dev: Which adapter to enable.
+ * @event: Which adapter event.
+ *
+ * This routine will enable the corresponding adapter event to cause an interrupt on
+ * the host.
+ */
+
+void aac_sa_enable_interrupt(struct aac_dev *dev, u32 event)
+{
+ switch (event) {
+
+ case HostNormCmdQue:
+ sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, DOORBELL_1);
+ break;
+
+ case HostNormRespQue:
+ sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, DOORBELL_2);
+ break;
+
+ case AdapNormCmdNotFull:
+ sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, DOORBELL_3);
+ break;
+
+ case AdapNormRespNotFull:
+ sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, DOORBELL_4);
+ break;
+ }
+}
+
+/**
+ * aac_sa_disable_interrupt - disable an interrupt event
+ * @dev: Which adapter to disable.
+ * @event: Which adapter event.
+ *
+ * This routine will disable the corresponding adapter event from causing
+ * an interrupt on the host.
+ */
+
+void aac_sa_disable_interrupt (struct aac_dev *dev, u32 event)
+{
+ switch (event) {
+
+ case HostNormCmdQue:
+ sa_writew(dev, SaDbCSR.PRISETIRQMASK, DOORBELL_1);
+ break;
+
+ case HostNormRespQue:
+ sa_writew(dev, SaDbCSR.PRISETIRQMASK, DOORBELL_2);
+ break;
+
+ case AdapNormCmdNotFull:
+ sa_writew(dev, SaDbCSR.PRISETIRQMASK, DOORBELL_3);
+ break;
+
+ case AdapNormRespNotFull:
+ sa_writew(dev, SaDbCSR.PRISETIRQMASK, DOORBELL_4);
+ break;
+ }
+}
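+
+/*
+ * Unlike the rx variant, the sa enable/disable helpers program the
+ * hardware mask directly: writing a doorbell bit to PRISETIRQMASK masks
+ * (disables) it, and writing it to PRICLEARIRQMASK unmasks (enables) it.
+ */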
+
+/**
+ * aac_sa_notify_adapter - handle adapter notification
+ * @dev: Adapter that notification is for
+ * @event: Event to notify
+ *
+ * Notify the adapter of an event
+ */
+
+void aac_sa_notify_adapter(struct aac_dev *dev, u32 event)
+{
+ switch (event) {
+
+ case AdapNormCmdQue:
+ sa_writew(dev, DoorbellReg_s,DOORBELL_1);
+ break;
+ case HostNormRespNotFull:
+ sa_writew(dev, DoorbellReg_s,DOORBELL_4);
+ break;
+ case AdapNormRespQue:
+ sa_writew(dev, DoorbellReg_s,DOORBELL_2);
+ break;
+ case HostNormCmdNotFull:
+ sa_writew(dev, DoorbellReg_s,DOORBELL_3);
+ break;
+ case HostShutdown:
+ //sa_sync_cmd(dev, HOST_CRASHING, 0, &ret);
+ break;
+ case FastIo:
+ sa_writew(dev, DoorbellReg_s,DOORBELL_6);
+ break;
+ case AdapPrintfDone:
+ sa_writew(dev, DoorbellReg_s,DOORBELL_5);
+ break;
+ default:
+ BUG();
+ break;
+ }
+}
+
+
+/**
+ * sa_sync_cmd - send a command and wait
+ * @dev: Adapter
+ * @command: Command to execute
+ * @p1: first parameter
+ * @ret: adapter status
+ *
+ * This routine will send a synchronous command to the adapter and wait
+ * for its completion.
+ */
+
+static int sa_sync_cmd(struct aac_dev *dev, u32 command, u32 p1, u32 *ret)
+{
+ unsigned long start;
+ int ok;
+ /*
+ * Write the Command into Mailbox 0
+ */
+ sa_writel(dev, Mailbox0, cpu_to_le32(command));
+ /*
+ * Write the parameters into Mailboxes 1 - 4
+ */
+ sa_writel(dev, Mailbox1, cpu_to_le32(p1));
+ sa_writel(dev, Mailbox2, 0);
+ sa_writel(dev, Mailbox3, 0);
+ sa_writel(dev, Mailbox4, 0);
+ /*
+ * Clear the synch command doorbell to start on a clean slate.
+ */
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_0);
+ /*
+ * Signal that there is a new synch command
+ */
+ sa_writew(dev, DoorbellReg_s, DOORBELL_0);
+
+ ok = 0;
+ start = jiffies;
+
+ while(time_before(jiffies, start+30*HZ))
+ {
+ /*
+ * Delay 5uS so that the monitor gets access
+ */
+ udelay(5);
+ /*
+ * Mon110 will set doorbell0 bit when it has
+ * completed the command.
+ */
+ if(sa_readw(dev, DoorbellReg_p) & DOORBELL_0) {
+ ok = 1;
+ break;
+ }
+#if 0
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(1);
+#endif
+ mdelay(100);
+
+ }
+
+ if (ok != 1)
+ return -ETIMEDOUT;
+ /*
+ * Clear the synch command doorbell.
+ */
+ sa_writew(dev, DoorbellClrReg_p, DOORBELL_0);
+ /*
+ * Pull the synch status from Mailbox 0.
+ */
+ *ret = le32_to_cpu(sa_readl(dev, Mailbox0));
+ return 0;
+}
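+
+/*
+ * sa_sync_cmd() mirrors rx_sync_cmd() in rx.c: post the command in
+ * Mailbox0-4, ring DOORBELL_0, then poll for up to 30 seconds for the
+ * adapter to ring it back. The sa variant polls DoorbellReg_p and does
+ * not mask/restore an interrupt mask around the exchange.
+ */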
+
+/**
+ * aac_sa_interrupt_adapter - interrupt an adapter
+ * @dev: Which adapter to enable.
+ *
+ * Breakpoint an adapter.
+ */
+
+static void aac_sa_interrupt_adapter (struct aac_dev *dev)
+{
+ u32 ret;
+ sa_sync_cmd(dev, BREAKPOINT_REQUEST, 0, &ret);
+}
+
+/**
+ * aac_sa_start_adapter - activate adapter
+ * @dev: Adapter
+ *
+ * Start up processing on an ARM based AAC adapter
+ */
+
+static void aac_sa_start_adapter(struct aac_dev *dev)
+{
+ u32 ret;
+ struct aac_init *init;
+ /*
+ * Fill in the remaining pieces of the init.
+ */
+ init = dev->init;
+ init->HostElapsedSeconds = cpu_to_le32(jiffies/HZ);
+
+ dprintk(("INIT\n"));
+ /*
+ * Tell the adapter we are back and up and running so it will scan its command
+ * queues and enable our interrupts
+ */
+ dev->irq_mask = (PrintfReady | DOORBELL_1 | DOORBELL_2 | DOORBELL_3 | DOORBELL_4);
+ /*
+ * First clear out all interrupts. Then enable the ones that
+ * we can handle.
+ */
+ dprintk(("MASK\n"));
+ sa_writew(dev, SaDbCSR.PRISETIRQMASK, cpu_to_le16(0xffff));
+ sa_writew(dev, SaDbCSR.PRICLEARIRQMASK, (PrintfReady | DOORBELL_1 | DOORBELL_2 | DOORBELL_3 | DOORBELL_4));
+ dprintk(("SYNCCMD\n"));
+ /* We can only use a 32 bit address here */
+ sa_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS, (u32)(ulong)dev->init_pa, &ret);
+}
+
+/**
+ * aac_sa_init - initialize an ARM based AAC card
+ * @dev: device to configure
+ * @devnum: adapter number
+ *
+ * Allocate and set up resources for the ARM based AAC variants. The
+ * device_interface in the commregion will be allocated and linked
+ * to the comm region.
+ */
+
+int aac_sa_init(struct aac_dev *dev, unsigned long devnum)
+{
+ unsigned long start;
+ unsigned long status;
+ int instance;
+ const char *name;
+
+ dev->devnum = devnum;
+
+ dprintk(("PREINST\n"));
+ instance = dev->id;
+ name = dev->name;
+
+ /*
+ * Map in the registers from the adapter.
+ */
+ dprintk(("PREMAP\n"));
+
+ if((dev->regs.sa = (struct sa_registers *)ioremap((unsigned long)dev->scsi_host_ptr->base, 8192))==NULL)
+ {
+ printk(KERN_WARNING "aacraid: unable to map ARM.\n" );
+ return -1;
+ }
+ /*
+ * Check to see if the board failed any self tests.
+ */
+ if (sa_readl(dev, Mailbox7) & SELF_TEST_FAILED) {
+ printk(KERN_WARNING "%s%d: adapter self-test failed.\n", name, instance);
+ return -1;
+ }
+ /*
+ * Check to see if the board panic'd while booting.
+ */
+ if (sa_readl(dev, Mailbox7) & KERNEL_PANIC) {
+ printk(KERN_WARNING "%s%d: adapter kernel panic'd.\n", name, instance);
+ return -1;
+ }
+ start = jiffies;
+ /*
+ * Wait for the adapter to be up and running. Wait up to 3 minutes.
+ */
+ while (!(sa_readl(dev, Mailbox7) & KERNEL_UP_AND_RUNNING)) {
+ if (time_after(jiffies, start+180*HZ)) {
+ status = sa_readl(dev, Mailbox7) >> 16;
+ printk(KERN_WARNING "%s%d: adapter kernel failed to start, init status = %d.\n", name, instance, le32_to_cpu(status));
+ return -1;
+ }
+#if 0
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(1);
+#endif
+ mdelay(100);
+ }
+
+ dprintk(("ATIRQ\n"));
+ if (request_irq(dev->scsi_host_ptr->irq, aac_sa_intr, SA_SHIRQ|SA_INTERRUPT, "aacraid", (void *)dev ) < 0) {
+ printk(KERN_WARNING "%s%d: Interrupt unavailable.\n", name, instance);
+ return -1;
+ }
+
+ /*
+ * Fill in the function dispatch table.
+ */
+
+ dev->a_ops.adapter_interrupt = aac_sa_interrupt_adapter;
+ dev->a_ops.adapter_enable_int = aac_sa_enable_interrupt;
+ dev->a_ops.adapter_disable_int = aac_sa_disable_interrupt;
+ dev->a_ops.adapter_notify = aac_sa_notify_adapter;
+ dev->a_ops.adapter_sync_cmd = sa_sync_cmd;
+
+ dprintk(("FUNCDONE\n"));
+
+ if(aac_init_adapter(dev) == NULL)
+ return -1;
+
+ dprintk(("NEWADAPTDONE\n"));
+#if 0
+ /*
+ * Start any kernel threads needed
+ */
+ dev->thread_pid = kernel_thread((int (*)(void *))aac_command_thread, dev, 0);
+#endif
+
+ /*
+ * Tell the adapter that all is configured, and it can start
+ * accepting requests
+ */
+ dprintk(("STARTING\n"));
+ aac_sa_start_adapter(dev);
+ dprintk(("STARTED\n"));
+ return 0;
+}
+
diff --git a/xen/drivers/scsi/constants.c b/xen/drivers/scsi/constants.c
new file mode 100644
index 0000000000..aea16f77d9
--- /dev/null
+++ b/xen/drivers/scsi/constants.c
@@ -0,0 +1,1005 @@
+/*
+ * ASCII values for a number of symbolic constants, printing functions,
+ * etc.
+ * Additions for SCSI 2 and Linux 2.2.x by D. Gilbert (990422)
+ *
+ */
+
+#define __NO_VERSION__
+#include <xeno/module.h>
+
+#include <xeno/config.h>
+#include <xeno/blk.h>
+/*#include <linux/kernel.h> */
+#include "scsi.h"
+#include "hosts.h"
+
+#define CONST_COMMAND 0x01
+#define CONST_STATUS 0x02
+#define CONST_SENSE 0x04
+#define CONST_XSENSE 0x08
+#define CONST_CMND 0x10
+#define CONST_MSG 0x20
+#define CONST_HOST 0x40
+#define CONST_DRIVER 0x80
+
+static const char unknown[] = "UNKNOWN";
+
+#ifdef CONFIG_SCSI_CONSTANTS
+#ifdef CONSTANTS
+#undef CONSTANTS
+#endif
+#define CONSTANTS (CONST_COMMAND | CONST_STATUS | CONST_SENSE | CONST_XSENSE \
+ | CONST_CMND | CONST_MSG | CONST_HOST | CONST_DRIVER)
+#endif
+
+#if (CONSTANTS & CONST_COMMAND)
+static const char * group_0_commands[] = {
+/* 00-03 */ "Test Unit Ready", "Rezero Unit", unknown, "Request Sense",
+/* 04-07 */ "Format Unit", "Read Block Limits", unknown, "Reasssign Blocks",
+/* 08-0d */ "Read (6)", unknown, "Write (6)", "Seek (6)", unknown, unknown,
+/* 0e-12 */ unknown, "Read Reverse", "Write Filemarks", "Space", "Inquiry",
+/* 13-16 */ "Verify", "Recover Buffered Data", "Mode Select", "Reserve",
+/* 17-1b */ "Release", "Copy", "Erase", "Mode Sense", "Start/Stop Unit",
+/* 1c-1d */ "Receive Diagnostic", "Send Diagnostic",
+/* 1e-1f */ "Prevent/Allow Medium Removal", unknown,
+};
+
+
+static const char *group_1_commands[] = {
+/* 20-22 */ unknown, unknown, unknown,
+/* 23-28 */ unknown, "Define window parameters", "Read Capacity",
+ unknown, unknown, "Read (10)",
+/* 29-2d */ "Read Generation", "Write (10)", "Seek (10)", "Erase",
+ "Read updated block",
+/* 2e-31 */ "Write Verify","Verify", "Search High", "Search Equal",
+/* 32-34 */ "Search Low", "Set Limits", "Prefetch or Read Position",
+/* 35-37 */ "Synchronize Cache","Lock/Unlock Cache", "Read Defect Data",
+/* 38-3c */ "Medium Scan", "Compare", "Copy Verify", "Write Buffer",
+ "Read Buffer",
+/* 3d-3f */ "Update Block", "Read Long", "Write Long",
+};
+
+
+static const char *group_2_commands[] = {
+/* 40-41 */ "Change Definition", "Write Same",
+/* 42-48 */ "Read sub-channel", "Read TOC", "Read header",
+ "Play audio (10)", unknown, "Play audio msf",
+ "Play audio track/index",
+/* 49-4f */ "Play track relative (10)", unknown, "Pause/resume",
+ "Log Select", "Log Sense", unknown, unknown,
+/* 50-55 */ unknown, unknown, unknown, unknown, unknown, "Mode Select (10)",
+/* 56-5b */ unknown, unknown, unknown, unknown, "Mode Sense (10)", unknown,
+/* 5c-5f */ unknown, unknown, unknown, unknown,
+};
+
+
+/* The following are 16 byte commands in group 4 */
+static const char *group_4_commands[] = {
+/* 80-84 */ unknown, unknown, unknown, unknown, unknown,
+/* 85-89 */ "Memory Export In (16)", unknown, unknown, unknown,
+ "Memory Export Out (16)",
+/* 8a-8f */ unknown, unknown, unknown, unknown, unknown, unknown,
+/* 90-94 */ unknown, unknown, unknown, unknown, unknown,
+/* 95-99 */ unknown, unknown, unknown, unknown, unknown,
+/* 9a-9f */ unknown, unknown, unknown, unknown, unknown, unknown,
+};
+
+
+/* The following are 12 byte commands in group 5 */
+static const char *group_5_commands[] = {
+/* a0-a5 */ unknown, unknown, unknown, unknown, unknown,
+ "Move medium/play audio(12)",
+/* a6-a9 */ "Exchange medium", unknown, "Read(12)", "Play track relative(12)",
+/* aa-ae */ "Write(12)", unknown, "Erase(12)", unknown,
+ "Write and verify(12)",
+/* af-b1 */ "Verify(12)", "Search data high(12)", "Search data equal(12)",
+/* b2-b4 */ "Search data low(12)", "Set limits(12)", unknown,
+/* b5-b6 */ "Request volume element address", "Send volume tag",
+/* b7-b9 */ "Read defect data(12)", "Read element status", unknown,
+/* ba-bf */ unknown, unknown, unknown, unknown, unknown, unknown,
+};
+
+
+
+#define group(opcode) (((opcode) >> 5) & 7)
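+
+/*
+ * Worked example: opcode 0x28 ("Read (10)") falls in group 1, since
+ * 0x28 >> 5 == 1, and names entry 0x28 & 0x1f == 8 of group_1_commands.
+ */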
+
+#define RESERVED_GROUP 0
+#define VENDOR_GROUP 1
+
+static const char **commands[] = {
+ group_0_commands, group_1_commands, group_2_commands,
+ (const char **) RESERVED_GROUP, group_4_commands,
+ group_5_commands, (const char **) VENDOR_GROUP,
+ (const char **) VENDOR_GROUP
+};
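+
+/*
+ * RESERVED_GROUP and VENDOR_GROUP are small-integer sentinels cast to
+ * pointers; print_opcode() switches on the table pointer's value to
+ * detect them before indexing into a real command table.
+ */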
+
+static const char reserved[] = "RESERVED";
+static const char vendor[] = "VENDOR SPECIFIC";
+
+static void print_opcode(int opcode) {
+ const char **table = commands[ group(opcode) ];
+ switch ((unsigned long) table) {
+ case RESERVED_GROUP:
+ printk("%s(0x%02x) ", reserved, opcode);
+ break;
+ case VENDOR_GROUP:
+ printk("%s(0x%02x) ", vendor, opcode);
+ break;
+ default:
+ if (table[opcode & 0x1f] != unknown)
+ printk("%s ",table[opcode & 0x1f]);
+ else
+ printk("%s(0x%02x) ", unknown, opcode);
+ break;
+ }
+}
+#else /* CONST & CONST_COMMAND */
+static void print_opcode(int opcode) {
+ printk("0x%02x ", opcode);
+}
+#endif
+
+void print_command (unsigned char *command) {
+ int i,s;
+ print_opcode(command[0]);
+ for ( i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
+ printk("%02x ", command[i]);
+ printk("\n");
+}
+
+#if (CONSTANTS & CONST_STATUS)
+static const char * statuses[] = {
+/* 0-4 */ "Good", "Check Condition", "Condition Met", unknown, "Busy",
+/* 5-9 */ unknown, unknown, unknown, "Intermediate", unknown,
+/* a-c */ "Intermediate-Condition Met", unknown, "Reservation Conflict",
+/* d-10 */ unknown, unknown, unknown, unknown,
+/* 11-14 */ "Command Terminated", unknown, unknown, "Queue Full",
+/* 15-1a */ unknown, unknown, unknown, unknown, unknown, unknown,
+/* 1b-1f */ unknown, unknown, unknown, unknown, unknown,
+};
+#endif
+
+void print_status (int status) {
+ status = (status >> 1) & 0x1f;
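+ /*
+ * SCSI status codes occupy bits 1-5 of the status byte, so e.g.
+ * CHECK CONDITION (0x02) indexes statuses[1].
+ */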
+#if (CONSTANTS & CONST_STATUS)
+ printk("%s ",statuses[status]);
+#else
+ printk("0x%0x ", status);
+#endif
+}
+
+#if (CONSTANTS & CONST_XSENSE)
+#define D 0x0001 /* DIRECT ACCESS DEVICE (disk) */
+#define T 0x0002 /* SEQUENTIAL ACCESS DEVICE (tape) */
+#define L 0x0004 /* PRINTER DEVICE */
+#define P 0x0008 /* PROCESSOR DEVICE */
+#define W 0x0010 /* WRITE ONCE READ MULTIPLE DEVICE */
+#define R 0x0020 /* READ ONLY (CD-ROM) DEVICE */
+#define S 0x0040 /* SCANNER DEVICE */
+#define O 0x0080 /* OPTICAL MEMORY DEVICE */
+#define M 0x0100 /* MEDIA CHANGER DEVICE */
+#define C 0x0200 /* COMMUNICATION DEVICE */
+#define A 0x0400 /* ARRAY STORAGE */
+#define E 0x0800 /* ENCLOSURE SERVICES DEVICE */
+#define B 0x1000 /* SIMPLIFIED DIRECT ACCESS DEVICE */
+#define K 0x2000 /* OPTICAL CARD READER/WRITER DEVICE */
+
+struct error_info{
+ unsigned char code1, code2;
+ unsigned short int devices;
+ const char * text;
+};
+
+struct error_info2{
+ unsigned char code1, code2_min, code2_max;
+ unsigned short int devices;
+ const char * text;
+};
+
+static struct error_info2 additional2[] =
+{
+ {0x40,0x00,0x7f,D,"Ram failure (%x)"},
+ {0x40,0x80,0xff,D|T|L|P|W|R|S|O|M|C,"Diagnostic failure on component (%x)"},
+ {0x41,0x00,0xff,D,"Data path failure (%x)"},
+ {0x42,0x00,0xff,D,"Power-on or self-test failure (%x)"},
+ {0, 0, 0, 0, NULL}
+};
+
+static struct error_info additional[] =
+{
+ {0x00,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"No additional sense information"},
+ {0x00,0x01,T,"Filemark detected"},
+ {0x00,0x02,T|S,"End-of-partition/medium detected"},
+ {0x00,0x03,T,"Setmark detected"},
+ {0x00,0x04,T|S,"Beginning-of-partition/medium detected"},
+ {0x00,0x05,T|L|S,"End-of-data detected"},
+ {0x00,0x06,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"I/O process terminated"},
+ {0x00,0x11,R,"Audio play operation in progress"},
+ {0x00,0x12,R,"Audio play operation paused"},
+ {0x00,0x13,R,"Audio play operation successfully completed"},
+ {0x00,0x14,R,"Audio play operation stopped due to error"},
+ {0x00,0x15,R,"No current audio status to return"},
+ {0x00,0x16,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Operation in progress"},
+ {0x00,0x17,D|T|L|W|R|S|O|M|A|E|B|K,"Cleaning requested"},
+ {0x01,0x00,D|W|O|B|K,"No index/sector signal"},
+ {0x02,0x00,D|W|R|O|M|B|K,"No seek complete"},
+ {0x03,0x00,D|T|L|W|S|O|B|K,"Peripheral device write fault"},
+ {0x03,0x01,T,"No write current"},
+ {0x03,0x02,T,"Excessive write errors"},
+ {0x04,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit not ready,cause not reportable"},
+ {0x04,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit is in process of becoming ready"},
+ {0x04,0x02,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit not ready,initializing cmd. required"},
+ {0x04,0x03,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit not ready,manual intervention required"},
+ {0x04,0x04,D|T|L|R|O|B,"Logical unit not ready,format in progress"},
+ {0x04,0x05,D|T|W|O|M|C|A|B|K,"Logical unit not ready,rebuild in progress"},
+ {0x04,0x06,D|T|W|O|M|C|A|B|K,"Logical unit not ready,recalculation in progress"},
+ {0x04,0x07,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit not ready,operation in progress"},
+ {0x04,0x08,R,"Logical unit not ready,long write in progress"},
+ {0x04,0x09,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit not ready,self-test in progress"},
+ {0x05,0x00,D|T|L|W|R|S|O|M|C|A|E|B|K,"Logical unit does not respond to selection"},
+ {0x06,0x00,D|W|R|O|M|B|K,"No reference position found"},
+ {0x07,0x00,D|T|L|W|R|S|O|M|B|K,"Multiple peripheral devices selected"},
+ {0x08,0x00,D|T|L|W|R|S|O|M|C|A|E|B|K,"Logical unit communication failure"},
+ {0x08,0x01,D|T|L|W|R|S|O|M|C|A|E|B|K,"Logical unit communication time-out"},
+ {0x08,0x02,D|T|L|W|R|S|O|M|C|A|E|B|K,"Logical unit communication parity error"},
+ {0x08,0x03,D|T|R|O|M|B|K,"Logical unit communication CRC error (Ultra-DMA/32)"},
+ {0x08,0x04,D|T|L|P|W|R|S|O|C|K,"Unreachable copy target"},
+ {0x09,0x00,D|T|W|R|O|B,"Track following error"},
+ {0x09,0x01,W|R|O|K,"Tracking servo failure"},
+ {0x09,0x02,W|R|O|K,"Focus servo failure"},
+ {0x09,0x03,W|R|O,"Spindle servo failure"},
+ {0x09,0x04,D|T|W|R|O|B,"Head select fault"},
+ {0x0A,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Error log overflow"},
+ {0x0B,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Warning"},
+ {0x0B,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Warning - specified temperature exceeded"},
+ {0x0B,0x02,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Warning - enclosure degraded"},
+ {0x0C,0x00,T|R|S,"Write error"},
+ {0x0C,0x01,K,"Write error - recovered with auto reallocation"},
+ {0x0C,0x02,D|W|O|B|K,"Write error - auto reallocation failed"},
+ {0x0C,0x03,D|W|O|B|K,"Write error - recommend reassignment"},
+ {0x0C,0x04,D|T|W|O|B,"Compression check miscompare error"},
+ {0x0C,0x05,D|T|W|O|B,"Data expansion occurred during compression"},
+ {0x0C,0x06,D|T|W|O|B,"Block not compressible"},
+ {0x0C,0x07,R,"Write error - recovery needed"},
+ {0x0C,0x08,R,"Write error - recovery failed"},
+ {0x0C,0x09,R,"Write error - loss of streaming"},
+ {0x0C,0x0A,R,"Write error - padding blocks added"},
+ {0x10,0x00,D|W|O|B|K,"Id CRC or ECC error"},
+ {0x11,0x00,D|T|W|R|S|O|B|K,"Unrecovered read error"},
+ {0x11,0x01,D|T|W|R|S|O|B|K,"Read retries exhausted"},
+ {0x11,0x02,D|T|W|R|S|O|B|K,"Error too long to correct"},
+ {0x11,0x03,D|T|W|S|O|B|K,"Multiple read errors"},
+ {0x11,0x04,D|W|O|B|K,"Unrecovered read error - auto reallocate failed"},
+ {0x11,0x05,W|R|O|B,"L-EC uncorrectable error"},
+ {0x11,0x06,W|R|O|B,"CIRC unrecovered error"},
+ {0x11,0x07,W|O|B,"Data re-synchronization error"},
+ {0x11,0x08,T,"Incomplete block read"},
+ {0x11,0x09,T,"No gap found"},
+ {0x11,0x0A,D|T|O|B|K,"Miscorrected error"},
+ {0x11,0x0B,D|W|O|B|K,"Unrecovered read error - recommend reassignment"},
+ {0x11,0x0C,D|W|O|B|K,"Unrecovered read error - recommend rewrite the data"},
+ {0x11,0x0D,D|T|W|R|O|B,"De-compression CRC error"},
+ {0x11,0x0E,D|T|W|R|O|B,"Cannot decompress using declared algorithm"},
+ {0x11,0x0F,R,"Error reading UPC/EAN number"},
+ {0x11,0x10,R,"Error reading ISRC number"},
+ {0x11,0x11,R,"Read error - loss of streaming"},
+ {0x12,0x00,D|W|O|B|K,"Address mark not found for id field"},
+ {0x13,0x00,D|W|O|B|K,"Address mark not found for data field"},
+ {0x14,0x00,D|T|L|W|R|S|O|B|K,"Recorded entity not found"},
+ {0x14,0x01,D|T|W|R|O|B|K,"Record not found"},
+ {0x14,0x02,T,"Filemark or setmark not found"},
+ {0x14,0x03,T,"End-of-data not found"},
+ {0x14,0x04,T,"Block sequence error"},
+ {0x14,0x05,D|T|W|O|B|K,"Record not found - recommend reassignment"},
+ {0x14,0x06,D|T|W|O|B|K,"Record not found - data auto-reallocated"},
+ {0x15,0x00,D|T|L|W|R|S|O|M|B|K,"Random positioning error"},
+ {0x15,0x01,D|T|L|W|R|S|O|M|B|K,"Mechanical positioning error"},
+ {0x15,0x02,D|T|W|R|O|B|K,"Positioning error detected by read of medium"},
+ {0x16,0x00,D|W|O|B|K,"Data synchronization mark error"},
+ {0x16,0x01,D|W|O|B|K,"Data sync error - data rewritten"},
+ {0x16,0x02,D|W|O|B|K,"Data sync error - recommend rewrite"},
+ {0x16,0x03,D|W|O|B|K,"Data sync error - data auto-reallocated"},
+ {0x16,0x04,D|W|O|B|K,"Data sync error - recommend reassignment"},
+ {0x17,0x00,D|T|W|R|S|O|B|K,"Recovered data with no error correction applied"},
+ {0x17,0x01,D|T|W|R|S|O|B|K,"Recovered data with retries"},
+ {0x17,0x02,D|T|W|R|O|B|K,"Recovered data with positive head offset"},
+ {0x17,0x03,D|T|W|R|O|B|K,"Recovered data with negative head offset"},
+ {0x17,0x04,W|R|O|B,"Recovered data with retries and/or circ applied"},
+ {0x17,0x05,D|W|R|O|B|K,"Recovered data using previous sector id"},
+ {0x17,0x06,D|W|O|B|K,"Recovered data without ecc - data auto-reallocated"},
+ {0x17,0x07,D|W|R|O|B|K,"Recovered data without ecc - recommend reassignment"},
+ {0x17,0x08,D|W|R|O|B|K,"Recovered data without ecc - recommend rewrite"},
+ {0x17,0x09,D|W|R|O|B|K,"Recovered data without ecc - data rewritten"},
+ {0x18,0x00,D|T|W|R|O|B|K,"Recovered data with error correction applied"},
+ {0x18,0x01,D|W|R|O|B|K,"Recovered data with error corr. & retries applied"},
+ {0x18,0x02,D|W|R|O|B|K,"Recovered data - data auto-reallocated"},
+ {0x18,0x03,R,"Recovered data with CIRC"},
+ {0x18,0x04,R,"Recovered data with L-EC"},
+ {0x18,0x05,D|W|R|O|B|K,"Recovered data - recommend reassignment"},
+ {0x18,0x06,D|W|R|O|B|K,"Recovered data - recommend rewrite"},
+ {0x18,0x07,D|W|O|B|K,"Recovered data with ecc - data rewritten"},
+ {0x19,0x00,D|O|K,"Defect list error"},
+ {0x19,0x01,D|O|K,"Defect list not available"},
+ {0x19,0x02,D|O|K,"Defect list error in primary list"},
+ {0x19,0x03,D|O|K,"Defect list error in grown list"},
+ {0x1A,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Parameter list length error"},
+ {0x1B,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Synchronous data transfer error"},
+ {0x1C,0x00,D|O|B|K,"Defect list not found"},
+ {0x1C,0x01,D|O|B|K,"Primary defect list not found"},
+ {0x1C,0x02,D|O|B|K,"Grown defect list not found"},
+ {0x1D,0x00,D|T|W|R|O|B|K,"Miscompare during verify operation"},
+ {0x1E,0x00,D|W|O|B|K,"Recovered id with ecc correction"},
+ {0x1F,0x00,D|O|K,"Partial defect list transfer"},
+ {0x20,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Invalid command operation code"},
+ {0x21,0x00,D|T|W|R|O|M|B|K,"Logical block address out of range"},
+ {0x21,0x01,D|T|W|R|O|M|B|K,"Invalid element address"},
+ {0x22,0x00,D,"Illegal function (use 20 00,24 00,or 26 00)"},
+ {0x24,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Invalid field in cdb"},
+ {0x24,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"CDB decryption error"},
+ {0x25,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit not supported"},
+ {0x26,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Invalid field in parameter list"},
+ {0x26,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Parameter not supported"},
+ {0x26,0x02,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Parameter value invalid"},
+ {0x26,0x03,D|T|L|P|W|R|S|O|M|C|A|E|K,"Threshold parameters not supported"},
+ {0x26,0x04,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Invalid release of persistent reservation"},
+ {0x26,0x05,D|T|L|P|W|R|S|O|M|C|A|B|K,"Data decryption error"},
+ {0x26,0x06,D|T|L|P|W|R|S|O|C|K,"Too many target descriptors"},
+ {0x26,0x07,D|T|L|P|W|R|S|O|C|K,"Unsupported target descriptor type code"},
+ {0x26,0x08,D|T|L|P|W|R|S|O|C|K,"Too many segment descriptors"},
+ {0x26,0x09,D|T|L|P|W|R|S|O|C|K,"Unsupported segment descriptor type code"},
+ {0x26,0x0A,D|T|L|P|W|R|S|O|C|K,"Unexpected inexact segment"},
+ {0x26,0x0B,D|T|L|P|W|R|S|O|C|K,"Inline data length exceeded"},
+ {0x26,0x0C,D|T|L|P|W|R|S|O|C|K,"Invalid operation for copy source or destination"},
+ {0x26,0x0D,D|T|L|P|W|R|S|O|C|K,"Copy segment granularity violation"},
+ {0x27,0x00,D|T|W|R|O|B|K,"Write protected"},
+ {0x27,0x01,D|T|W|R|O|B|K,"Hardware write protected"},
+ {0x27,0x02,D|T|W|R|O|B|K,"Logical unit software write protected"},
+ {0x27,0x03,T|R,"Associated write protect"},
+ {0x27,0x04,T|R,"Persistent write protect"},
+ {0x27,0x05,T|R,"Permanent write protect"},
+ {0x28,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Not ready to ready change,medium may have changed"},
+ {0x28,0x01,D|T|W|R|O|M|B,"Import or export element accessed"},
+ {0x29,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Power on,reset,or bus device reset occurred"},
+ {0x29,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Power on occurred"},
+ {0x29,0x02,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Scsi bus reset occurred"},
+ {0x29,0x03,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Bus device reset function occurred"},
+ {0x29,0x04,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Device internal reset"},
+ {0x29,0x05,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Transceiver mode changed to single-ended"},
+ {0x29,0x06,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Transceiver mode changed to lvd"},
+ {0x2A,0x00,D|T|L|W|R|S|O|M|C|A|E|B|K,"Parameters changed"},
+ {0x2A,0x01,D|T|L|W|R|S|O|M|C|A|E|B|K,"Mode parameters changed"},
+ {0x2A,0x02,D|T|L|W|R|S|O|M|C|A|E|K,"Log parameters changed"},
+ {0x2A,0x03,D|T|L|P|W|R|S|O|M|C|A|E|K,"Reservations preempted"},
+ {0x2A,0x04,D|T|L|P|W|R|S|O|M|C|A|E,"Reservations released"},
+ {0x2A,0x05,D|T|L|P|W|R|S|O|M|C|A|E,"Registrations preempted"},
+ {0x2B,0x00,D|T|L|P|W|R|S|O|C|K,"Copy cannot execute since host cannot disconnect"},
+ {0x2C,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Command sequence error"},
+ {0x2C,0x01,S,"Too many windows specified"},
+ {0x2C,0x02,S,"Invalid combination of windows specified"},
+ {0x2C,0x03,R,"Current program area is not empty"},
+ {0x2C,0x04,R,"Current program area is empty"},
+ {0x2C,0x05,B,"Illegal power condition request"},
+ {0x2D,0x00,T,"Overwrite error on update in place"},
+ {0x2F,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Commands cleared by another initiator"},
+ {0x30,0x00,D|T|W|R|O|M|B|K,"Incompatible medium installed"},
+ {0x30,0x01,D|T|W|R|O|B|K,"Cannot read medium - unknown format"},
+ {0x30,0x02,D|T|W|R|O|B|K,"Cannot read medium - incompatible format"},
+ {0x30,0x03,D|T|R|K,"Cleaning cartridge installed"},
+ {0x30,0x04,D|T|W|R|O|B|K,"Cannot write medium - unknown format"},
+ {0x30,0x05,D|T|W|R|O|B|K,"Cannot write medium - incompatible format"},
+ {0x30,0x06,D|T|W|R|O|B,"Cannot format medium - incompatible medium"},
+ {0x30,0x07,D|T|L|W|R|S|O|M|A|E|B|K,"Cleaning failure"},
+ {0x30,0x08,R,"Cannot write - application code mismatch"},
+ {0x30,0x09,R,"Current session not fixated for append"},
+ {0x31,0x00,D|T|W|R|O|B|K,"Medium format corrupted"},
+ {0x31,0x01,D|L|R|O|B,"Format command failed"},
+ {0x32,0x00,D|W|O|B|K,"No defect spare location available"},
+ {0x32,0x01,D|W|O|B|K,"Defect list update failure"},
+ {0x33,0x00,T,"Tape length error"},
+ {0x34,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Enclosure failure"},
+ {0x35,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Enclosure services failure"},
+ {0x35,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Unsupported enclosure function"},
+ {0x35,0x02,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Enclosure services unavailable"},
+ {0x35,0x03,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Enclosure services transfer failure"},
+ {0x35,0x04,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Enclosure services transfer refused"},
+ {0x36,0x00,L,"Ribbon,ink,or toner failure"},
+ {0x37,0x00,D|T|L|W|R|S|O|M|C|A|E|B|K,"Rounded parameter"},
+ {0x38,0x00,B,"Event status notification"},
+ {0x38,0x02,B,"Esn - power management class event"},
+ {0x38,0x04,B,"Esn - media class event"},
+ {0x38,0x06,B,"Esn - device busy class event"},
+ {0x39,0x00,D|T|L|W|R|S|O|M|C|A|E|K,"Saving parameters not supported"},
+ {0x3A,0x00,D|T|L|W|R|S|O|M|B|K,"Medium not present"},
+ {0x3A,0x01,D|T|W|R|O|M|B|K,"Medium not present - tray closed"},
+ {0x3A,0x02,D|T|W|R|O|M|B|K,"Medium not present - tray open"},
+ {0x3A,0x03,D|T|W|R|O|M|B,"Medium not present - loadable"},
+ {0x3A,0x04,D|T|W|R|O|M|B,"Medium not present - medium auxiliary memory accessible"},
+ {0x3B,0x00,T|L,"Sequential positioning error"},
+ {0x3B,0x01,T,"Tape position error at beginning-of-medium"},
+ {0x3B,0x02,T,"Tape position error at end-of-medium"},
+ {0x3B,0x03,L,"Tape or electronic vertical forms unit not ready"},
+ {0x3B,0x04,L,"Slew failure"},
+ {0x3B,0x05,L,"Paper jam"},
+ {0x3B,0x06,L,"Failed to sense top-of-form"},
+ {0x3B,0x07,L,"Failed to sense bottom-of-form"},
+ {0x3B,0x08,T,"Reposition error"},
+ {0x3B,0x09,S,"Read past end of medium"},
+ {0x3B,0x0A,S,"Read past beginning of medium"},
+ {0x3B,0x0B,S,"Position past end of medium"},
+ {0x3B,0x0C,T|S,"Position past beginning of medium"},
+ {0x3B,0x0D,D|T|W|R|O|M|B|K,"Medium destination element full"},
+ {0x3B,0x0E,D|T|W|R|O|M|B|K,"Medium source element empty"},
+ {0x3B,0x0F,R,"End of medium reached"},
+ {0x3B,0x11,D|T|W|R|O|M|B|K,"Medium magazine not accessible"},
+ {0x3B,0x12,D|T|W|R|O|M|B|K,"Medium magazine removed"},
+ {0x3B,0x13,D|T|W|R|O|M|B|K,"Medium magazine inserted"},
+ {0x3B,0x14,D|T|W|R|O|M|B|K,"Medium magazine locked"},
+ {0x3B,0x15,D|T|W|R|O|M|B|K,"Medium magazine unlocked"},
+ {0x3B,0x16,R,"Mechanical positioning or changer error"},
+ {0x3D,0x00,D|T|L|P|W|R|S|O|M|C|A|E|K,"Invalid bits in identify message"},
+ {0x3E,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit has not self-configured yet"},
+ {0x3E,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit failure"},
+ {0x3E,0x02,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Timeout on logical unit"},
+ {0x3E,0x03,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit failed self-test"},
+ {0x3E,0x04,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit unable to update self-test log"},
+ {0x3F,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Target operating conditions have changed"},
+ {0x3F,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Microcode has been changed"},
+ {0x3F,0x02,D|T|L|P|W|R|S|O|M|C|B|K,"Changed operating definition"},
+ {0x3F,0x03,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Inquiry data has changed"},
+ {0x3F,0x04,D|T|W|R|O|M|C|A|E|B|K,"Component device attached"},
+ {0x3F,0x05,D|T|W|R|O|M|C|A|E|B|K,"Device identifier changed"},
+ {0x3F,0x06,D|T|W|R|O|M|C|A|E|B,"Redundancy group created or modified"},
+ {0x3F,0x07,D|T|W|R|O|M|C|A|E|B,"Redundancy group deleted"},
+ {0x3F,0x08,D|T|W|R|O|M|C|A|E|B,"Spare created or modified"},
+ {0x3F,0x09,D|T|W|R|O|M|C|A|E|B,"Spare deleted"},
+ {0x3F,0x0A,D|T|W|R|O|M|C|A|E|B|K,"Volume set created or modified"},
+ {0x3F,0x0B,D|T|W|R|O|M|C|A|E|B|K,"Volume set deleted"},
+ {0x3F,0x0C,D|T|W|R|O|M|C|A|E|B|K,"Volume set deassigned"},
+ {0x3F,0x0D,D|T|W|R|O|M|C|A|E|B|K,"Volume set reassigned"},
+ {0x3F,0x0E,D|T|L|P|W|R|S|O|M|C|A|E,"Reported luns data has changed"},
+ {0x3F,0x10,D|T|W|R|O|M|B,"Medium loadable"},
+ {0x3F,0x11,D|T|W|R|O|M|B,"Medium auxiliary memory accessible"},
+ {0x40,0x00,D,"Ram failure (should use 40 nn)"},
+ /*
+ * FIXME(eric) - need a way to represent wildcards here.
+ */
+ {0x40,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Diagnostic failure on component nn (80h-ffh)"},
+ {0x41,0x00,D,"Data path failure (should use 40 nn)"},
+ {0x42,0x00,D,"Power-on or self-test failure (should use 40 nn)"},
+ {0x43,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Message error"},
+ {0x44,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Internal target failure"},
+ {0x45,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Select or reselect failure"},
+ {0x46,0x00,D|T|L|P|W|R|S|O|M|C|B|K,"Unsuccessful soft reset"},
+ {0x47,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Scsi parity error"},
+ {0x47,0x01,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Data phase CRC error detected"},
+ {0x47,0x02,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Scsi parity error detected during st data phase"},
+ {0x47,0x03,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Information unit CRC error detected"},
+ {0x47,0x04,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Asynchronous information protection error detected"},
+ {0x48,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Initiator detected error message received"},
+ {0x49,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Invalid message error"},
+ {0x4A,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Command phase error"},
+ {0x4B,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Data phase error"},
+ {0x4C,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Logical unit failed self-configuration"},
+ /*
+ * FIXME(eric) - need a way to represent wildcards here.
+ */
+ {0x4D,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Tagged overlapped commands (nn = queue tag)"},
+ {0x4E,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Overlapped commands attempted"},
+ {0x50,0x00,T,"Write append error"},
+ {0x50,0x01,T,"Write append position error"},
+ {0x50,0x02,T,"Position error related to timing"},
+ {0x51,0x00,T|R|O,"Erase failure"},
+ {0x52,0x00,T,"Cartridge fault"},
+ {0x53,0x00,D|T|L|W|R|S|O|M|B|K,"Media load or eject failed"},
+ {0x53,0x01,T,"Unload tape failure"},
+ {0x53,0x02,D|T|W|R|O|M|B|K,"Medium removal prevented"},
+ {0x54,0x00,P,"Scsi to host system interface failure"},
+ {0x55,0x00,P,"System resource failure"},
+ {0x55,0x01,D|O|B|K,"System buffer full"},
+ {0x55,0x02,D|T|L|P|W|R|S|O|M|A|E|K,"Insufficient reservation resources"},
+ {0x55,0x03,D|T|L|P|W|R|S|O|M|C|A|E,"Insufficient resources"},
+ {0x55,0x04,D|T|L|P|W|R|S|O|M|A|E,"Insufficient registration resources"},
+ {0x57,0x00,R,"Unable to recover table-of-contents"},
+ {0x58,0x00,O,"Generation does not exist"},
+ {0x59,0x00,O,"Updated block read"},
+ {0x5A,0x00,D|T|L|P|W|R|S|O|M|B|K,"Operator request or state change input"},
+ {0x5A,0x01,D|T|W|R|O|M|B|K,"Operator medium removal request"},
+ {0x5A,0x02,D|T|W|R|O|A|B|K,"Operator selected write protect"},
+ {0x5A,0x03,D|T|W|R|O|A|B|K,"Operator selected write permit"},
+ {0x5B,0x00,D|T|L|P|W|R|S|O|M|K,"Log exception"},
+ {0x5B,0x01,D|T|L|P|W|R|S|O|M|K,"Threshold condition met"},
+ {0x5B,0x02,D|T|L|P|W|R|S|O|M|K,"Log counter at maximum"},
+ {0x5B,0x03,D|T|L|P|W|R|S|O|M|K,"Log list codes exhausted"},
+ {0x5C,0x00,D|O,"Rpl status change"},
+ {0x5C,0x01,D|O,"Spindles synchronized"},
+ {0x5C,0x02,D|O,"Spindles not synchronized"},
+ {0x5D,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Failure prediction threshold exceeded"},
+ {0x5D,0x01,R|B,"Media failure prediction threshold exceeded"},
+ {0x5D,0x02,R,"Logical unit failure prediction threshold exceeded"},
+ {0x5D,0x10,D|B,"Hardware impending failure general hard drive failure"},
+ {0x5D,0x11,D|B,"Hardware impending failure drive error rate too high"},
+ {0x5D,0x12,D|B,"Hardware impending failure data error rate too high"},
+ {0x5D,0x13,D|B,"Hardware impending failure seek error rate too high"},
+ {0x5D,0x14,D|B,"Hardware impending failure too many block reassigns"},
+ {0x5D,0x15,D|B,"Hardware impending failure access times too high"},
+ {0x5D,0x16,D|B,"Hardware impending failure start unit times too high"},
+ {0x5D,0x17,D|B,"Hardware impending failure channel parametrics"},
+ {0x5D,0x18,D|B,"Hardware impending failure controller detected"},
+ {0x5D,0x19,D|B,"Hardware impending failure throughput performance"},
+ {0x5D,0x1A,D|B,"Hardware impending failure seek time performance"},
+ {0x5D,0x1B,D|B,"Hardware impending failure spin-up retry count"},
+ {0x5D,0x1C,D|B,"Hardware impending failure drive calibration retry count"},
+ {0x5D,0x20,D|B,"Controller impending failure general hard drive failure"},
+ {0x5D,0x21,D|B,"Controller impending failure drive error rate too high"},
+ {0x5D,0x22,D|B,"Controller impending failure data error rate too high"},
+ {0x5D,0x23,D|B,"Controller impending failure seek error rate too high"},
+ {0x5D,0x24,D|B,"Controller impending failure too many block reassigns"},
+ {0x5D,0x25,D|B,"Controller impending failure access times too high"},
+ {0x5D,0x26,D|B,"Controller impending failure start unit times too high"},
+ {0x5D,0x27,D|B,"Controller impending failure channel parametrics"},
+ {0x5D,0x28,D|B,"Controller impending failure controller detected"},
+ {0x5D,0x29,D|B,"Controller impending failure throughput performance"},
+ {0x5D,0x2A,D|B,"Controller impending failure seek time performance"},
+ {0x5D,0x2B,D|B,"Controller impending failure spin-up retry count"},
+ {0x5D,0x2C,D|B,"Controller impending failure drive calibration retry count"},
+ {0x5D,0x30,D|B,"Data channel impending failure general hard drive failure"},
+ {0x5D,0x31,D|B,"Data channel impending failure drive error rate too high"},
+ {0x5D,0x32,D|B,"Data channel impending failure data error rate too high"},
+ {0x5D,0x33,D|B,"Data channel impending failure seek error rate too high"},
+ {0x5D,0x34,D|B,"Data channel impending failure too many block reassigns"},
+ {0x5D,0x35,D|B,"Data channel impending failure access times too high"},
+ {0x5D,0x36,D|B,"Data channel impending failure start unit times too high"},
+ {0x5D,0x37,D|B,"Data channel impending failure channel parametrics"},
+ {0x5D,0x38,D|B,"Data channel impending failure controller detected"},
+ {0x5D,0x39,D|B,"Data channel impending failure throughput performance"},
+ {0x5D,0x3A,D|B,"Data channel impending failure seek time performance"},
+ {0x5D,0x3B,D|B,"Data channel impending failure spin-up retry count"},
+ {0x5D,0x3C,D|B,"Data channel impending failure drive calibration retry count"},
+ {0x5D,0x40,D|B,"Servo impending failure general hard drive failure"},
+ {0x5D,0x41,D|B,"Servo impending failure drive error rate too high"},
+ {0x5D,0x42,D|B,"Servo impending failure data error rate too high"},
+ {0x5D,0x43,D|B,"Servo impending failure seek error rate too high"},
+ {0x5D,0x44,D|B,"Servo impending failure too many block reassigns"},
+ {0x5D,0x45,D|B,"Servo impending failure access times too high"},
+ {0x5D,0x46,D|B,"Servo impending failure start unit times too high"},
+ {0x5D,0x47,D|B,"Servo impending failure channel parametrics"},
+ {0x5D,0x48,D|B,"Servo impending failure controller detected"},
+ {0x5D,0x49,D|B,"Servo impending failure throughput performance"},
+ {0x5D,0x4A,D|B,"Servo impending failure seek time performance"},
+ {0x5D,0x4B,D|B,"Servo impending failure spin-up retry count"},
+ {0x5D,0x4C,D|B,"Servo impending failure drive calibration retry count"},
+ {0x5D,0x50,D|B,"Spindle impending failure general hard drive failure"},
+ {0x5D,0x51,D|B,"Spindle impending failure drive error rate too high"},
+ {0x5D,0x52,D|B,"Spindle impending failure data error rate too high"},
+ {0x5D,0x53,D|B,"Spindle impending failure seek error rate too high"},
+ {0x5D,0x54,D|B,"Spindle impending failure too many block reassigns"},
+ {0x5D,0x55,D|B,"Spindle impending failure access times too high"},
+ {0x5D,0x56,D|B,"Spindle impending failure start unit times too high"},
+ {0x5D,0x57,D|B,"Spindle impending failure channel parametrics"},
+ {0x5D,0x58,D|B,"Spindle impending failure controller detected"},
+ {0x5D,0x59,D|B,"Spindle impending failure throughput performance"},
+ {0x5D,0x5A,D|B,"Spindle impending failure seek time performance"},
+ {0x5D,0x5B,D|B,"Spindle impending failure spin-up retry count"},
+ {0x5D,0x5C,D|B,"Spindle impending failure drive calibration retry count"},
+ {0x5D,0x60,D|B,"Firmware impending failure general hard drive failure"},
+ {0x5D,0x61,D|B,"Firmware impending failure drive error rate too high"},
+ {0x5D,0x62,D|B,"Firmware impending failure data error rate too high"},
+ {0x5D,0x63,D|B,"Firmware impending failure seek error rate too high"},
+ {0x5D,0x64,D|B,"Firmware impending failure too many block reassigns"},
+ {0x5D,0x65,D|B,"Firmware impending failure access times too high"},
+ {0x5D,0x66,D|B,"Firmware impending failure start unit times too high"},
+ {0x5D,0x67,D|B,"Firmware impending failure channel parametrics"},
+ {0x5D,0x68,D|B,"Firmware impending failure controller detected"},
+ {0x5D,0x69,D|B,"Firmware impending failure throughput performance"},
+ {0x5D,0x6A,D|B,"Firmware impending failure seek time performance"},
+ {0x5D,0x6B,D|B,"Firmware impending failure spin-up retry count"},
+ {0x5D,0x6C,D|B,"Firmware impending failure drive calibration retry count"},
+ {0x5D,0xFF,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Failure prediction threshold exceeded (false)"},
+ {0x5E,0x00,D|T|L|P|W|R|S|O|C|A|K,"Low power condition on"},
+ {0x5E,0x01,D|T|L|P|W|R|S|O|C|A|K,"Idle condition activated by timer"},
+ {0x5E,0x02,D|T|L|P|W|R|S|O|C|A|K,"Standby condition activated by timer"},
+ {0x5E,0x03,D|T|L|P|W|R|S|O|C|A|K,"Idle condition activated by command"},
+ {0x5E,0x04,D|T|L|P|W|R|S|O|C|A|K,"Standby condition activated by command"},
+ {0x5E,0x41,B,"Power state change to active"},
+ {0x5E,0x42,B,"Power state change to idle"},
+ {0x5E,0x43,B,"Power state change to standby"},
+ {0x5E,0x45,B,"Power state change to sleep"},
+ {0x5E,0x47,B|K,"Power state change to device control"},
+ {0x60,0x00,S,"Lamp failure"},
+ {0x61,0x00,S,"Video acquisition error"},
+ {0x61,0x01,S,"Unable to acquire video"},
+ {0x61,0x02,S,"Out of focus"},
+ {0x62,0x00,S,"Scan head positioning error"},
+ {0x63,0x00,R,"End of user area encountered on this track"},
+ {0x63,0x01,R,"Packet does not fit in available space"},
+ {0x64,0x00,R,"Illegal mode for this track"},
+ {0x64,0x01,R,"Invalid packet size"},
+ {0x65,0x00,D|T|L|P|W|R|S|O|M|C|A|E|B|K,"Voltage fault"},
+ {0x66,0x00,S,"Automatic document feeder cover up"},
+ {0x66,0x01,S,"Automatic document feeder lift up"},
+ {0x66,0x02,S,"Document jam in automatic document feeder"},
+ {0x66,0x03,S,"Document miss feed automatic in document feeder"},
+ {0x67,0x00,A,"Configuration failure"},
+ {0x67,0x01,A,"Configuration of incapable logical units failed"},
+ {0x67,0x02,A,"Add logical unit failed"},
+ {0x67,0x03,A,"Modification of logical unit failed"},
+ {0x67,0x04,A,"Exchange of logical unit failed"},
+ {0x67,0x05,A,"Remove of logical unit failed"},
+ {0x67,0x06,A,"Attachment of logical unit failed"},
+ {0x67,0x07,A,"Creation of logical unit failed"},
+ {0x67,0x08,A,"Assign failure occurred"},
+ {0x67,0x09,A,"Multiply assigned logical unit"},
+ {0x68,0x00,A,"Logical unit not configured"},
+ {0x69,0x00,A,"Data loss on logical unit"},
+ {0x69,0x01,A,"Multiple logical unit failures"},
+ {0x69,0x02,A,"Parity/data mismatch"},
+ {0x6A,0x00,A,"Informational,refer to log"},
+ {0x6B,0x00,A,"State change has occurred"},
+ {0x6B,0x01,A,"Redundancy level got better"},
+ {0x6B,0x02,A,"Redundancy level got worse"},
+ {0x6C,0x00,A,"Rebuild failure occurred"},
+ {0x6D,0x00,A,"Recalculate failure occurred"},
+ {0x6E,0x00,A,"Command to logical unit failed"},
+ {0x6F,0x00,R,"Copy protection key exchange failure - authentication failure"},
+ {0x6F,0x01,R,"Copy protection key exchange failure - key not present"},
+ {0x6F,0x02,R,"Copy protection key exchange failure - key not established"},
+ {0x6F,0x03,R,"Read of scrambled sector without authentication"},
+ {0x6F,0x04,R,"Media region code is mismatched to logical unit region"},
+ {0x6F,0x05,R,"Drive region must be permanent/region reset count error"},
+ /*
+ * FIXME(eric) - need a way to represent wildcards here.
+ */
+ {0x70,0x00,T,"Decompression exception short algorithm id of nn"},
+ {0x71,0x00,T,"Decompression exception long algorithm id"},
+ {0x72,0x00,R,"Session fixation error"},
+ {0x72,0x01,R,"Session fixation error writing lead-in"},
+ {0x72,0x02,R,"Session fixation error writing lead-out"},
+ {0x72,0x03,R,"Session fixation error - incomplete track in session"},
+ {0x72,0x04,R,"Empty or partially written reserved track"},
+ {0x72,0x05,R,"No more track reservations allowed"},
+ {0x73,0x00,R,"Cd control error"},
+ {0x73,0x01,R,"Power calibration area almost full"},
+ {0x73,0x02,R,"Power calibration area is full"},
+ {0x73,0x03,R,"Power calibration area error"},
+ {0x73,0x04,R,"Program memory area update failure"},
+ {0x73,0x05,R,"Program memory area is full"},
+ {0x73,0x06,R,"RMA/PMA is full"},
+ {0, 0, 0, NULL}
+};
+#endif
+
+#if (CONSTANTS & CONST_SENSE)
+static const char *snstext[] = {
+ "None", /* There is no sense information */
+ "Recovered Error", /* The last command completed successfully
+ but used error correction */
+ "Not Ready", /* The addressed target is not ready */
+ "Medium Error", /* Data error detected on the medium */
+ "Hardware Error", /* Controller or device failure */
+ "Illegal Request",
+ "Unit Attention", /* Removable medium was changed, or
+ the target has been reset */
+ "Data Protect", /* Access to the data is blocked */
+ "Blank Check", /* Reached unexpected written or unwritten
+ region of the medium */
+ "Key=9", /* Vendor specific */
+ "Copy Aborted", /* COPY or COMPARE was aborted */
+ "Aborted Command", /* The target aborted the command */
+ "Equal", /* A SEARCH DATA command found data equal */
+ "Volume Overflow", /* Medium full with still data to be written */
+ "Miscompare", /* Source data and data on the medium
+ do not agree */
+ "Key=15" /* Reserved */
+};
+#endif
+
+/* Print sense information */
+static
+void print_sense_internal(const char * devclass,
+ const unsigned char * sense_buffer,
+ kdev_t dev)
+{
+ int i, s;
+ int sense_class, valid, code, info;
+ const char * error = NULL;
+
+ sense_class = (sense_buffer[0] >> 4) & 0x07;
+ code = sense_buffer[0] & 0xf;
+ valid = sense_buffer[0] & 0x80;
+
+ if (sense_class == 7) { /* extended sense data */
+ s = sense_buffer[7] + 8;
+ if(s > SCSI_SENSE_BUFFERSIZE)
+ s = SCSI_SENSE_BUFFERSIZE;
+
+ info = ((sense_buffer[3] << 24) | (sense_buffer[4] << 16) |
+ (sense_buffer[5] << 8) | sense_buffer[6]);
+ if (info || valid) {
+ printk("Info fld=0x%x", info);
+ if (!valid) /* info data not according to standard */
+ printk(" (nonstd)");
+ printk(", ");
+ }
+ if (sense_buffer[2] & 0x80)
+ printk( "FMK "); /* current command has read a filemark */
+ if (sense_buffer[2] & 0x40)
+ printk( "EOM "); /* end-of-medium condition exists */
+ if (sense_buffer[2] & 0x20)
+ printk( "ILI "); /* incorrect block length requested */
+
+ switch (code) {
+ case 0x0:
+ error = "Current"; /* error concerns current command */
+ break;
+ case 0x1:
+ error = "Deferred"; /* error concerns some earlier command */
+ /* e.g., an earlier write to disk cache succeeded, but
+ now the disk discovers that it cannot write the data */
+ break;
+ default:
+ error = "Invalid";
+ }
+
+ printk("%s ", error);
+
+#if (CONSTANTS & CONST_SENSE)
+ printk( "%s%s: sense key %s\n", devclass,
+ kdevname(dev), snstext[sense_buffer[2] & 0x0f]);
+#else
+ printk("%s%s: sns = %2x %2x\n", devclass,
+ kdevname(dev), sense_buffer[0], sense_buffer[2]);
+#endif
+
+ /* Check to see if additional sense information is available */
+ if(sense_buffer[7] + 7 < 13 ||
+ (sense_buffer[12] == 0 && sense_buffer[13] == 0)) goto done;
+
+#if (CONSTANTS & CONST_XSENSE)
+ for(i=0; additional[i].text; i++)
+ if(additional[i].code1 == sense_buffer[12] &&
+ additional[i].code2 == sense_buffer[13])
+ printk("Additional sense indicates %s\n", additional[i].text);
+
+ for(i=0; additional2[i].text; i++)
+ if(additional2[i].code1 == sense_buffer[12] &&
+ additional2[i].code2_min <= sense_buffer[13] &&
+ additional2[i].code2_max >= sense_buffer[13]) {
+ printk("Additional sense indicates ");
+ printk(additional2[i].text, sense_buffer[13]);
+ printk("\n");
+ }
+#else
+ printk("ASC=%2x ASCQ=%2x\n", sense_buffer[12], sense_buffer[13]);
+#endif
+ } else { /* non-extended sense data */
+
+ /*
+ * Standard says:
+ * sense_buffer[0] & 0200 : address valid
+ * sense_buffer[0] & 0177 : vendor-specific error code
+ * sense_buffer[1] & 0340 : vendor-specific
+ * sense_buffer[1..3] : 21-bit logical block address
+ */
+
+#if (CONSTANTS & CONST_SENSE)
+ if (sense_buffer[0] < 15)
+ printk("%s%s: old sense key %s\n", devclass,
+ kdevname(dev), snstext[sense_buffer[0] & 0x0f]);
+ else
+#endif
+ printk("%s%s: sns = %2x %2x\n", devclass,
+ kdevname(dev), sense_buffer[0], sense_buffer[2]);
+
+ printk("Non-extended sense class %d code 0x%0x\n", sense_class, code);
+ s = 4;
+ }
+
+ done:
+#if !(CONSTANTS & CONST_SENSE)
+ printk("Raw sense data:");
+ for (i = 0; i < s; ++i)
+ printk("0x%02x ", sense_buffer[i]);
+ printk("\n");
+#endif
+ return;
+}
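+
+/*
+ * Illustrative helper (not part of the original file): decoding the
+ * 21-bit logical block address from a non-extended sense buffer, per
+ * the layout noted in the comment above. A sketch only; nothing below
+ * uses it.
+ */
+static inline unsigned int non_extended_sense_lba(const unsigned char *sb)
+{
+    /* low 5 bits of byte 1, then bytes 2 and 3 */
+    return ((sb[1] & 0x1f) << 16) | (sb[2] << 8) | sb[3];
+}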
+
+void print_sense(const char * devclass, Scsi_Cmnd * SCpnt)
+{
+ print_sense_internal(devclass, SCpnt->sense_buffer,
+ SCpnt->request.rq_dev);
+}
+
+void print_req_sense(const char * devclass, Scsi_Request * SRpnt)
+{
+ print_sense_internal(devclass, SRpnt->sr_sense_buffer,
+ SRpnt->sr_request.rq_dev);
+}
+
+#if (CONSTANTS & CONST_MSG)
+static const char *one_byte_msgs[] = {
+/* 0x00 */ "Command Complete", NULL, "Save Pointers",
+/* 0x03 */ "Restore Pointers", "Disconnect", "Initiator Error",
+/* 0x06 */ "Abort", "Message Reject", "Nop", "Message Parity Error",
+/* 0x0a */ "Linked Command Complete", "Linked Command Complete w/flag",
+/* 0x0c */ "Bus device reset", "Abort Tag", "Clear Queue",
+/* 0x0f */ "Initiate Recovery", "Release Recovery"
+};
+
+#define NO_ONE_BYTE_MSGS (sizeof(one_byte_msgs) / sizeof (const char *))
+
+static const char *two_byte_msgs[] = {
+/* 0x20 */ "Simple Queue Tag", "Head of Queue Tag", "Ordered Queue Tag"
+/* 0x23 */ "Ignore Wide Residue"
+};
+
+#define NO_TWO_BYTE_MSGS (sizeof(two_byte_msgs) / sizeof (const char *))
+
+static const char *extended_msgs[] = {
+/* 0x00 */ "Modify Data Pointer", "Synchronous Data Transfer Request",
+/* 0x02 */ "SCSI-I Extended Identify", "Wide Data Transfer Request"
+};
+
+#define NO_EXTENDED_MSGS (sizeof(extended_msgs) / sizeof (const char *))
+#endif /* (CONSTANTS & CONST_MSG) */
+
+int print_msg (const unsigned char *msg) {
+ int len = 0, i;
+ if (msg[0] == EXTENDED_MESSAGE) {
+ len = 3 + msg[1];
+#if (CONSTANTS & CONST_MSG)
+ if (msg[2] < NO_EXTENDED_MSGS)
+ printk ("%s ", extended_msgs[msg[2]]);
+ else
+ printk ("Extended Message, reserved code (0x%02x) ", (int) msg[2]);
+ switch (msg[2]) {
+ case EXTENDED_MODIFY_DATA_POINTER:
+ printk("pointer = %d", (int) (msg[3] << 24) | (msg[4] << 16) |
+ (msg[5] << 8) | msg[6]);
+ break;
+ case EXTENDED_SDTR:
+ printk("period = %d ns, offset = %d", (int) msg[3] * 4, (int)
+ msg[4]);
+ break;
+ case EXTENDED_WDTR:
+ printk("width = 2^%d bytes", msg[3]);
+ break;
+ default:
+ for (i = 2; i < len; ++i)
+ printk("%02x ", msg[i]);
+ }
+#else
+ for (i = 0; i < len; ++i)
+ printk("%02x ", msg[i]);
+#endif
+ /* Identify */
+ } else if (msg[0] & 0x80) {
+#if (CONSTANTS & CONST_MSG)
+ printk("Identify disconnect %sallowed %s %d ",
+ (msg[0] & 0x40) ? "" : "not ",
+ (msg[0] & 0x20) ? "target routine" : "lun",
+ msg[0] & 0x7);
+#else
+ printk("%02x ", msg[0]);
+#endif
+ len = 1;
+ /* Normal One byte */
+ } else if (msg[0] < 0x1f) {
+#if (CONSTANTS & CONST_MSG)
+ if (msg[0] < NO_ONE_BYTE_MSGS)
+ printk(one_byte_msgs[msg[0]]);
+ else
+ printk("reserved (%02x) ", msg[0]);
+#else
+ printk("%02x ", msg[0]);
+#endif
+ len = 1;
+ /* Two byte */
+ } else if (msg[0] <= 0x2f) {
+#if (CONSTANTS & CONST_MSG)
+ if ((msg[0] - 0x20) < NO_TWO_BYTE_MSGS)
+ printk("%s %02x ", two_byte_msgs[msg[0] - 0x20],
+ msg[1]);
+ else
+ printk("reserved two byte (%02x %02x) ",
+ msg[0], msg[1]);
+#else
+ printk("%02x %02x", msg[0], msg[1]);
+#endif
+ len = 2;
+ } else
+#if (CONSTANTS & CONST_MSG)
+ printk(reserved);
+#else
+ printk("%02x ", msg[0]);
+#endif
+ return len;
+}
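+
+/*
+ * Usage sketch (illustrative, not from the original file): printing an
+ * extended SDTR message. The byte values are made up; byte 1 is the
+ * count of bytes that follow it (code, period, offset).
+ */
+#if 0
+static void print_msg_example(void)
+{
+    /* SDTR: period code 0x19 (0x19 * 4 = 100 ns), offset 15 */
+    unsigned char msg[5] = { EXTENDED_MESSAGE, 3, EXTENDED_SDTR, 0x19, 15 };
+    /* prints "Synchronous Data Transfer Request period = 100 ns, offset = 15" */
+    print_msg(msg);
+}
+#endif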
+
+void print_Scsi_Cmnd (Scsi_Cmnd *cmd) {
+ printk("scsi%d : destination target %d, lun %d\n",
+ cmd->host->host_no,
+ cmd->target,
+ cmd->lun);
+ printk(" command = ");
+ print_command (cmd->cmnd);
+}
+
+#if (CONSTANTS & CONST_HOST)
+static const char * hostbyte_table[]={
+"DID_OK", "DID_NO_CONNECT", "DID_BUS_BUSY", "DID_TIME_OUT", "DID_BAD_TARGET",
+"DID_ABORT", "DID_PARITY", "DID_ERROR", "DID_RESET", "DID_BAD_INTR",
+"DID_PASSTHROUGH", "DID_SOFT_ERROR", NULL};
+
+void print_hostbyte(int scsiresult)
+{ static int maxcode=0;
+ int i;
+
+ if(!maxcode) {
+ for(i=0;hostbyte_table[i];i++) ;
+ maxcode=i-1;
+ }
+ printk("Hostbyte=0x%02x",host_byte(scsiresult));
+ if(host_byte(scsiresult)>maxcode) {
+ printk("is invalid ");
+ return;
+ }
+ printk("(%s) ",hostbyte_table[host_byte(scsiresult)]);
+}
+#else
+void print_hostbyte(int scsiresult)
+{ printk("Hostbyte=0x%02x ",host_byte(scsiresult));
+}
+#endif
+
+#if (CONSTANTS & CONST_DRIVER)
+static const char * driverbyte_table[]={
+"DRIVER_OK", "DRIVER_BUSY", "DRIVER_SOFT", "DRIVER_MEDIA", "DRIVER_ERROR",
+"DRIVER_INVALID", "DRIVER_TIMEOUT", "DRIVER_HARD",NULL };
+
+static const char * driversuggest_table[]={"SUGGEST_OK",
+"SUGGEST_RETRY", "SUGGEST_ABORT", "SUGGEST_REMAP", "SUGGEST_DIE",
+unknown,unknown,unknown, "SUGGEST_SENSE",NULL};
+
+
+void print_driverbyte(int scsiresult)
+{ static int driver_max=0,suggest_max=0;
+ int i,dr=driver_byte(scsiresult)&DRIVER_MASK,
+ su=(driver_byte(scsiresult)&SUGGEST_MASK)>>4;
+
+ if(!driver_max) {
+ for(i=0;driverbyte_table[i];i++) ;
+ driver_max=i;
+ for(i=0;driversuggest_table[i];i++) ;
+ suggest_max=i;
+ }
+ printk("Driverbyte=0x%02x",driver_byte(scsiresult));
+ printk("(%s,%s) ",
+ dr<driver_max ? driverbyte_table[dr]:"invalid",
+ su<suggest_max ? driversuggest_table[su]:"invalid");
+}
+#else
+void print_driverbyte(int scsiresult)
+{ printk("Driverbyte=0x%02x ",driver_byte(scsiresult));
+}
+#endif
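+
+/*
+ * Worked illustration (not in the original source), assuming the usual
+ * DRIVER_MASK = 0x0f and SUGGEST_MASK = 0xf0 from scsi.h: a result with
+ * driver byte 0x24 splits into dr = 0x24 & 0x0f = 4 ("DRIVER_ERROR")
+ * and su = (0x24 & 0xf0) >> 4 = 2 ("SUGGEST_ABORT"), so
+ * print_driverbyte() emits "Driverbyte=0x24(DRIVER_ERROR,SUGGEST_ABORT) ".
+ */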
+
+/*
+ * Overrides for Emacs so that we almost follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/constants.h b/xen/drivers/scsi/constants.h
new file mode 100644
index 0000000000..e10527ea5e
--- /dev/null
+++ b/xen/drivers/scsi/constants.h
@@ -0,0 +1,6 @@
+#ifndef _CONSTANTS_H
+#define _CONSTANTS_H
+extern int print_msg(const unsigned char *);
+extern void print_status(int);
+extern void print_Scsi_Cmnd (Scsi_Cmnd *);
+#endif /* def _CONSTANTS_H */
diff --git a/xen/drivers/scsi/hosts.c b/xen/drivers/scsi/hosts.c
new file mode 100644
index 0000000000..ea613aaa47
--- /dev/null
+++ b/xen/drivers/scsi/hosts.c
@@ -0,0 +1,316 @@
+/*
+ * hosts.c Copyright (C) 1992 Drew Eckhardt
+ * Copyright (C) 1993, 1994, 1995 Eric Youngdale
+ *
+ * mid to lowlevel SCSI driver interface
+ * Initial versions: Drew Eckhardt
+ * Subsequent revisions: Eric Youngdale
+ *
+ * <drew@colorado.edu>
+ *
+ * Jiffies wrap fixes (host->resetting), 3 Dec 1998 Andrea Arcangeli
+ * Added QLOGIC QLA1280 SCSI controller kernel host support.
+ * August 4, 1999 Fred Lewis, Intel DuPont
+ *
+ * Updated to reflect the new initialization scheme for the higher
+ * level of scsi drivers (sd/sr/st)
+ * September 17, 2000 Torben Mathiasen <tmm@image.dk>
+ */
+
+
+/*
+ * This file contains the medium level SCSI
+ * host interface initialization, as well as the scsi_hosts array of SCSI
+ * hosts currently present in the system.
+ */
+
+#define __NO_VERSION__
+#include <xeno/module.h>
+#include <xeno/blk.h>
+/* #include <xeno/kernel.h> */
+/* #include <xeno/string.h> */
+/* #include <xeno/mm.h> */
+/* #include <xeno/proc_fs.h> */
+#include <xeno/init.h>
+
+#define __KERNEL_SYSCALLS__
+
+/* #include <xeno/unistd.h> */
+
+#include "scsi.h"
+#include "hosts.h"
+
+/*
+static const char RCSid[] = "$Header: /vger/u4/cvs/linux/drivers/scsi/hosts.c,v 1.20 1996/12/12 19:18:32 davem Exp $";
+*/
+
+/*
+ * The scsi host entries should be in the order you wish the
+ * cards to be detected. A driver may appear more than once IFF
+ * it can deal with being detected (and therefore initialized)
+ * with more than one simultaneous host number, can handle being
+ * reentrant, etc.
+ *
+ * They may appear in any order, as each SCSI host is told which host
+ * number it is during detection.
+ */
+
+/* This is a placeholder for controllers that are not configured into
+ * the system - we do this to ensure that the controller numbering is
+ * always consistent, no matter how the kernel is configured. */
+
+#define NO_CONTROLLER {NULL, NULL, NULL, NULL, NULL, NULL, NULL, \
+ NULL, NULL, 0, 0, 0, 0, 0, 0}
+
+/*
+ * When configure is run, we don't want to link to any object code. Since
+ * the macro for each host will contain function pointers, we cannot
+ * use it and instead must use a "blank" that does no such
+ * idiocy.
+ */
+
+Scsi_Host_Template * scsi_hosts;
+
+
+/*
+ * Our semaphores and timeout counters, whose sizes depend on
+ * MAX_SCSI_HOSTS.
+ */
+
+Scsi_Host_Name * scsi_host_no_list;
+struct Scsi_Host * scsi_hostlist;
+struct Scsi_Device_Template * scsi_devicelist;
+
+int max_scsi_hosts;
+int next_scsi_host;
+
+void
+scsi_unregister(struct Scsi_Host * sh){
+ struct Scsi_Host * shpnt;
+ Scsi_Host_Name *shn;
+
+ if(scsi_hostlist == sh)
+ scsi_hostlist = sh->next;
+ else {
+ shpnt = scsi_hostlist;
+ while(shpnt->next != sh) shpnt = shpnt->next;
+ shpnt->next = shpnt->next->next;
+ }
+
+ /*
+ * We have to unregister the host from the scsi_host_no_list as well.
+ * Decide by host_no rather than by name, because most host drivers can
+ * handle more than one adapter of the same kind (or family).
+ */
+ for ( shn=scsi_host_no_list; shn && (sh->host_no != shn->host_no);
+ shn=shn->next);
+ if (shn) shn->host_registered = 0;
+ /* else {} : This should not happen, we should panic here... */
+
+ /* If we are removing the last host registered, it is safe to reuse
+ * its host number (this avoids "holes" at boot time) (DB)
+ * It is also safe to reuse the numbers directly below it which have
+ * been released earlier (to avoid some holes in the numbering).
+ */
+ if(sh->host_no == max_scsi_hosts - 1) {
+ while(--max_scsi_hosts >= next_scsi_host) {
+ shpnt = scsi_hostlist;
+ while(shpnt && shpnt->host_no != max_scsi_hosts - 1)
+ shpnt = shpnt->next;
+ if(shpnt)
+ break;
+ }
+ }
+ next_scsi_host--;
+ kfree((char *) sh);
+}
+
+/* We call this when we come across a new host adapter. We only do this
+ * once we are 100% sure that we want to use this host adapter - it is a
+ * pain to reverse this, so we try to avoid it
+ */
+
+struct Scsi_Host * scsi_register(Scsi_Host_Template * tpnt, int j){
+ struct Scsi_Host * retval, *shpnt, *o_shp;
+ Scsi_Host_Name *shn, *shn2;
+ int flag_new = 1;
+ const char * hname;
+ size_t hname_len;
+ retval = (struct Scsi_Host *)kmalloc(sizeof(struct Scsi_Host) + j,
+ (tpnt->unchecked_isa_dma && j ?
+ GFP_DMA : 0) | GFP_ATOMIC);
+ if(retval == NULL)
+ {
+ printk("scsi: out of memory in scsi_register.\n");
+ return NULL;
+ }
+
+ memset(retval, 0, sizeof(struct Scsi_Host) + j);
+
+ /* trying to find a reserved entry (host_no) */
+ hname = (tpnt->proc_name) ? tpnt->proc_name : "";
+ hname_len = strlen(hname);
+ for (shn = scsi_host_no_list;shn;shn = shn->next) {
+ if (!(shn->host_registered) &&
+ (hname_len > 0) && (0 == strncmp(hname, shn->name, hname_len))) {
+ flag_new = 0;
+ retval->host_no = shn->host_no;
+ shn->host_registered = 1;
+ shn->loaded_as_module = 1;
+ break;
+ }
+ }
+ atomic_set(&retval->host_active,0);
+ retval->host_busy = 0;
+ retval->host_failed = 0;
+ if(j > 0xffff) panic("Too many extra bytes requested\n");
+ retval->extra_bytes = j;
+ retval->loaded_as_module = 1;
+ if (flag_new) {
+ shn = (Scsi_Host_Name *) kmalloc(sizeof(Scsi_Host_Name), GFP_ATOMIC);
+ if (!shn) {
+ kfree(retval);
+ printk(KERN_ERR "scsi: out of memory(2) in scsi_register.\n");
+ return NULL;
+ }
+ shn->name = kmalloc(hname_len + 1, GFP_ATOMIC);
+ if (!shn->name) { kfree(shn); kfree(retval); return NULL; }
+ if (hname_len > 0)
+ strncpy(shn->name, hname, hname_len);
+ shn->name[hname_len] = 0;
+ shn->host_no = max_scsi_hosts++;
+ shn->host_registered = 1;
+ shn->loaded_as_module = 1;
+ shn->next = NULL;
+ if (scsi_host_no_list) {
+ for (shn2 = scsi_host_no_list;shn2->next;shn2 = shn2->next)
+ ;
+ shn2->next = shn;
+ }
+ else
+ scsi_host_no_list = shn;
+ retval->host_no = shn->host_no;
+ }
+ next_scsi_host++;
+ retval->host_queue = NULL;
+#if 0
+ init_waitqueue_head(&retval->host_wait);
+#endif
+ retval->resetting = 0;
+ retval->last_reset = 0;
+ retval->irq = 0;
+ retval->dma_channel = 0xff;
+
+ /* These three are default values which can be overridden */
+ retval->max_channel = 0;
+ retval->max_id = 8;
+ retval->max_lun = 8;
+
+ /*
+ * All drivers right now should be able to handle 12 byte commands.
+ * Every so often there are requests for 16 byte commands, but individual
+ * low-level drivers need to certify that they actually do something
+ * sensible with such commands.
+ */
+ retval->max_cmd_len = 12;
+
+ retval->unique_id = 0;
+ retval->io_port = 0;
+ retval->hostt = tpnt;
+ retval->next = NULL;
+ retval->in_recovery = 0;
+ retval->ehandler = NULL; /* Initial value until the thing starts up. */
+ retval->eh_notify = NULL; /* Who we notify when we exit. */
+
+
+ retval->host_blocked = FALSE;
+ retval->host_self_blocked = FALSE;
+
+#ifdef DEBUG
+ printk("Register %x %x: %d\n", (int)retval, (int)retval->hostt, j);
+#endif
+
+ /* The next six are the default values which can be overridden
+ * if need be */
+ retval->this_id = tpnt->this_id;
+ retval->can_queue = tpnt->can_queue;
+ retval->sg_tablesize = tpnt->sg_tablesize;
+ retval->cmd_per_lun = tpnt->cmd_per_lun;
+ retval->unchecked_isa_dma = tpnt->unchecked_isa_dma;
+ retval->use_clustering = tpnt->use_clustering;
+
+ retval->select_queue_depths = tpnt->select_queue_depths;
+ retval->max_sectors = tpnt->max_sectors;
+
+ if(!scsi_hostlist)
+ scsi_hostlist = retval;
+ else {
+ shpnt = scsi_hostlist;
+ if (retval->host_no < shpnt->host_no) {
+ retval->next = shpnt;
+ wmb(); /* want all to see these writes in this order */
+ scsi_hostlist = retval;
+ }
+ else {
+ for (o_shp = shpnt, shpnt = shpnt->next; shpnt;
+ o_shp = shpnt, shpnt = shpnt->next) {
+ if (retval->host_no < shpnt->host_no) {
+ retval->next = shpnt;
+ wmb();
+ o_shp->next = retval;
+ break;
+ }
+ }
+ if (! shpnt)
+ o_shp->next = retval;
+ }
+ }
+
+ return retval;
+}
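+
+/*
+ * Usage sketch (illustrative, not part of the original file): the shape
+ * of a low-level driver's detect() routine calling scsi_register(). The
+ * probe logic, resource values and "example" names are all hypothetical.
+ */
+#if 0
+static int example_detect(Scsi_Host_Template *tpnt)
+{
+    struct Scsi_Host *sh;
+
+    /* second argument = extra per-host bytes, kept in sh->hostdata[] */
+    sh = scsi_register(tpnt, 64);
+    if (sh == NULL)
+        return 0;               /* no adapter found */
+    sh->io_port = 0x330;        /* made-up resources */
+    sh->n_io_port = 4;
+    sh->irq = 11;
+    return 1;                   /* number of adapters registered */
+}
+#endif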
+
+int
+scsi_register_device(struct Scsi_Device_Template * sdpnt)
+{
+ if(sdpnt->next) panic("Device already registered");
+ sdpnt->next = scsi_devicelist;
+ scsi_devicelist = sdpnt;
+ return 0;
+}
+
+void
+scsi_deregister_device(struct Scsi_Device_Template * tpnt)
+{
+ struct Scsi_Device_Template *spnt;
+ struct Scsi_Device_Template *prev_spnt;
+
+ spnt = scsi_devicelist;
+ prev_spnt = NULL;
+ while (spnt != tpnt) {
+ prev_spnt = spnt;
+ spnt = spnt->next;
+ }
+ if (prev_spnt == NULL)
+ scsi_devicelist = tpnt->next;
+ else
+ prev_spnt->next = spnt->next;
+}
+
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/hosts.h b/xen/drivers/scsi/hosts.h
new file mode 100644
index 0000000000..34d3592e0e
--- /dev/null
+++ b/xen/drivers/scsi/hosts.h
@@ -0,0 +1,591 @@
+/*
+ * hosts.h Copyright (C) 1992 Drew Eckhardt
+ * Copyright (C) 1993, 1994, 1995, 1998, 1999 Eric Youngdale
+ *
+ * mid to low-level SCSI driver interface header
+ * Initial versions: Drew Eckhardt
+ * Subsequent revisions: Eric Youngdale
+ *
+ * <drew@colorado.edu>
+ *
+ * Modified by Eric Youngdale eric@andante.org to
+ * add scatter-gather, multiple outstanding request, and other
+ * enhancements.
+ *
+ * Further modified by Eric Youngdale to support multiple host adapters
+ * of the same type.
+ *
+ * Jiffies wrap fixes (host->resetting), 3 Dec 1998 Andrea Arcangeli
+ */
+
+#ifndef _HOSTS_H
+#define _HOSTS_H
+
+/*
+ $Header: /vger/u4/cvs/linux/drivers/scsi/hosts.h,v 1.6 1997/01/19 23:07:13 davem Exp $
+*/
+
+#include <xeno/config.h>
+/*#include <xeno/proc_fs.h>*/
+#include <xeno/pci.h>
+
+/* It is senseless to set SG_ALL any higher than this - the performance
+ * does not get any better, and it wastes memory
+ */
+#define SG_NONE 0
+#define SG_ALL 0xff
+
+#define DISABLE_CLUSTERING 0
+#define ENABLE_CLUSTERING 1
+
+/* The various choices mean:
+ * NONE: Self evident. Host adapter is not capable of scatter-gather.
+ * ALL: Means that the host adapter module can do scatter-gather,
+ * and that there is no limit to the size of the table to which
+ * we scatter/gather data.
+ * Anything else: Indicates the maximum number of chains that can be
+ * used in one scatter-gather request.
+ */
+
+/*
+ * The Scsi_Host_Template type has all that is needed to interface with a SCSI
+ * host in a device independent manner. There is one entry for each different
+ * type of host adapter that is supported on the system. (A filled-in
+ * sketch follows the typedef below.)
+ */
+
+typedef struct scsi_disk Disk;
+
+typedef struct SHT
+{
+
+ /* Used with loadable modules so we can construct a linked list. */
+ struct SHT * next;
+
+ /* Used with loadable modules so that we know when it is safe to unload */
+ struct module * module;
+
+#ifdef CONFIG_PROC_FS
+ /* The pointer to the /proc/scsi directory entry */
+ struct proc_dir_entry *proc_dir;
+
+ /* proc-fs info function.
+ * Can be used to export driver statistics and other infos to the world
+ * outside the kernel ie. userspace and it also provides an interface
+ * to feed the driver with information. Check eata_dma_proc.c for reference
+ */
+ int (*proc_info)(char *, char **, off_t, int, int, int);
+#endif
+
+ /*
+ * The name pointer is a pointer to the name of the SCSI
+ * device detected.
+ */
+ const char *name;
+
+ /*
+ * The detect function shall return nonzero on detection,
+ * indicating the number of host adapters of this particular
+ * type that were found. It should also
+ * initialize all data necessary for this particular
+ * SCSI driver. It is passed the host number, so this host
+ * knows where the first entry is in the scsi_hosts[] array.
+ *
+ * Note that the detect routine MUST not call any of the mid level
+ * functions to queue commands because things are not guaranteed
+ * to be set up yet. The detect routine can send commands to
+ * the host adapter as long as the program control will not be
+ * passed to scsi.c in the processing of the command. Note
+ * especially that scsi_malloc/scsi_free must not be called.
+ */
+ int (* detect)(struct SHT *);
+
+ int (*revoke)(Scsi_Device *);
+
+ /* Used with loadable modules to unload the host structures. Note:
+ * there is a default action built into the modules code which may
+ * be sufficient for most host adapters. Thus you may not have to supply
+ * this at all.
+ */
+ int (*release)(struct Scsi_Host *);
+
+ /*
+ * The info function will return whatever useful
+ * information the developer sees fit. If not provided, then
+ * the name field will be used instead.
+ */
+ const char *(* info)(struct Scsi_Host *);
+
+ /*
+ * ioctl interface
+ */
+ int (*ioctl)(Scsi_Device *dev, int cmd, void *arg);
+
+ /*
+ * The command function takes a target, a command (this is a SCSI
+ * command formatted as per the SCSI spec, nothing strange), a
+ * data buffer pointer, and data buffer length pointer. The return
+ * is a status int, bit fielded as follows :
+ * Byte What
+ * 0 SCSI status code
+ * 1 SCSI 1 byte message
+ * 2 host error return.
+ * 3 mid level error return
+ */
+ int (* command)(Scsi_Cmnd *);
+
+ /*
+ * The QueueCommand function works in a similar manner
+ * to the command function. It takes an additional parameter,
+ * void (* done)(Scsi_Cmnd *), which is invoked with the
+ * completed command when the command finishes.
+ *
+ * The done() function must only be called after QueueCommand()
+ * has returned.
+ */
+ int (* queuecommand)(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+
+ /*
+ * This is an error handling strategy routine. You don't need to
+ * define one of these if you don't want to - there is a default
+ * routine that is present that should work in most cases. For those
+ * driver authors that have the inclination and ability to write their
+ * own strategy routine, this is where it is specified. Note - the
+ * strategy routine is *ALWAYS* run in the context of the kernel eh
+ * thread. Thus you are guaranteed to *NOT* be in an interrupt handler
+ * when you execute this, and you are also guaranteed to *NOT* have any
+ * other commands being queued while you are in the strategy routine.
+ * When you return from this function, operations return to normal.
+ *
+ * See scsi_error.c scsi_unjam_host for additional comments about what
+ * this function should and should not be attempting to do.
+ */
+ int (*eh_strategy_handler)(struct Scsi_Host *);
+ int (*eh_abort_handler)(Scsi_Cmnd *);
+ int (*eh_device_reset_handler)(Scsi_Cmnd *);
+ int (*eh_bus_reset_handler)(Scsi_Cmnd *);
+ int (*eh_host_reset_handler)(Scsi_Cmnd *);
+
+ /*
+ * Since the mid level driver handles time outs, etc, we want to
+ * be able to abort the current command. Abort returns 0 if the
+ * abort was successful. The field SCpnt->abort_reason
+ * can be filled in with the appropriate reason why we wanted
+ * the abort in the first place, and this will be used
+ * in the mid-level code instead of the host_byte().
+ * If non-zero, the code passed to it
+ * will be used as the return code, otherwise
+ * DID_ABORT should be returned.
+ *
+ * Note that the scsi driver should "clean up" after itself,
+ * resetting the bus, etc. if necessary.
+ *
+ * NOTE - this interface is deprecated, and will go away. Use
+ * the eh_ routines instead.
+ */
+ int (* abort)(Scsi_Cmnd *);
+
+ /*
+ * The reset function will reset the SCSI bus. Any executing
+ * commands should fail with a DID_RESET in the host byte.
+ * The Scsi_Cmnd is passed so that the reset routine can figure
+ * out which host adapter should be reset, and also which command
+ * within the command block was responsible for the reset in
+ * the first place. Some hosts do not implement a reset function,
+ * and these hosts must call scsi_request_sense(SCpnt) to keep
+ * the command alive.
+ *
+ * NOTE - this interface is deprecated, and will go away. Use
+ * the eh_ routines instead.
+ */
+ int (* reset)(Scsi_Cmnd *, unsigned int);
+
+ /*
+ * This function is used to select synchronous communications,
+ * which will result in a higher data throughput. Not implemented
+ * yet.
+ */
+ int (* slave_attach)(int, int);
+
+ /*
+ * This function determines the bios parameters for a given
+ * hard disk. These tend to be numbers that are made up by
+ * the host adapter. Parameters:
+ * size, device number, list (heads, sectors, cylinders)
+ */
+ int (* bios_param)(Disk *, kdev_t, int []);
+
+
+ /*
+ * Used to set the queue depth for a specific device.
+ */
+ void (*select_queue_depths)(struct Scsi_Host *, Scsi_Device *);
+
+ /*
+ * This determines if we will use a non-interrupt driven
+ * or an interrupt driven scheme. It is set to the maximum number
+ * of simultaneous commands a given host adapter will accept.
+ */
+ int can_queue;
+
+ /*
+ * In many instances, especially where disconnect / reconnect are
+ * supported, our host also has an ID on the SCSI bus. If this is
+ * the case, then it must be reserved. Please set this_id to -1 if
+ * your setup is in single initiator mode, and the host lacks an
+ * ID.
+ */
+ int this_id;
+
+ /*
+ * This determines the degree to which the host adapter is capable
+ * of scatter-gather.
+ */
+ short unsigned int sg_tablesize;
+
+ /*
+ * if the host adapter has limitations beside segment count
+ */
+ short unsigned int max_sectors;
+
+ /*
+ * True if this host adapter can make good use of linked commands.
+ * This will allow more than one command to be queued to a given
+ * unit on a given host. Set this to the maximum number of command
+ * blocks to be provided for each device. Set this to 1 for one
+ * command block per lun, 2 for two, etc. Do not set this to 0.
+ * You should make sure that the host adapter will do the right thing
+ * before you try setting this above 1.
+ */
+ short cmd_per_lun;
+
+ /*
+ * present contains counter indicating how many boards of this
+ * type were found when we did the scan.
+ */
+ unsigned char present;
+
+ /*
+ * true if this host adapter uses unchecked DMA onto an ISA bus.
+ */
+ unsigned unchecked_isa_dma:1;
+
+ /*
+ * true if this host adapter can make good use of clustering.
+ * I originally thought that if the tablesize was large that it
+ * was a waste of CPU cycles to prepare a cluster list, but
+ * it works out that the Buslogic is faster if you use a smaller
+ * number of segments (i.e. use clustering). I guess it is
+ * inefficient.
+ */
+ unsigned use_clustering:1;
+
+ /*
+ * True if this driver uses the new error handling code. This flag is
+ * really only temporary until all of the other drivers get converted
+ * to use the new error handling code.
+ */
+ unsigned use_new_eh_code:1;
+
+ /*
+ * True for emulated SCSI host adapters (e.g. ATAPI)
+ */
+ unsigned emulated:1;
+
+ /*
+ * Name of proc directory
+ */
+ char *proc_name;
+
+} Scsi_Host_Template;
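+
+/*
+ * Illustrative sketch (not part of the original header): how a low-level
+ * driver might fill in this template. Every name and value here is
+ * hypothetical; real drivers supply their own entry points.
+ */
+#if 0
+static Scsi_Host_Template example_sht = {
+    name: "Example SCSI Adapter",
+    proc_name: "example",
+    detect: example_detect,             /* hypothetical */
+    release: example_release,           /* hypothetical */
+    queuecommand: example_queuecommand, /* hypothetical */
+    can_queue: 16,                      /* up to 16 outstanding commands */
+    this_id: 7,                         /* the adapter's own SCSI ID */
+    sg_tablesize: SG_ALL,
+    cmd_per_lun: 2,
+    use_clustering: ENABLE_CLUSTERING,
+    use_new_eh_code: 1,
+};
+#endif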
+
+/*
+ * The scsi_hosts array is the array containing the data for all
+ * possible <supported> scsi hosts. This is similar to the
+ * Scsi_Host_Template, except that we have one entry for each
+ * actual physical host adapter on the system, stored as a linked
+ * list. Note that if there are 2 aha1542 boards, then there will
+ * be two Scsi_Host entries, but only 1 Scsi_Host_Template entry.
+ */
+
+struct Scsi_Host
+{
+/* private: */
+ /*
+ * This information is private to the scsi mid-layer. Wrapping it in a
+ * struct private is a way of marking it in a sort of C++ type of way.
+ */
+ struct Scsi_Host * next;
+ Scsi_Device * host_queue;
+
+
+ struct task_struct * ehandler; /* Error recovery thread. */
+ struct semaphore * eh_wait; /* The error recovery thread waits on
+ this. */
+ struct semaphore * eh_notify; /* wait for eh to begin */
+ struct semaphore * eh_action; /* Wait for specific actions on the
+ host. */
+ unsigned int eh_active:1; /* Indicates the eh thread is awake and active if
+ this is true. */
+#if 0
+ wait_queue_head_t host_wait;
+#endif
+ Scsi_Host_Template * hostt;
+ atomic_t host_active; /* commands checked out */
+ volatile unsigned short host_busy; /* commands actually active on low-level */
+ volatile unsigned short host_failed; /* commands that failed. */
+
+/* public: */
+ unsigned short extra_bytes;
+ unsigned short host_no; /* Used for IOCTL_GET_IDLUN, /proc/scsi et al. */
+ int resetting; /* if set, it means that last_reset is a valid value */
+ unsigned long last_reset;
+
+
+ /*
+ * These three parameters can be used to allow for wide scsi,
+ * and for host adapters that support multiple busses
+ * The first two should be set to 1 more than the actual max id
+ * or lun (i.e. 8 for normal systems).
+ */
+ unsigned int max_id;
+ unsigned int max_lun;
+ unsigned int max_channel;
+
+ /* These parameters should be set by the detect routine */
+ unsigned long base;
+ unsigned long io_port;
+ unsigned char n_io_port;
+ unsigned char dma_channel;
+ unsigned int irq;
+
+ /*
+ * This is a unique identifier that must be assigned so that we
+ * have some way of identifying each detected host adapter properly
+ * and uniquely. For hosts that do not support more than one card
+ * in the system at one time, this does not need to be set. It is
+ * initialized to 0 in scsi_register.
+ */
+ unsigned int unique_id;
+
+ /*
+ * The rest can be copied from the template, or specifically
+ * initialized, as required.
+ */
+
+ /*
+ * The maximum length of SCSI commands that this host can accept.
+ * Probably 12 for most host adapters, but could be 16 for others.
+ * For drivers that don't set this field, a value of 12 is
+ * assumed. I am leaving this as a number rather than a bit
+ * because you never know what subsequent SCSI standards might do
+ * (i.e. could there be a 20 byte or a 24-byte command a few years
+ * down the road?).
+ */
+ unsigned char max_cmd_len;
+
+ int this_id;
+ int can_queue;
+ short cmd_per_lun;
+ short unsigned int sg_tablesize;
+ short unsigned int max_sectors;
+
+ unsigned in_recovery:1;
+ unsigned unchecked_isa_dma:1;
+ unsigned use_clustering:1;
+ /*
+ * True if this host was loaded as a loadable module
+ */
+ unsigned loaded_as_module:1;
+
+ /*
+ * Host has rejected a command because it was busy.
+ */
+ unsigned host_blocked:1;
+
+ /*
+ * Host has requested that no further requests come through for the
+ * time being.
+ */
+ unsigned host_self_blocked:1;
+
+ /*
+ * Host uses correct SCSI ordering not PC ordering. The bit is
+ * set for the minority of drivers whose authors actually read the spec ;)
+ */
+ unsigned reverse_ordering:1;
+
+ /*
+ * Indicates that one or more devices on this host were starved, and
+ * that we need to feed them when the device becomes less busy.
+ */
+ unsigned some_device_starved:1;
+
+ void (*select_queue_depths)(struct Scsi_Host *, Scsi_Device *);
+
+ /*
+ * For SCSI hosts which are PCI devices, set pci_dev so that
+ * we can do BIOS EDD 3.0 mappings
+ */
+ struct pci_dev *pci_dev;
+
+ /*
+ * We should ensure that this is aligned, both for better performance
+ * and also because some compilers (m68k) don't automatically force
+ * alignment to a long boundary.
+ */
+ unsigned long hostdata[0] /* Used for storage of host specific stuff */
+ __attribute__ ((aligned (sizeof(unsigned long))));
+};
+
+/*
+ * These two functions are used to allocate and free a pseudo device
+ * which will connect to the host adapter itself rather than any
+ * physical device. You must deallocate when you are done with the
+ * thing. This physical pseudo-device isn't real and won't be available
+ * from any high-level drivers.
+ */
+extern void scsi_free_host_dev(Scsi_Device * SDpnt);
+extern Scsi_Device * scsi_get_host_dev(struct Scsi_Host * SHpnt);
+
+extern void scsi_unblock_requests(struct Scsi_Host * SHpnt);
+extern void scsi_block_requests(struct Scsi_Host * SHpnt);
+extern void scsi_report_bus_reset(struct Scsi_Host * SHpnt, int channel);
+
+typedef struct SHN
+ {
+ struct SHN * next;
+ char * name;
+ unsigned short host_no;
+ unsigned short host_registered;
+ unsigned loaded_as_module;
+ } Scsi_Host_Name;
+
+extern Scsi_Host_Name * scsi_host_no_list;
+extern struct Scsi_Host * scsi_hostlist;
+extern struct Scsi_Device_Template * scsi_devicelist;
+
+extern Scsi_Host_Template * scsi_hosts;
+
+extern void build_proc_dir_entries(Scsi_Host_Template *);
+
+/*
+ * scsi_init initializes the scsi hosts.
+ */
+
+extern int next_scsi_host;
+
+unsigned int scsi_init(void);
+extern struct Scsi_Host * scsi_register(Scsi_Host_Template *, int j);
+extern void scsi_unregister(struct Scsi_Host * i);
+
+extern void scsi_register_blocked_host(struct Scsi_Host * SHpnt);
+extern void scsi_deregister_blocked_host(struct Scsi_Host * SHpnt);
+
+static inline void scsi_set_pci_device(struct Scsi_Host *SHpnt,
+ struct pci_dev *pdev)
+{
+ SHpnt->pci_dev = pdev;
+}
+
+
+/*
+ * Prototypes for functions/data in scsi_scan.c
+ */
+extern void scan_scsis(struct Scsi_Host *shpnt,
+ uint hardcoded,
+ uint hchannel,
+ uint hid,
+ uint hlun);
+
+extern void scsi_mark_host_reset(struct Scsi_Host *Host);
+
+#define BLANK_HOST {"", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+
+struct Scsi_Device_Template
+{
+ struct Scsi_Device_Template * next;
+ const char * name;
+ const char * tag;
+ struct module * module; /* Used for loadable modules */
+ unsigned char scsi_type;
+ unsigned int major;
+ unsigned int min_major; /* Minimum major in range. */
+ unsigned int max_major; /* Maximum major in range. */
+ unsigned int nr_dev; /* Number currently attached */
+ unsigned int dev_noticed; /* Number of devices detected. */
+ unsigned int dev_max; /* Current size of arrays */
+ unsigned blk:1; /* 0 if character device */
+ int (*detect)(Scsi_Device *); /* Returns 1 if we can attach this device */
+ int (*init)(void); /* Sizes arrays based upon number of devices
+ * detected */
+ void (*finish)(void); /* Perform initialization after attachment */
+ int (*attach)(Scsi_Device *); /* Attach devices to arrays */
+ void (*detach)(Scsi_Device *);
+ int (*init_command)(Scsi_Cmnd *); /* Used by new queueing code.
+ Selects command for blkdevs */
+};
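+
+/*
+ * Illustrative sketch (not part of the original header): the shape of a
+ * device template as an upper-level driver might declare it. All names
+ * and numbers below are hypothetical.
+ */
+#if 0
+static struct Scsi_Device_Template example_sdt = {
+    name: "example disk",
+    tag: "xd",                          /* hypothetical */
+    scsi_type: 0,                       /* TYPE_DISK in scsi.h */
+    major: 240,                         /* made-up major number */
+    blk: 1,                             /* block device */
+    detect: example_dev_detect,         /* hypothetical */
+    init: example_dev_init,             /* hypothetical */
+    attach: example_dev_attach,         /* hypothetical */
+    detach: example_dev_detach,         /* hypothetical */
+};
+#endif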
+
+void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt);
+
+int scsi_register_device(struct Scsi_Device_Template * sdpnt);
+void scsi_deregister_device(struct Scsi_Device_Template * tpnt);
+
+#if 0
+/* These are used by loadable modules */
+extern int scsi_register_module(int, void *);
+extern int scsi_unregister_module(int, void *);
+#endif
+
+/* The different types of modules that we can load and unload */
+#define MODULE_SCSI_HA 1
+#define MODULE_SCSI_CONST 2
+#define MODULE_SCSI_IOCTL 3
+#define MODULE_SCSI_DEV 4
+
+
+/*
+ * This is an ugly hack. If we expect to be able to load devices at run time,
+ * we need to leave extra room in some of the data structures. Doing a
+ * realloc to enlarge the structures would be riddled with race conditions,
+ * so until a better solution is discovered, we use this crude approach
+ *
+ * Even bigger hack for SparcSTORAGE arrays. Those are at least 6 disks, but
+ * usually up to 30 disks, so everyone would need to change this. -jj
+ *
+ * Note: These things are all evil and all need to go away. My plan is to
+ * tackle the character devices first, as there aren't any locking implications
+ * in the block device layer. The block devices will require more work.
+ *
+ * The generic driver has been updated to resize as required. So has the tape
+ * driver. Two down, two more to go.
+ */
+#ifndef CONFIG_SD_EXTRA_DEVS
+#define CONFIG_SD_EXTRA_DEVS 2
+#endif
+#ifndef CONFIG_SR_EXTRA_DEVS
+#define CONFIG_SR_EXTRA_DEVS 2
+#endif
+#define SD_EXTRA_DEVS CONFIG_SD_EXTRA_DEVS
+#define SR_EXTRA_DEVS CONFIG_SR_EXTRA_DEVS
+
+#endif
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/scsi.c b/xen/drivers/scsi/scsi.c
new file mode 100644
index 0000000000..85a59f54ac
--- /dev/null
+++ b/xen/drivers/scsi/scsi.c
@@ -0,0 +1,2999 @@
+/*
+ * scsi.c Copyright (C) 1992 Drew Eckhardt
+ * Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale
+ *
+ * generic mid-level SCSI driver
+ * Initial versions: Drew Eckhardt
+ * Subsequent revisions: Eric Youngdale
+ *
+ * <drew@colorado.edu>
+ *
+ * Bug correction thanks go to :
+ * Rik Faith <faith@cs.unc.edu>
+ * Tommy Thorn <tthorn>
+ * Thomas Wuensche <tw@fgb1.fgb.mw.tu-muenchen.de>
+ *
+ * Modified by Eric Youngdale eric@andante.org or ericy@gnu.ai.mit.edu to
+ * add scatter-gather, multiple outstanding request, and other
+ * enhancements.
+ *
+ * Native multichannel, wide scsi, /proc/scsi and hot plugging
+ * support added by Michael Neuffer <mike@i-connect.net>
+ *
+ * Added request_module("scsi_hostadapter") for kerneld:
+ * (Put an "alias scsi_hostadapter your_hostadapter" in /etc/modules.conf)
+ * Bjorn Ekwall <bj0rn@blox.se>
+ * (changed to kmod)
+ *
+ * Major improvements to the timeout, abort, and reset processing,
+ * as well as performance modifications for large queue depths by
+ * Leonard N. Zubkoff <lnz@dandelion.com>
+ *
+ * Converted cli() code to spinlocks, Ingo Molnar
+ *
+ * Jiffies wrap fixes (host->resetting), 3 Dec 1998 Andrea Arcangeli
+ *
+ * out_of_space hacks, D. Gilbert (dpg) 990608
+ */
+
+#define REVISION "Revision: 1.00"
+#define VERSION "Id: scsi.c 1.00 2000/09/26"
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+
+#include <xeno/sched.h>
+#include <xeno/timer.h>
+#include <xeno/lib.h>
+#include <xeno/slab.h>
+#include <xeno/ioport.h>
+/*#include <xeno/stat.h>*/
+#include <xeno/blk.h>
+#include <xeno/interrupt.h>
+#include <xeno/delay.h>
+#include <xeno/init.h>
+/*#include <xeno/smp_lock.h>*/
+/*#include <xeno/completion.h>*/
+
+#define __KERNEL_SYSCALLS__
+
+/*#include <xeno/unistd.h>*/
+#include <xeno/spinlock.h>
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+#include <asm/uaccess.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+#ifdef CONFIG_KMOD
+#include <xeno/kmod.h>
+#endif
+
+#undef USE_STATIC_SCSI_MEMORY
+
+struct proc_dir_entry *proc_scsi;
+
+#ifdef CONFIG_PROC_FS
+static int scsi_proc_info(char *buffer, char **start, off_t offset, int length);
+static void scsi_dump_status(int level);
+#endif
+
+/*
+ static const char RCSid[] = "$Header: /vger/u4/cvs/linux/drivers/scsi/scsi.c,v 1.38 1997/01/19 23:07:18 davem Exp $";
+ */
+
+/*
+ * Definitions and constants.
+ */
+
+#define MIN_RESET_DELAY (2*HZ)
+
+/* Do not call reset on error if we just did a reset within 15 sec. */
+#define MIN_RESET_PERIOD (15*HZ)
+
+/*
+ * Macro to determine the size of a SCSI command. This macro takes vendor
+ * unique commands into account. SCSI commands in groups 6 and 7 are
+ * vendor unique and we will depend upon the command length being
+ * supplied correctly in cmd_len.
+ */
+#define CDB_SIZE(SCpnt) ((((SCpnt->cmnd[0] >> 5) & 7) < 6) ? \
+ COMMAND_SIZE(SCpnt->cmnd[0]) : SCpnt->cmd_len)
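+
+/*
+ * Worked illustration (not in the original source), assuming the usual
+ * COMMAND_SIZE(op) = scsi_command_size[((op) >> 5) & 7] from scsi.h:
+ * for READ(10), opcode 0x28, the group is (0x28 >> 5) & 7 = 1, which is
+ * below 6, so CDB_SIZE() yields scsi_command_size[1] = 10 (see the
+ * array below). For a group-6 vendor opcode such as 0xC1 the test
+ * fails and the driver-supplied cmd_len is used instead.
+ */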
+
+/*
+ * Data declarations.
+ */
+unsigned long scsi_pid;
+Scsi_Cmnd *last_cmnd;
+/* Command group 3 is reserved and should never be used. */
+const unsigned char scsi_command_size[8] =
+{
+ 6, 10, 10, 12,
+ 16, 12, 10, 10
+};
+static unsigned long serial_number;
+static Scsi_Cmnd *scsi_bh_queue_head;
+static Scsi_Cmnd *scsi_bh_queue_tail;
+
+/*
+ * Note - the initial logging level can be set here to log events at boot time.
+ * After the system is up, you may enable logging via the /proc interface.
+ */
+unsigned int scsi_logging_level;
+
+const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE] =
+{
+ "Direct-Access ",
+ "Sequential-Access",
+ "Printer ",
+ "Processor ",
+ "WORM ",
+ "CD-ROM ",
+ "Scanner ",
+ "Optical Device ",
+ "Medium Changer ",
+ "Communications ",
+ "Unknown ",
+ "Unknown ",
+ "Unknown ",
+ "Enclosure ",
+};
+
+/*
+ * Function prototypes.
+ */
+extern void scsi_times_out(Scsi_Cmnd * SCpnt);
+void scsi_build_commandblocks(Scsi_Device * SDpnt);
+
+#if 0
+/*
+ * These are the interface to the old error handling code. It should go away
+ * someday soon.
+ */
+extern void scsi_old_done(Scsi_Cmnd * SCpnt);
+extern void scsi_old_times_out(Scsi_Cmnd * SCpnt);
+extern int scsi_old_reset(Scsi_Cmnd *SCpnt, unsigned int flag);
+#endif
+
+/*
+ * Private interface into the new error handling code.
+ */
+extern int scsi_new_reset(Scsi_Cmnd *SCpnt, unsigned int flag);
+
+/*
+ * Function: scsi_initialize_queue()
+ *
+ * Purpose: Selects queue handler function for a device.
+ *
+ * Arguments: SDpnt - device for which we need a handler function.
+ *
+ * Returns: Nothing
+ *
+ * Lock status: No locking assumed or required.
+ *
+ * Notes: Most devices will end up using scsi_request_fn for the
+ * handler function (at least as things are done now).
+ * The "block" feature basically ensures that only one of
+ * the blocked hosts is active at one time, mainly to work around
+ * buggy DMA chipsets where the memory gets starved.
+ * For this case, we have a special handler function, which
+ * does some checks and ultimately calls scsi_request_fn.
+ *
+ * The single_lun feature is a similar special case.
+ *
+ * We handle these things by stacking the handlers. The
+ * special case handlers simply check a few conditions,
+ * and return if they are not supposed to do anything.
+ * In the event that things are OK, then they call the next
+ * handler in the list - ultimately they call scsi_request_fn
+ * to do the dirty deed.
+ */
+void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) {
+ blk_init_queue(&SDpnt->request_queue, scsi_request_fn);
+ blk_queue_headactive(&SDpnt->request_queue, 0);
+ SDpnt->request_queue.queuedata = (void *) SDpnt;
+}
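+
+/*
+ * Sketch of the handler stacking described above (illustrative only;
+ * the real special-case handlers live elsewhere in the mid-layer). A
+ * wrapper checks its one condition and either returns or falls through
+ * to the next handler down.
+ */
+#if 0
+static void example_blocked_request_fn(request_queue_t * q)
+{
+    Scsi_Device *SDpnt = (Scsi_Device *) q->queuedata;
+
+    if (SDpnt->host->host_blocked)
+        return;             /* not our turn: do nothing this pass */
+    scsi_request_fn(q);     /* otherwise call the next handler down */
+}
+#endif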
+
+#ifdef MODULE
+MODULE_PARM(scsi_logging_level, "i");
+MODULE_PARM_DESC(scsi_logging_level, "SCSI logging level; should be zero or nonzero");
+
+#else
+static int __init scsi_logging_setup(char *str)
+{
+#if 0
+ int tmp;
+
+ if (get_option(&str, &tmp) == 1) {
+ scsi_logging_level = (tmp ? ~0 : 0);
+ return 1;
+ } else {
+ printk(KERN_INFO "scsi_logging_setup : usage scsi_logging_level=n "
+ "(n should be 0 or non-zero)\n");
+ return 0;
+ }
+#else
+ return 0;
+#endif
+
+}
+__setup("scsi_logging=", scsi_logging_setup);
+
+#endif
+
+/*
+ * Issue a command and wait for it to complete
+ */
+
+static void scsi_wait_done(Scsi_Cmnd * SCpnt)
+{
+ struct request *req;
+
+ req = &SCpnt->request;
+ req->rq_status = RQ_SCSI_DONE; /* Busy, but indicate request done */
+
+#if 0
+ if (req->waiting != NULL) {
+ complete(req->waiting);
+ }
+#endif
+}
+
+/*
+ * This lock protects the freelist for all devices on the system.
+ * We could make this finer grained by having a single lock per
+ * device if it is ever found that there is excessive contention
+ * on this lock.
+ */
+static spinlock_t device_request_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Used to protect insertion into and removal from the queue of
+ * commands to be processed by the bottom half handler.
+ */
+static spinlock_t scsi_bhqueue_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Function: scsi_allocate_request
+ *
+ * Purpose: Allocate a request descriptor.
+ *
+ * Arguments: device - device for which we want a request
+ *
+ * Lock status: No locks assumed to be held. This function is SMP-safe.
+ *
+ * Returns: Pointer to request block.
+ *
+ * Notes: With the new queueing code, it becomes important
+ * to track the difference between a command and a
+ * request. A request is a pending item in the queue that
+ * has not yet reached the top of the queue.
+ */
+
+Scsi_Request *scsi_allocate_request(Scsi_Device * device)
+{
+ Scsi_Request *SRpnt = NULL;
+
+ if (!device)
+ panic("No device passed to scsi_allocate_request().\n");
+
+ SRpnt = (Scsi_Request *) kmalloc(sizeof(Scsi_Request), GFP_ATOMIC);
+ if( SRpnt == NULL )
+ {
+ return NULL;
+ }
+
+ memset(SRpnt, 0, sizeof(Scsi_Request));
+ SRpnt->sr_device = device;
+ SRpnt->sr_host = device->host;
+ SRpnt->sr_magic = SCSI_REQ_MAGIC;
+ SRpnt->sr_data_direction = SCSI_DATA_UNKNOWN;
+
+ return SRpnt;
+}
+
+/*
+ * Function: scsi_release_request
+ *
+ * Purpose: Release a request descriptor.
+ *
+ * Arguments: device - device for which we want a request
+ *
+ * Lock status: No locks assumed to be held. This function is SMP-safe.
+ *
+ * Returns: Pointer to request block.
+ *
+ * Notes: With the new queueing code, it becomes important
+ * to track the difference between a command and a
+ * request. A request is a pending item in the queue that
+ * has not yet reached the top of the queue. We still need
+ * to free a request when we are done with it, of course.
+ */
+void scsi_release_request(Scsi_Request * req)
+{
+ if( req->sr_command != NULL )
+ {
+#ifdef SMH_DEBUG
+ printk("scsi_release_request: req->sr_command = %p\n",
+ req->sr_command);
+#endif
+ scsi_release_command(req->sr_command);
+ req->sr_command = NULL;
+#ifdef SMHHACK
+ req->freeaddr = 0x1234;
+#endif
+ }
+
+ kfree(req);
+}
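+
+/*
+ * Usage sketch (illustrative, not from the original file): the
+ * allocate/release pairing an upper-level driver would use around a
+ * request. Issuing the command is elided.
+ */
+#if 0
+static int example_do_request(Scsi_Device *SDpnt)
+{
+    Scsi_Request *SRpnt;
+
+    SRpnt = scsi_allocate_request(SDpnt);
+    if (SRpnt == NULL)
+        return -1;          /* allocation failed */
+    SRpnt->sr_data_direction = SCSI_DATA_NONE;  /* e.g. TEST UNIT READY */
+    /* ... fill in the CDB and issue the request here ... */
+    scsi_release_request(SRpnt);
+    return 0;
+}
+#endif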
+
+/*
+ * Function: scsi_allocate_device
+ *
+ * Purpose: Allocate a command descriptor.
+ *
+ * Arguments: device - device for which we want a command descriptor
+ * wait - 1 if we should wait in the event that none
+ * are available.
+ * interruptable - 1 if we should unblock and return NULL
+ * in the event that we must wait, and a signal
+ * arrives.
+ *
+ * Lock status: No locks assumed to be held. This function is SMP-safe.
+ *
+ * Returns: Pointer to command descriptor.
+ *
+ * Notes: Prior to the new queue code, this function was not SMP-safe.
+ *
+ * If the wait flag is true, and we are waiting for a free
+ * command block, this function will interrupt and return
+ * NULL in the event that a signal arrives that needs to
+ * be handled.
+ *
+ * This function is deprecated, and drivers should be
+ * rewritten to use Scsi_Request instead of Scsi_Cmnd.
+ */
+
+Scsi_Cmnd *scsi_allocate_device(Scsi_Device * device, int wait,
+ int interruptable)
+{
+ struct Scsi_Host *host;
+ Scsi_Cmnd *SCpnt = NULL;
+ Scsi_Device *SDpnt;
+ unsigned long flags;
+
+ if (!device)
+ panic("No device passed to scsi_allocate_device().\n");
+
+ host = device->host;
+
+ spin_lock_irqsave(&device_request_lock, flags);
+
+ while (1 == 1) {
+ SCpnt = NULL;
+ if (!device->device_blocked) {
+ if (device->single_lun) {
+ /*
+ * FIXME(eric) - this is not at all optimal. Given that
+ * single lun devices are rare and usually slow
+ * (i.e. CD changers), this is good enough for now, but
+ * we may want to come back and optimize this later.
+ *
+ * Scan through all of the devices attached to this
+ * host, and see if any are active or not. If so,
+ * we need to defer this command.
+ *
+ * We really need a busy counter per device. This would
+ * allow us to more easily figure out whether we should
+ * do anything here or not.
+ */
+ for (SDpnt = host->host_queue;
+ SDpnt;
+ SDpnt = SDpnt->next) {
+ /*
+ * Only look for other devices on the same bus
+ * with the same target ID.
+ */
+ if (SDpnt->channel != device->channel
+ || SDpnt->id != device->id
+ || SDpnt == device) {
+ continue;
+ }
+ if( atomic_read(&SDpnt->device_active) != 0)
+ {
+ break;
+ }
+ }
+ if (SDpnt) {
+ /*
+ * Some other device in this cluster is busy.
+ * If asked to wait, we need to wait, otherwise
+ * return NULL.
+ */
+ SCpnt = NULL;
+ goto busy;
+ }
+ }
+ /*
+ * Now we can check for a free command block for this device.
+ */
+ for (SCpnt = device->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->request.rq_status == RQ_INACTIVE)
+ break;
+ }
+ }
+ /*
+ * If we couldn't find a free command block, and we have been
+ * asked to wait, then do so.
+ */
+ if (SCpnt) {
+ break;
+ }
+ busy:
+ /*
+ * If we have been asked to wait for a free block, then
+ * wait here.
+ */
+ if (wait) {
+ printk("XXX smh: scsi cannot wait for free cmd block.\n");
+ BUG();
+#if 0
+ DECLARE_WAITQUEUE(wait, current);
+
+ /*
+ * We need to wait for a free commandblock. We need to
+ * insert ourselves into the list before we release the
+ * lock. This way if a block were released the same
+ * microsecond that we released the lock, the call
+ * to schedule() wouldn't block (well, it might switch,
+ * but the current task will still be schedulable).
+ */
+ add_wait_queue(&device->scpnt_wait, &wait);
+ if( interruptable ) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ } else {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+
+ spin_unlock_irqrestore(&device_request_lock, flags);
+
+ /*
+ * This should block until a device command block
+ * becomes available.
+ */
+ schedule();
+
+ spin_lock_irqsave(&device_request_lock, flags);
+
+ remove_wait_queue(&device->scpnt_wait, &wait);
+ /*
+ * FIXME - Isn't this redundant?? Someone
+ * else will have forced the state back to running.
+ */
+ set_current_state(TASK_RUNNING);
+ /*
+ * In the event that a signal has arrived that we need
+ * to consider, then simply return NULL. Everyone
+ * that calls us should be prepared for this
+ * possibility, and pass the appropriate code back
+ * to the user.
+ */
+ if( interruptable ) {
+ if (signal_pending(current)) {
+ spin_unlock_irqrestore(&device_request_lock, flags);
+ return NULL;
+ }
+ }
+#endif
+ } else {
+ spin_unlock_irqrestore(&device_request_lock, flags);
+ return NULL;
+ }
+ }
+
+ SCpnt->request.rq_status = RQ_SCSI_BUSY;
+ SCpnt->request.waiting = NULL; /* And no one is waiting for this
+ * to complete */
+ atomic_inc(&SCpnt->host->host_active);
+ atomic_inc(&SCpnt->device->device_active);
+
+ SCpnt->buffer = NULL;
+ SCpnt->bufflen = 0;
+ SCpnt->request_buffer = NULL;
+ SCpnt->request_bufflen = 0;
+
+ SCpnt->use_sg = 0; /* Reset the scatter-gather flag */
+ SCpnt->old_use_sg = 0;
+ SCpnt->transfersize = 0; /* No default transfer size */
+ SCpnt->cmd_len = 0;
+
+ SCpnt->sc_data_direction = SCSI_DATA_UNKNOWN;
+ SCpnt->sc_request = NULL;
+ SCpnt->sc_magic = SCSI_CMND_MAGIC;
+
+ SCpnt->result = 0;
+ SCpnt->underflow = 0; /* Do not flag underflow conditions */
+ SCpnt->old_underflow = 0;
+ SCpnt->resid = 0;
+ SCpnt->state = SCSI_STATE_INITIALIZING;
+ SCpnt->owner = SCSI_OWNER_HIGHLEVEL;
+
+ spin_unlock_irqrestore(&device_request_lock, flags);
+
+ SCSI_LOG_MLQUEUE(5, printk("Activating command for device %d (%d)\n",
+ SCpnt->target,
+ atomic_read(&SCpnt->host->host_active)));
+
+ return SCpnt;
+}
+
+inline void __scsi_release_command(Scsi_Cmnd * SCpnt)
+{
+ unsigned long flags;
+ Scsi_Device * SDpnt;
+
+ spin_lock_irqsave(&device_request_lock, flags);
+
+ SDpnt = SCpnt->device;
+
+ SCpnt->request.rq_status = RQ_INACTIVE;
+ SCpnt->state = SCSI_STATE_UNUSED;
+ SCpnt->owner = SCSI_OWNER_NOBODY;
+ atomic_dec(&SCpnt->host->host_active);
+ atomic_dec(&SDpnt->device_active);
+
+ SCSI_LOG_MLQUEUE(5, printk("Deactivating command for device %d (active=%d, failed=%d)\n",
+ SCpnt->target,
+ atomic_read(&SCpnt->host->host_active),
+ SCpnt->host->host_failed));
+ if (SCpnt->host->host_failed != 0) {
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Error handler thread %d %d\n",
+ SCpnt->host->in_recovery,
+ SCpnt->host->eh_active));
+ }
+ /*
+ * If the host is having troubles, then look to see if this was the last
+ * command that might have failed. If so, wake up the error handler.
+ */
+ if (SCpnt->host->in_recovery
+ && !SCpnt->host->eh_active
+ && SCpnt->host->host_busy == SCpnt->host->host_failed) {
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread (%d)\n",
+ atomic_read(&SCpnt->host->eh_wait->count)));
+#if 0
+ up(SCpnt->host->eh_wait);
+#endif
+ }
+
+ spin_unlock_irqrestore(&device_request_lock, flags);
+
+#if 0
+ /*
+ * Wake up anyone waiting for this device. Do this after we
+ * have released the lock, as they will need it as soon as
+ * they wake up.
+ */
+ wake_up(&SDpnt->scpnt_wait);
+#endif
+
+}
+
+/*
+ * Function: scsi_release_command
+ *
+ * Purpose: Release a command block.
+ *
+ * Arguments: SCpnt - command block we are releasing.
+ *
+ * Notes: The command block can no longer be used by the caller once
+ * this function is called. This is in effect the inverse
+ * of scsi_allocate_device. Note that we also must perform
+ * a couple of additional tasks. We must first wake up any
+ * processes that might have blocked waiting for a command
+ * block, and secondly we must hit the queue handler function
+ * to make sure that the device is busy. Note - there is an
+ * option to not do this - there were instances where we could
+ * recurse too deeply and blow the stack if this happened
+ * when we were indirectly called from the request function
+ * itself.
+ *
+ * The idea is that a lot of the mid-level internals gunk
+ * gets hidden in this function. Upper level drivers don't
+ * have any chickens to wave in the air to get things to
+ * work reliably.
+ *
+ * This function is deprecated, and drivers should be
+ * rewritten to use Scsi_Request instead of Scsi_Cmnd.
+ */
+void scsi_release_command(Scsi_Cmnd * SCpnt)
+{
+ request_queue_t *q;
+ Scsi_Device * SDpnt;
+
+ SDpnt = SCpnt->device;
+
+ __scsi_release_command(SCpnt);
+
+ /*
+ * Finally, hit the queue request function to make sure that
+ * the device is actually busy if there are requests present.
+ * This won't block - if the device cannot take any more, life
+ * will go on.
+ */
+ q = &SDpnt->request_queue;
+ scsi_queue_next_request(q, NULL);
+}
+
+/*
+ * Function: scsi_dispatch_command
+ *
+ * Purpose: Dispatch a command to the low-level driver.
+ *
+ * Arguments: SCpnt - command block we are dispatching.
+ *
+ * Notes:
+ */
+int scsi_dispatch_cmd(Scsi_Cmnd * SCpnt)
+{
+#ifdef DEBUG_DELAY
+ unsigned long clock;
+#endif
+ struct Scsi_Host *host;
+ int rtn = 0;
+ unsigned long flags = 0;
+ unsigned long timeout;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+#if DEBUG
+ unsigned long *ret = 0;
+#ifdef __mips__
+ __asm__ __volatile__("move\t%0,$31":"=r"(ret));
+#else
+ ret = __builtin_return_address(0);
+#endif
+#endif
+
+ host = SCpnt->host;
+
+ /* Assign a unique nonzero serial_number. */
+ if (++serial_number == 0)
+ serial_number = 1;
+ SCpnt->serial_number = serial_number;
+ SCpnt->pid = scsi_pid++;
+
+ /*
+ * We will wait MIN_RESET_DELAY clock ticks after the last reset so
+ * we can avoid the drive not being ready.
+ */
+ timeout = host->last_reset + MIN_RESET_DELAY;
+
+ if (host->resetting && time_before(jiffies, timeout)) {
+ int ticks_remaining = timeout - jiffies;
+ /*
+ * NOTE: This may be executed from within an interrupt
+ * handler! This is bad, but for now, it'll do. The irq
+ * level of the interrupt handler has been masked out by the
+ * platform dependent interrupt handling code already, so the
+ * sti() here will not cause another call to the SCSI host's
+ * interrupt handler (assuming there is one irq-level per
+ * host).
+ */
+ while (--ticks_remaining >= 0)
+ mdelay(1 + 999 / HZ);
+ host->resetting = 0;
+ }
+ if (host->hostt->use_new_eh_code) {
+ scsi_add_timer(SCpnt, SCpnt->timeout_per_command, scsi_times_out);
+ } else {
+#if 0
+ scsi_add_timer(SCpnt, SCpnt->timeout_per_command,
+ scsi_old_times_out);
+#endif
+ }
+
+ /*
+ * We will use a queued command if possible, otherwise we will emulate the
+ * queuing and the calling of the completion function ourselves.
+ */
+ SCSI_LOG_MLQUEUE(3, printk("scsi_dispatch_cmnd (host = %d, channel = %d, target = %d, "
+ "command = %p, buffer = %p, \nbufflen = %d, done = %p)\n",
+ SCpnt->host->host_no, SCpnt->channel, SCpnt->target, SCpnt->cmnd,
+ SCpnt->buffer, SCpnt->bufflen, SCpnt->done));
+
+ SCpnt->state = SCSI_STATE_QUEUED;
+ SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+ if (host->can_queue) {
+ SCSI_LOG_MLQUEUE(3, printk("queuecommand : routine at %p\n",
+ host->hostt->queuecommand));
+ /*
+ * Use the old error handling code if we haven't converted the driver
+ * to use the new one yet. Note - only the new queuecommand variant
+ * passes a meaningful return value.
+ */
+ if (host->hostt->use_new_eh_code) {
+ /*
+ * Before we queue this command, check if the command
+ * length exceeds what the host adapter can handle.
+ */
+ if (CDB_SIZE(SCpnt) <= SCpnt->host->max_cmd_len) {
+ spin_lock_irqsave(&io_request_lock, flags);
+ rtn = host->hostt->queuecommand(SCpnt, scsi_done);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ if (rtn != 0) {
+ scsi_delete_timer(SCpnt);
+ scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_HOST_BUSY);
+ SCSI_LOG_MLQUEUE(3, printk("queuecommand : request rejected\n"));
+ }
+ } else {
+ SCSI_LOG_MLQUEUE(3, printk("queuecommand : command too long.\n"));
+ SCpnt->result = (DID_ABORT << 16);
+ spin_lock_irqsave(&io_request_lock, flags);
+ scsi_done(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ rtn = 1;
+ }
+ } else {
+ /*
+ * Before we queue this command, check if the command
+ * length exceeds what the host adapter can handle.
+ */
+#if 0
+ if (CDB_SIZE(SCpnt) <= SCpnt->host->max_cmd_len) {
+ spin_lock_irqsave(&io_request_lock, flags);
+ host->hostt->queuecommand(SCpnt, scsi_old_done);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ } else {
+ SCSI_LOG_MLQUEUE(3, printk("queuecommand : command too long.\n"));
+ SCpnt->result = (DID_ABORT << 16);
+ spin_lock_irqsave(&io_request_lock, flags);
+ scsi_old_done(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ rtn = 1;
+ }
+#endif
+
+ }
+ } else {
+ int temp;
+
+ SCSI_LOG_MLQUEUE(3, printk("command() : routine at %p\n", host->hostt->command));
+ spin_lock_irqsave(&io_request_lock, flags);
+ temp = host->hostt->command(SCpnt);
+ SCpnt->result = temp;
+#ifdef DEBUG_DELAY
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ clock = jiffies + 4 * HZ;
+ while (time_before(jiffies, clock)) {
+ barrier();
+ cpu_relax();
+ }
+ printk("done(host = %d, result = %04x) : routine at %p\n",
+ host->host_no, temp, host->hostt->command);
+ spin_lock_irqsave(&io_request_lock, flags);
+#endif
+ if (host->hostt->use_new_eh_code) {
+ scsi_done(SCpnt);
+ } else {
+#if 0
+ scsi_old_done(SCpnt);
+#endif
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ }
+ SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n"));
+ return rtn;
+}
+
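+/*
+ * Illustrative sketch (not built): the contract that scsi_dispatch_cmd()
+ * relies on. A low-level driver's queuecommand() accepts the command,
+ * returns 0, and later completes it by invoking the done() callback
+ * (scsi_done here) exactly once, typically from its interrupt handler.
+ * The mydriver_* name is hypothetical.
+ */
+#if 0
+static int mydriver_queuecommand(Scsi_Cmnd * SCpnt,
+ void (*done) (Scsi_Cmnd *))
+{
+ /* Hand the command to the hardware here; for brevity this sketch
+ * completes it immediately from queuecommand itself. */
+ SCpnt->result = DID_OK << 16;
+ done(SCpnt); /* must be called exactly once */
+ return 0; /* 0 == command accepted */
+}
+#endif
+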
+#ifdef DEVFS_MUST_DIE
+devfs_handle_t scsi_devfs_handle;
+#endif
+
+/*
+ * scsi_do_cmd sends all the commands out to the low-level driver. It
+ * handles the specifics required for each low level driver - ie queued
+ * or non queued. It also prevents conflicts when different high level
+ * drivers go for the same host at the same time.
+ */
+
+void scsi_wait_req (Scsi_Request * SRpnt, const void *cmnd ,
+ void *buffer, unsigned bufflen,
+ int timeout, int retries)
+{
+#if 0
+ DECLARE_COMPLETION(wait);
+#endif
+
+ request_queue_t *q = &SRpnt->sr_device->request_queue;
+
+#if 0
+ SRpnt->sr_request.waiting = &wait;
+#endif
+
+ SRpnt->sr_request.rq_status = RQ_SCSI_BUSY;
+ scsi_do_req (SRpnt, (void *) cmnd,
+ buffer, bufflen, scsi_wait_done, timeout, retries);
+ generic_unplug_device(q);
+
+#if 0
+ wait_for_completion(&wait);
+#endif
+
+ /* XXX SMH: in the 'standard' driver everything is considered ok at
+ this point since we've waited on &wait -- hence we deallocate the
+ command structure if that hasn't been done already. This is not
+ the correct behaviour in xen ... hmm ... how to fix? */
+ mdelay(500);
+
+ SRpnt->sr_request.waiting = NULL;
+
+ if( SRpnt->sr_command != NULL )
+ {
+#ifdef SMH_DEBUG
+ printk("scsi_wait_req: releasing SRpnt->sr_command = %p\n",
+ SRpnt->sr_command);
+#endif
+ scsi_release_command(SRpnt->sr_command);
+ SRpnt->sr_command = NULL;
+#ifdef SMHHACK
+ SRpnt->freeaddr = 0x99991234;
+#endif
+ }
+}
+
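+/*
+ * Usage sketch (illustrative only): how a caller might drive
+ * scsi_wait_req() for a command with no data phase. This assumes a
+ * stock-2.4-style scsi_allocate_request()/scsi_release_request() pair
+ * is available in this tree; example_test_unit_ready is a hypothetical
+ * name.
+ */
+#if 0
+static int example_test_unit_ready(Scsi_Device * SDpnt)
+{
+ unsigned char cmd[6] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
+ Scsi_Request *SRpnt = scsi_allocate_request(SDpnt);
+ int result;
+
+ if (SRpnt == NULL)
+ return -ENOMEM;
+ SRpnt->sr_data_direction = SCSI_DATA_NONE;
+ /* No data phase: buffer == NULL, bufflen == 0; 5s timeout, 3 retries. */
+ scsi_wait_req(SRpnt, (void *) cmd, NULL, 0, 5 * HZ, 3);
+ result = SRpnt->sr_result;
+ scsi_release_request(SRpnt);
+ return result;
+}
+#endif
+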
+/*
+ * Function: scsi_do_req
+ *
+ * Purpose: Queue a SCSI request
+ *
+ * Arguments: SRpnt - command descriptor.
+ * cmnd - actual SCSI command to be performed.
+ * buffer - data buffer.
+ * bufflen - size of data buffer.
+ * done - completion function to be run.
+ * timeout - how long to let it run before timeout.
+ * retries - number of retries we allow.
+ *
+ * Lock status: With the new queueing code, this is SMP-safe, and no locks
+ * need be held upon entry. With the old queueing code, the lock
+ * was assumed to be held upon entry.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: Prior to the new queue code, this function was not SMP-safe.
+ * Also, this function is now only used for queueing requests
+ * for things like ioctls and character device requests - this
+ * is because we essentially just inject a request into the
+ * queue for the device. Normal block device handling manipulates
+ * the queue directly.
+ */
+void scsi_do_req(Scsi_Request * SRpnt, const void *cmnd,
+ void *buffer, unsigned bufflen, void (*done) (Scsi_Cmnd *),
+ int timeout, int retries)
+{
+ Scsi_Device * SDpnt = SRpnt->sr_device;
+ struct Scsi_Host *host = SDpnt->host;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ SCSI_LOG_MLQUEUE(4,
+ {
+ int i;
+ int target = SDpnt->id;
+ int size = COMMAND_SIZE(((const unsigned char *)cmnd)[0]);
+ printk("scsi_do_req (host = %d, channel = %d target = %d, "
+ "buffer =%p, bufflen = %d, done = %p, timeout = %d, "
+ "retries = %d)\n"
+ "command : ", host->host_no, SDpnt->channel, target, buffer,
+ bufflen, done, timeout, retries);
+ for (i = 0; i < size; ++i)
+ printk("%02x ", ((unsigned char *) cmnd)[i]);
+ printk("\n");
+ });
+
+ if (!host) {
+ panic("Invalid or not present host.\n");
+ }
+
+ /*
+ * If the upper level driver is reusing these things, then
+ * we should release the low-level block now. Another one will
+ * be allocated later when this request is getting queued.
+ */
+ if( SRpnt->sr_command != NULL )
+ {
+#ifdef SMH_DEBUG
+ printk("scsi_do_req: releasing SRpnt->sr_command = %p\n",
+ SRpnt->sr_command);
+#endif
+ scsi_release_command(SRpnt->sr_command);
+ SRpnt->sr_command = NULL;
+#ifdef SMHHACK
+ SRpnt->freeaddr = 0xabbadead;
+#endif
+ }
+
+ /*
+ * We must prevent reentrancy to the lowlevel host driver.
+ * This prevents it - we enter a loop until the host we want
+ * to talk to is not busy. Race conditions are prevented, as
+ * interrupts are disabled in between the time we check for
+ * the host being not busy, and the time we mark it busy
+ * ourselves. */
+
+ /*
+ * Our own function scsi_done (which marks the host as not
+ * busy, disables the timeout counter, etc) will be called
+ * either by us or by the scsi_hosts[host].queuecommand()
+ * function; scsi_done in turn calls the completion function
+ * of the high level driver. */
+
+ memcpy((void *) SRpnt->sr_cmnd, (const void *) cmnd,
+ sizeof(SRpnt->sr_cmnd));
+#ifdef SMHHACK
+ SRpnt->freeaddr = 0x1111;
+#endif
+
+ SRpnt->sr_bufflen = bufflen;
+ SRpnt->sr_buffer = buffer;
+ SRpnt->sr_allowed = retries;
+ SRpnt->sr_done = done;
+ SRpnt->sr_timeout_per_command = timeout;
+
+ if (SRpnt->sr_cmd_len == 0)
+ SRpnt->sr_cmd_len = COMMAND_SIZE(SRpnt->sr_cmnd[0]);
+
+ /*
+ * At this point, we merely set up the command, stick it in the normal
+ * request queue, and return. Eventually that request will come to the
+ * top of the list, and will be dispatched.
+ */
+ scsi_insert_special_req(SRpnt, 0);
+
+ SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_do_req()\n"));
+}
+
+/*
+ * Function: scsi_init_cmd_from_req
+ *
+ * Purpose: Initialize a Scsi_Cmnd from a Scsi_Request
+ *
+ * Arguments: SCpnt - command descriptor.
+ * SRpnt - Request from the queue.
+ *
+ * Lock status: None needed.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: Mainly transfer data from the request structure to the
+ * command structure. The request structure is allocated
+ * using the normal memory allocator, and requests can pile
+ * up to more or less any depth. The command structure represents
+ * a consumable resource, as these are allocated into a pool
+ * when the SCSI subsystem initializes. The preallocation is
+ * required so that in low-memory situations a disk I/O request
+ * won't cause the memory manager to try and write out a page.
+ * The request structure is generally used by ioctls and character
+ * devices.
+ */
+void scsi_init_cmd_from_req(Scsi_Cmnd * SCpnt, Scsi_Request * SRpnt)
+{
+ struct Scsi_Host *host = SCpnt->host;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ SCpnt->owner = SCSI_OWNER_MIDLEVEL;
+ SRpnt->sr_command = SCpnt;
+#ifdef SMH_DEBUG
+ printk("scsi_init_cmd_from_req: SRpnt = %p, SRpnt->sr_command = %p\n",
+ SRpnt, SRpnt->sr_command);
+#endif
+
+ if (!host) {
+ panic("Invalid or not present host.\n");
+ }
+
+ SCpnt->cmd_len = SRpnt->sr_cmd_len;
+ SCpnt->use_sg = SRpnt->sr_use_sg;
+
+ memcpy((void *) &SCpnt->request, (const void *) &SRpnt->sr_request,
+ sizeof(SRpnt->sr_request));
+ memcpy((void *) SCpnt->data_cmnd, (const void *) SRpnt->sr_cmnd,
+ sizeof(SCpnt->data_cmnd));
+ SCpnt->reset_chain = NULL;
+ SCpnt->serial_number = 0;
+ SCpnt->serial_number_at_timeout = 0;
+ SCpnt->bufflen = SRpnt->sr_bufflen;
+ SCpnt->buffer = SRpnt->sr_buffer;
+ SCpnt->flags = 0;
+ SCpnt->retries = 0;
+ SCpnt->allowed = SRpnt->sr_allowed;
+ SCpnt->done = SRpnt->sr_done;
+ SCpnt->timeout_per_command = SRpnt->sr_timeout_per_command;
+
+ SCpnt->sc_data_direction = SRpnt->sr_data_direction;
+
+ SCpnt->sglist_len = SRpnt->sr_sglist_len;
+ SCpnt->underflow = SRpnt->sr_underflow;
+
+ SCpnt->sc_request = SRpnt;
+
+ memcpy((void *) SCpnt->cmnd, (const void *) SRpnt->sr_cmnd,
+ sizeof(SCpnt->cmnd));
+ /* Zero the sense buffer. Some host adapters automatically request
+ * sense on error. 0 is not a valid sense code.
+ */
+ memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer);
+ SCpnt->request_buffer = SRpnt->sr_buffer;
+ SCpnt->request_bufflen = SRpnt->sr_bufflen;
+ SCpnt->old_use_sg = SCpnt->use_sg;
+ if (SCpnt->cmd_len == 0)
+ SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+ SCpnt->old_cmd_len = SCpnt->cmd_len;
+ SCpnt->sc_old_data_direction = SCpnt->sc_data_direction;
+ SCpnt->old_underflow = SCpnt->underflow;
+
+ /* Start the timer ticking. */
+
+ SCpnt->internal_timeout = NORMAL_TIMEOUT;
+ SCpnt->abort_reason = 0;
+ SCpnt->result = 0;
+
+ SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_init_cmd_from_req()\n"));
+}
+
+/*
+ * Function: scsi_do_cmd
+ *
+ * Purpose: Queue a SCSI command
+ *
+ * Arguments: SCpnt - command descriptor.
+ * cmnd - actual SCSI command to be performed.
+ * buffer - data buffer.
+ * bufflen - size of data buffer.
+ * done - completion function to be run.
+ * timeout - how long to let it run before timeout.
+ * retries - number of retries we allow.
+ *
+ * Lock status: With the new queueing code, this is SMP-safe, and no locks
+ * need be held upon entry. With the old queueing code, the lock
+ * was assumed to be held upon entry.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: Prior to the new queue code, this function was not SMP-safe.
+ * Also, this function is now only used for queueing requests
+ * for things like ioctls and character device requests - this
+ * is because we essentially just inject a request into the
+ * queue for the device. Normal block device handling manipulates
+ * the queue directly.
+ */
+void scsi_do_cmd(Scsi_Cmnd * SCpnt, const void *cmnd,
+ void *buffer, unsigned bufflen, void (*done) (Scsi_Cmnd *),
+ int timeout, int retries)
+{
+ struct Scsi_Host *host = SCpnt->host;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ SCpnt->pid = scsi_pid++;
+ SCpnt->owner = SCSI_OWNER_MIDLEVEL;
+
+ SCSI_LOG_MLQUEUE(4,
+ {
+ int i;
+ int target = SCpnt->target;
+ int size = COMMAND_SIZE(((const unsigned char *)cmnd)[0]);
+ printk("scsi_do_cmd (host = %d, channel = %d target = %d, "
+ "buffer =%p, bufflen = %d, done = %p, timeout = %d, "
+ "retries = %d)\n"
+ "command : ", host->host_no, SCpnt->channel, target, buffer,
+ bufflen, done, timeout, retries);
+ for (i = 0; i < size; ++i)
+ printk("%02x ", ((unsigned char *) cmnd)[i]);
+ printk("\n");
+ });
+
+ if (!host) {
+ panic("Invalid or not present host.\n");
+ }
+ /*
+ * We must prevent reentrancy to the lowlevel host driver. This prevents
+ * it - we enter a loop until the host we want to talk to is not busy.
+ * Race conditions are prevented, as interrupts are disabled in between the
+ * time we check for the host being not busy, and the time we mark it busy
+ * ourselves.
+ */
+
+ /*
+ * Our own function scsi_done (which marks the host as not busy, disables
+ * the timeout counter, etc) will be called either by us or by the
+ * scsi_hosts[host].queuecommand() function; scsi_done in turn calls
+ * the completion function of the high level driver.
+ */
+
+ memcpy((void *) SCpnt->data_cmnd, (const void *) cmnd,
+ sizeof(SCpnt->data_cmnd));
+ SCpnt->reset_chain = NULL;
+ SCpnt->serial_number = 0;
+ SCpnt->serial_number_at_timeout = 0;
+ SCpnt->bufflen = bufflen;
+ SCpnt->buffer = buffer;
+ SCpnt->flags = 0;
+ SCpnt->retries = 0;
+ SCpnt->allowed = retries;
+ SCpnt->done = done;
+ SCpnt->timeout_per_command = timeout;
+
+ memcpy((void *) SCpnt->cmnd, (const void *) cmnd,
+ sizeof(SCpnt->cmnd));
+ /* Zero the sense buffer. Some host adapters automatically request
+ * sense on error. 0 is not a valid sense code.
+ */
+ memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer);
+ SCpnt->request_buffer = buffer;
+ SCpnt->request_bufflen = bufflen;
+ SCpnt->old_use_sg = SCpnt->use_sg;
+ if (SCpnt->cmd_len == 0)
+ SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+ SCpnt->old_cmd_len = SCpnt->cmd_len;
+ SCpnt->sc_old_data_direction = SCpnt->sc_data_direction;
+ SCpnt->old_underflow = SCpnt->underflow;
+
+ /* Start the timer ticking. */
+
+ SCpnt->internal_timeout = NORMAL_TIMEOUT;
+ SCpnt->abort_reason = 0;
+ SCpnt->result = 0;
+
+ /*
+ * At this point, we merely set up the command, stick it in the normal
+ * request queue, and return. Eventually that request will come to the
+ * top of the list, and will be dispatched.
+ */
+ scsi_insert_special_cmd(SCpnt, 0);
+
+ SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_do_cmd()\n"));
+}
+
+/*
+ * This function is the mid-level interrupt routine, which decides how
+ * to handle error conditions. Each invocation of this function must
+ * do one and *only* one of the following:
+ *
+ * 1) Insert command in BH queue.
+ * 2) Activate error handler for host.
+ *
+ * FIXME(eric) - I am concerned about stack overflow (still). An
+ * interrupt could come while we are processing the bottom queue,
+ * which would cause another command to be stuffed onto the bottom
+ * queue, and it would in turn be processed as that interrupt handler
+ * is returning. Given a sufficiently steady rate of returning
+ * commands, this could cause the stack to overflow. I am not sure
+ * what is the most appropriate solution here - we should probably
+ * keep a depth count, and not process any commands while we still
+ * have a bottom handler active higher in the stack.
+ *
+ * There is currently code in the bottom half handler to monitor
+ * recursion in the bottom handler and report if it ever happens. If
+ * this becomes a problem, it won't be hard to engineer something to
+ * deal with it so that only the outer layer ever does any real
+ * processing.
+ */
+void scsi_done(Scsi_Cmnd * SCpnt)
+{
+ unsigned long flags;
+ int tstatus;
+
+ /*
+ * We don't have to worry about this one timing out any more.
+ */
+ tstatus = scsi_delete_timer(SCpnt);
+
+ /*
+ * If we are unable to remove the timer, it means that the command
+ * has already timed out. In this case, we have no choice but to
+ * let the timeout function run, as we have no idea where in fact
+ * that function could really be. It might be on another processor,
+ * etc, etc.
+ */
+ if (!tstatus) {
+ SCpnt->done_late = 1;
+ return;
+ }
+ /* Set the serial numbers back to zero */
+ SCpnt->serial_number = 0;
+
+ /*
+ * First, see whether this command already timed out. If so, we ignore
+ * the response. We treat it as if the command never finished.
+ *
+ * Since serial_number is now 0, the error handler could detect this
+ * situation and avoid calling the low level driver's abort routine.
+ * (DB)
+ *
+ * FIXME(eric) - I believe that this test is now redundant, due to
+ * the test of the return status of del_timer().
+ */
+ if (SCpnt->state == SCSI_STATE_TIMEOUT) {
+ SCSI_LOG_MLCOMPLETE(1, printk("Ignoring completion of %p due to timeout status", SCpnt));
+ return;
+ }
+ spin_lock_irqsave(&scsi_bhqueue_lock, flags);
+
+ SCpnt->serial_number_at_timeout = 0;
+ SCpnt->state = SCSI_STATE_BHQUEUE;
+ SCpnt->owner = SCSI_OWNER_BH_HANDLER;
+ SCpnt->bh_next = NULL;
+
+ /*
+ * Next, put this command in the BH queue.
+ *
+ * We need a spinlock here, or compare and exchange if we can reorder incoming
+ * Scsi_Cmnds, as it happens pretty often that scsi_done is called multiple
+ * times before the bh is serviced. -jj
+ *
+ * We already have the io_request_lock here, since we are called from the
+ * interrupt handler or the error handler. (DB)
+ *
+ * This may be true at the moment, but I would like to wean all of the low
+ * level drivers away from using io_request_lock. Technically they should
+ * all use their own locking. I am adding a small spinlock to protect
+ * this datastructure to make it safe for that day. (ERY)
+ */
+ if (!scsi_bh_queue_head) {
+ scsi_bh_queue_head = SCpnt;
+ scsi_bh_queue_tail = SCpnt;
+ } else {
+ scsi_bh_queue_tail->bh_next = SCpnt;
+ scsi_bh_queue_tail = SCpnt;
+ }
+
+ spin_unlock_irqrestore(&scsi_bhqueue_lock, flags);
+ /*
+ * Mark the bottom half handler to be run.
+ */
+ mark_bh(SCSI_BH);
+}
+
+/*
+ * Procedure: scsi_bottom_half_handler
+ *
+ * Purpose: Called after we have finished processing interrupts, it
+ * performs post-interrupt handling for commands that may
+ * have completed.
+ *
+ * Notes: This is called with all interrupts enabled. This should reduce
+ * interrupt latency, stack depth, and reentrancy of the low-level
+ * drivers.
+ *
+ * The io_request_lock is required throughout this routine. There was a subtle
+ * race condition when scsi_done is called after a command has already
+ * timed out but before the time out is processed by the error handler.
+ * (DB)
+ *
+ * I believe I have corrected this. We simply monitor the return status of
+ * del_timer() - if this comes back as 0, it means that the timer has fired
+ * and that a timeout is in progress. I have modified scsi_done() such
+ * that in this instance the command is never inserted in the bottom
+ * half queue. Thus the only time we hold the lock here is when
+ * we wish to atomically remove the contents of the queue.
+ */
+void scsi_bottom_half_handler(void)
+{
+ Scsi_Cmnd *SCpnt;
+ Scsi_Cmnd *SCnext;
+ unsigned long flags;
+
+
+ while (1 == 1) {
+ spin_lock_irqsave(&scsi_bhqueue_lock, flags);
+ SCpnt = scsi_bh_queue_head;
+ scsi_bh_queue_head = NULL;
+ spin_unlock_irqrestore(&scsi_bhqueue_lock, flags);
+
+ if (SCpnt == NULL) {
+ return;
+ }
+ SCnext = SCpnt->bh_next;
+
+ for (; SCpnt; SCpnt = SCnext) {
+ SCnext = SCpnt->bh_next;
+
+ switch (scsi_decide_disposition(SCpnt)) {
+ case SUCCESS:
+ /*
+ * The command completed successfully -- pass it on up.
+ */
+ SCSI_LOG_MLCOMPLETE(3,
+ printk("Command finished %d %d 0x%x\n",
+ SCpnt->host->host_busy,
+ SCpnt->host->host_failed,
+ SCpnt->result));
+
+ scsi_finish_command(SCpnt);
+ break;
+ case NEEDS_RETRY:
+ /*
+ * We only come in here if we want to retry a command.
+ * The test to see whether the command should be
+ * retried should be keeping track of the number of
+ * tries, so we don't end up looping, of course. */
+ SCSI_LOG_MLCOMPLETE(3,
+ printk("Command needs retry %d %d 0x%x\n",
+ SCpnt->host->host_busy,
+ SCpnt->host->host_failed,
+ SCpnt->result));
+
+ scsi_retry_command(SCpnt);
+ break;
+ case ADD_TO_MLQUEUE:
+ /*
+ * This typically happens for a QUEUE_FULL message -
+ * usually when the queue depth for a given device is
+ * only approximate. Adding a command
+ * to the queue for the device will prevent further commands
+ * from being sent to the device, so we shouldn't end up
+ * with tons of things being sent down that shouldn't be.
+ */
+ SCSI_LOG_MLCOMPLETE(3, printk(
+ "Cmnd rejected as device queue full, put on ml queue %p\n",
+ SCpnt));
+ scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_DEVICE_BUSY);
+ break;
+ default:
+ /*
+ * Here we have a fatal error of some sort. Turn it over to
+ * the error handler.
+ */
+ SCSI_LOG_MLCOMPLETE(3, printk(
+ "Command failed %p %x active=%d busy=%d failed=%d\n",
+ SCpnt, SCpnt->result,
+ atomic_read(&SCpnt->host->host_active),
+ SCpnt->host->host_busy,
+ SCpnt->host->host_failed));
+
+ /*
+ * Dump the sense information too.
+ */
+ if ((status_byte(SCpnt->result) & CHECK_CONDITION) != 0) {
+ SCSI_LOG_MLCOMPLETE(3, print_sense("bh", SCpnt));
+ }
+ if (SCpnt->host->eh_wait != NULL) {
+ SCpnt->host->host_failed++;
+ SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+ SCpnt->state = SCSI_STATE_FAILED;
+ SCpnt->host->in_recovery = 1;
+ /*
+ * If the host is having troubles, then look to
+ * see if this was the last command that might
+ * have failed. If so, wake up the error handler. */
+ if (SCpnt->host->host_busy == SCpnt->host->host_failed) {
+ SCSI_LOG_ERROR_RECOVERY(5, printk(
+ "Waking error handler thread (%d)\n",
+ atomic_read(&SCpnt->host->eh_wait->count)));
+#if 0
+ up(SCpnt->host->eh_wait);
+#endif
+ }
+ } else {
+ /*
+ * We only get here if the error recovery thread has died.
+ */
+ printk("scsi_bh: error finish\n");
+ scsi_finish_command(SCpnt);
+ }
+ }
+ } /* for(; SCpnt...) */
+
+ } /* while(1==1) */
+
+}
+
+/*
+ * Function: scsi_retry_command
+ *
+ * Purpose: Send a command back to the low level to be retried.
+ *
+ * Notes: This command is always executed in the context of the
+ * bottom half handler, or the error handler thread. Low
+ * level drivers should not become re-entrant as a result of
+ * this.
+ */
+int scsi_retry_command(Scsi_Cmnd * SCpnt)
+{
+ memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
+ sizeof(SCpnt->data_cmnd));
+ SCpnt->request_buffer = SCpnt->buffer;
+ SCpnt->request_bufflen = SCpnt->bufflen;
+ SCpnt->use_sg = SCpnt->old_use_sg;
+ SCpnt->cmd_len = SCpnt->old_cmd_len;
+ SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+ SCpnt->underflow = SCpnt->old_underflow;
+
+ /*
+ * Zero the sense information from the last time we tried
+ * this command.
+ */
+ memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer);
+
+ return scsi_dispatch_cmd(SCpnt);
+}
+
+/*
+ * Function: scsi_finish_command
+ *
+ * Purpose: Pass command off to upper layer for finishing of I/O
+ * request, waking processes that are waiting on results,
+ * etc.
+ */
+void scsi_finish_command(Scsi_Cmnd * SCpnt)
+{
+ struct Scsi_Host *host;
+ Scsi_Device *device;
+ Scsi_Request * SRpnt;
+ unsigned long flags;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ host = SCpnt->host;
+ device = SCpnt->device;
+
+ /*
+ * We need to protect the decrement, as otherwise a race condition
+ * would exist. Fiddling with SCpnt isn't a problem as the
+ * design only allows a single SCpnt to be active in one
+ * execution context at a time, but the device and host structures are
+ * shared.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+ host->host_busy--; /* Indicate that we are free */
+ device->device_busy--; /* Decrement device usage counter. */
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ /*
+ * Clear the flags which say that the device/host is no longer
+ * capable of accepting new commands. These are set in scsi_queue.c
+ * for both the queue full condition on a device, and for a
+ * host full condition on the host.
+ */
+ host->host_blocked = FALSE;
+ device->device_blocked = FALSE;
+
+ /*
+ * If we have valid sense information, then some kind of recovery
+ * must have taken place. Make a note of this.
+ */
+ if (scsi_sense_valid(SCpnt)) {
+ SCpnt->result |= (DRIVER_SENSE << 24);
+ }
+ SCSI_LOG_MLCOMPLETE(3, printk(
+ "Notifying upper driver of completion for device %d %x\n",
+ SCpnt->device->id, SCpnt->result));
+
+ SCpnt->owner = SCSI_OWNER_HIGHLEVEL;
+ SCpnt->state = SCSI_STATE_FINISHED;
+
+ /* We can get here with use_sg=0, causing a panic in the
+ upper level (DB) */
+ SCpnt->use_sg = SCpnt->old_use_sg;
+
+ /*
+ * If there is an associated request structure, copy the data over
+ * before we call the completion function.
+ */
+ SRpnt = SCpnt->sc_request;
+
+ if( SRpnt != NULL ) {
+ if(!SRpnt->sr_command) {
+ printk("scsi_finish_command: SRpnt=%p, SRpnt->sr_command=%p\n",
+ SRpnt, SRpnt->sr_command);
+ printk("SRpnt->freeaddr = %p\n", SRpnt->freeaddr);
+ BUG();
+ }
+ SRpnt->sr_result = SRpnt->sr_command->result;
+ if( SRpnt->sr_result != 0 ) {
+ memcpy(SRpnt->sr_sense_buffer,
+ SRpnt->sr_command->sense_buffer,
+ sizeof(SRpnt->sr_sense_buffer));
+ }
+ }
+
+ SCpnt->done(SCpnt);
+}
+
+static int scsi_register_host(Scsi_Host_Template *);
+static int scsi_unregister_host(Scsi_Host_Template *);
+
+/*
+ * Function: scsi_release_commandblocks()
+ *
+ * Purpose: Release command blocks associated with a device.
+ *
+ * Arguments: SDpnt - device
+ *
+ * Returns: Nothing
+ *
+ * Lock status: No locking assumed or required.
+ *
+ * Notes:
+ */
+void scsi_release_commandblocks(Scsi_Device * SDpnt)
+{
+ Scsi_Cmnd *SCpnt, *SCnext;
+ unsigned long flags;
+
+ spin_lock_irqsave(&device_request_lock, flags);
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCnext) {
+ SDpnt->device_queue = SCnext = SCpnt->next;
+ kfree((char *) SCpnt);
+ }
+ SDpnt->has_cmdblocks = 0;
+ SDpnt->queue_depth = 0;
+ spin_unlock_irqrestore(&device_request_lock, flags);
+}
+
+/*
+ * Function: scsi_build_commandblocks()
+ *
+ * Purpose: Allocate command blocks associated with a device.
+ *
+ * Arguments: SDpnt - device
+ *
+ * Returns: Nothing
+ *
+ * Lock status: No locking assumed or required.
+ *
+ * Notes:
+ */
+void scsi_build_commandblocks(Scsi_Device * SDpnt)
+{
+ unsigned long flags;
+ struct Scsi_Host *host = SDpnt->host;
+ int j;
+ Scsi_Cmnd *SCpnt;
+
+ spin_lock_irqsave(&device_request_lock, flags);
+
+ if (SDpnt->queue_depth == 0)
+ {
+ SDpnt->queue_depth = host->cmd_per_lun;
+ if (SDpnt->queue_depth == 0)
+ SDpnt->queue_depth = 1; /* live to fight another day */
+ }
+ SDpnt->device_queue = NULL;
+
+ for (j = 0; j < SDpnt->queue_depth; j++) {
+ SCpnt = (Scsi_Cmnd *)
+ kmalloc(sizeof(Scsi_Cmnd),
+ GFP_ATOMIC |
+ (host->unchecked_isa_dma ? GFP_DMA : 0));
+ if (NULL == SCpnt)
+ break; /* otherwise the memset on the next line would oops ... */
+ memset(SCpnt, 0, sizeof(Scsi_Cmnd));
+ SCpnt->host = host;
+ SCpnt->device = SDpnt;
+ SCpnt->target = SDpnt->id;
+ SCpnt->lun = SDpnt->lun;
+ SCpnt->channel = SDpnt->channel;
+ SCpnt->request.rq_status = RQ_INACTIVE;
+ SCpnt->use_sg = 0;
+ SCpnt->old_use_sg = 0;
+ SCpnt->old_cmd_len = 0;
+ SCpnt->underflow = 0;
+ SCpnt->old_underflow = 0;
+ SCpnt->transfersize = 0;
+ SCpnt->resid = 0;
+ SCpnt->serial_number = 0;
+ SCpnt->serial_number_at_timeout = 0;
+ SCpnt->host_scribble = NULL;
+ SCpnt->next = SDpnt->device_queue;
+ SDpnt->device_queue = SCpnt;
+ SCpnt->state = SCSI_STATE_UNUSED;
+ SCpnt->owner = SCSI_OWNER_NOBODY;
+ }
+ if (j < SDpnt->queue_depth) { /* low on space (D.Gilbert 990424) */
+ printk(KERN_WARNING "scsi_build_commandblocks: want=%d, space for=%d blocks\n",
+ SDpnt->queue_depth, j);
+ SDpnt->queue_depth = j;
+ SDpnt->has_cmdblocks = (0 != j);
+ } else {
+ SDpnt->has_cmdblocks = 1;
+ }
+ spin_unlock_irqrestore(&device_request_lock, flags);
+}
+
+void __init scsi_host_no_insert(char *str, int n)
+{
+ Scsi_Host_Name *shn, *shn2;
+ int len;
+
+ len = strlen(str);
+ if (len && (shn = (Scsi_Host_Name *) kmalloc(sizeof(Scsi_Host_Name), GFP_ATOMIC))) {
+ if ((shn->name = kmalloc(len+1, GFP_ATOMIC))) {
+ strncpy(shn->name, str, len);
+ shn->name[len] = 0;
+ shn->host_no = n;
+ shn->host_registered = 0;
+ shn->loaded_as_module = 1; /* numbers shouldn't be freed in any case */
+ shn->next = NULL;
+ if (scsi_host_no_list) {
+ for (shn2 = scsi_host_no_list;shn2->next;shn2 = shn2->next)
+ ;
+ shn2->next = shn;
+ }
+ else
+ scsi_host_no_list = shn;
+ max_scsi_hosts = n+1;
+ }
+ else
+ kfree((char *) shn);
+ }
+}
+
+#ifdef CONFIG_PROC_FS
+static int scsi_proc_info(char *buffer, char **start, off_t offset, int length)
+{
+ Scsi_Device *scd;
+ struct Scsi_Host *HBA_ptr;
+ int size, len = 0;
+ off_t begin = 0;
+ off_t pos = 0;
+
+ /*
+ * First, see if there are any attached devices or not.
+ */
+ for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) {
+ if (HBA_ptr->host_queue != NULL) {
+ break;
+ }
+ }
+ size = sprintf(buffer + len, "Attached devices: %s\n", (HBA_ptr) ? "" : "none");
+ len += size;
+ pos = begin + len;
+ for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) {
+#if 0
+ size += sprintf(buffer + len, "scsi%2d: %s\n", (int) HBA_ptr->host_no,
+ HBA_ptr->hostt->procname);
+ len += size;
+ pos = begin + len;
+#endif
+ for (scd = HBA_ptr->host_queue; scd; scd = scd->next) {
+ proc_print_scsidevice(scd, buffer, &size, len);
+ len += size;
+ pos = begin + len;
+
+ if (pos < offset) {
+ len = 0;
+ begin = pos;
+ }
+ if (pos > offset + length)
+ goto stop_output;
+ }
+ }
+
+stop_output:
+ *start = buffer + (offset - begin); /* Start of wanted data */
+ len -= (offset - begin); /* Start slop */
+ if (len > length)
+ len = length; /* Ending slop */
+ return (len);
+}
+
+static int proc_scsi_gen_write(struct file * file, const char * buf,
+ unsigned long length, void *data)
+{
+ struct Scsi_Device_Template *SDTpnt;
+ Scsi_Device *scd;
+ struct Scsi_Host *HBA_ptr;
+ char *p;
+ int host, channel, id, lun;
+ char * buffer;
+ int err;
+
+ if (!buf || length>PAGE_SIZE)
+ return -EINVAL;
+
+ if (!(buffer = (char *) __get_free_page(GFP_KERNEL)))
+ return -ENOMEM;
+ if(copy_from_user(buffer, buf, length))
+ {
+ err = -EFAULT;
+ goto out;
+ }
+
+ err = -EINVAL;
+
+ if (length < PAGE_SIZE)
+ buffer[length] = '\0';
+ else if (buffer[PAGE_SIZE-1])
+ goto out;
+
+ if (length < 11 || strncmp("scsi", buffer, 4))
+ goto out;
+
+ /*
+ * Usage: echo "scsi dump #N" > /proc/scsi/scsi
+ * to dump status of all scsi commands. The number is used to specify the level
+ * of detail in the dump.
+ */
+ if (!strncmp("dump", buffer + 5, 4)) {
+ unsigned int level;
+
+ p = buffer + 10;
+
+ if (*p == '\0')
+ goto out;
+
+ level = simple_strtoul(p, NULL, 0);
+ scsi_dump_status(level);
+ }
+ /*
+ * Usage: echo "scsi log token #N" > /proc/scsi/scsi
+ * where token is one of [all,none,error,timeout,scan,mlqueue,
+ * mlcomplete,llqueue,llcomplete,hlqueue,hlcomplete,ioctl]
+ */
+#ifdef CONFIG_SCSI_LOGGING /* { */
+
+ if (!strncmp("log", buffer + 5, 3)) {
+ char *token;
+ unsigned int level;
+
+ p = buffer + 9;
+ token = p;
+ while (*p != ' ' && *p != '\t' && *p != '\0') {
+ p++;
+ }
+
+ if (*p == '\0') {
+ if (strncmp(token, "all", 3) == 0) {
+ /*
+ * Turn on absolutely everything.
+ */
+ scsi_logging_level = ~0;
+ } else if (strncmp(token, "none", 4) == 0) {
+ /*
+ * Turn off absolutely everything.
+ */
+ scsi_logging_level = 0;
+ } else {
+ goto out;
+ }
+ } else {
+ *p++ = '\0';
+
+ level = simple_strtoul(p, NULL, 0);
+
+ /*
+ * Now figure out what to do with it.
+ */
+ if (strcmp(token, "error") == 0) {
+ SCSI_SET_ERROR_RECOVERY_LOGGING(level);
+ } else if (strcmp(token, "timeout") == 0) {
+ SCSI_SET_TIMEOUT_LOGGING(level);
+ } else if (strcmp(token, "scan") == 0) {
+ SCSI_SET_SCAN_BUS_LOGGING(level);
+ } else if (strcmp(token, "mlqueue") == 0) {
+ SCSI_SET_MLQUEUE_LOGGING(level);
+ } else if (strcmp(token, "mlcomplete") == 0) {
+ SCSI_SET_MLCOMPLETE_LOGGING(level);
+ } else if (strcmp(token, "llqueue") == 0) {
+ SCSI_SET_LLQUEUE_LOGGING(level);
+ } else if (strcmp(token, "llcomplete") == 0) {
+ SCSI_SET_LLCOMPLETE_LOGGING(level);
+ } else if (strcmp(token, "hlqueue") == 0) {
+ SCSI_SET_HLQUEUE_LOGGING(level);
+ } else if (strcmp(token, "hlcomplete") == 0) {
+ SCSI_SET_HLCOMPLETE_LOGGING(level);
+ } else if (strcmp(token, "ioctl") == 0) {
+ SCSI_SET_IOCTL_LOGGING(level);
+ } else {
+ goto out;
+ }
+ }
+
+ printk(KERN_INFO "scsi logging level set to 0x%8.8x\n", scsi_logging_level);
+ }
+#endif /* CONFIG_SCSI_LOGGING */ /* } */
+
+ /*
+ * Usage: echo "scsi add-single-device 0 1 2 3" >/proc/scsi/scsi
+ * with "0 1 2 3" replaced by your "Host Channel Id Lun".
+ * Consider this feature BETA.
+ * CAUTION: This is not for hotplugging your peripherals. As
+ * SCSI was not designed for this, you could damage your
+ * hardware!
+ * However, it is perhaps legal to switch on an
+ * already connected device; it is just not
+ * guaranteed that this doesn't corrupt an ongoing data transfer.
+ */
+ if (!strncmp("add-single-device", buffer + 5, 17)) {
+ p = buffer + 23;
+
+ host = simple_strtoul(p, &p, 0);
+ channel = simple_strtoul(p + 1, &p, 0);
+ id = simple_strtoul(p + 1, &p, 0);
+ lun = simple_strtoul(p + 1, &p, 0);
+
+ printk(KERN_INFO "scsi singledevice %d %d %d %d\n", host, channel,
+ id, lun);
+
+ for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) {
+ if (HBA_ptr->host_no == host) {
+ break;
+ }
+ }
+ err = -ENXIO;
+ if (!HBA_ptr)
+ goto out;
+
+ for (scd = HBA_ptr->host_queue; scd; scd = scd->next) {
+ if ((scd->channel == channel
+ && scd->id == id
+ && scd->lun == lun)) {
+ break;
+ }
+ }
+
+ err = -ENOSYS;
+ if (scd)
+ goto out; /* We do not yet support unplugging */
+
+ scan_scsis(HBA_ptr, 1, channel, id, lun);
+
+ /* FIXME (DB) This assumes that the queue_depth routines can be used
+ in this context as well, while they were all designed to be
+ called only once after the detect routine. (DB) */
+ /* queue_depth routine moved to inside scan_scsis(,1,,,) so
+ it is called before build_commandblocks() */
+
+ err = length;
+ goto out;
+ }
+ /*
+ * Usage: echo "scsi remove-single-device 0 1 2 3" >/proc/scsi/scsi
+ * with "0 1 2 3" replaced by your "Host Channel Id Lun".
+ *
+ * Consider this feature pre-BETA.
+ *
+ * CAUTION: This is not for hotplugging your peripherals. As
+ * SCSI was not designed for this you could damage your
+ * hardware and thoroughly confuse the SCSI subsystem.
+ *
+ */
+ else if (!strncmp("remove-single-device", buffer + 5, 20)) {
+ p = buffer + 26;
+
+ host = simple_strtoul(p, &p, 0);
+ channel = simple_strtoul(p + 1, &p, 0);
+ id = simple_strtoul(p + 1, &p, 0);
+ lun = simple_strtoul(p + 1, &p, 0);
+
+ for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) {
+ if (HBA_ptr->host_no == host) {
+ break;
+ }
+ }
+ err = -ENODEV;
+ if (!HBA_ptr)
+ goto out;
+
+ for (scd = HBA_ptr->host_queue; scd; scd = scd->next) {
+ if ((scd->channel == channel
+ && scd->id == id
+ && scd->lun == lun)) {
+ break;
+ }
+ }
+
+ if (scd == NULL)
+ goto out; /* there is no such device attached */
+
+ err = -EBUSY;
+ if (scd->access_count)
+ goto out;
+
+ SDTpnt = scsi_devicelist;
+ while (SDTpnt != NULL) {
+ if (SDTpnt->detach)
+ (*SDTpnt->detach) (scd);
+ SDTpnt = SDTpnt->next;
+ }
+
+ if (scd->attached == 0) {
+ /*
+ * Nobody is using this device any more.
+ * Free all of the command structures.
+ */
+ if (HBA_ptr->hostt->revoke)
+ HBA_ptr->hostt->revoke(scd);
+#ifdef DEVFS_MUST_DIE
+ devfs_unregister (scd->de);
+#endif
+ scsi_release_commandblocks(scd);
+
+ /* Now we can remove the device structure */
+ if (scd->next != NULL)
+ scd->next->prev = scd->prev;
+
+ if (scd->prev != NULL)
+ scd->prev->next = scd->next;
+
+ if (HBA_ptr->host_queue == scd) {
+ HBA_ptr->host_queue = scd->next;
+ }
+ blk_cleanup_queue(&scd->request_queue);
+ kfree((char *) scd);
+ } else {
+ goto out;
+ }
+ err = 0;
+ }
+out:
+
+ free_page((unsigned long) buffer);
+ return err;
+}
+#endif
+
+/*
+ * This entry point should be called by a driver if it is trying
+ * to add a low level scsi driver to the system.
+ */
+static int scsi_register_host(Scsi_Host_Template * tpnt)
+{
+ int pcount;
+ struct Scsi_Host *shpnt;
+ Scsi_Device *SDpnt;
+ struct Scsi_Device_Template *sdtpnt;
+ const char *name;
+ unsigned long flags;
+ int out_of_space = 0;
+
+ if (tpnt->next || !tpnt->detect)
+ return 1; /* Must be already loaded, or
+ * no detect routine available
+ */
+
+ /* If max_sectors isn't set, default to max */
+ if (!tpnt->max_sectors)
+ tpnt->max_sectors = MAX_SECTORS;
+
+ pcount = next_scsi_host;
+
+ MOD_INC_USE_COUNT;
+
+ /* The detect routine must carefully spinunlock/spinlock if
+ it enables interrupts, since all interrupt handlers do
+ spinlock as well.
+ All lame drivers are going to fail due to the following
+ spinlock. For the time being, let's use it only for drivers
+ using the new scsi code. NOTE: the detect routine could
+ redefine the value tpnt->use_new_eh_code. (DB, 13 May 1998) */
+
+ if (tpnt->use_new_eh_code) {
+ spin_lock_irqsave(&io_request_lock, flags);
+ tpnt->present = tpnt->detect(tpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ } else
+ tpnt->present = tpnt->detect(tpnt);
+
+ if (tpnt->present) {
+ if (pcount == next_scsi_host) {
+ if (tpnt->present > 1) {
+ printk(KERN_ERR "scsi: Failure to register low-level "
+ "scsi driver");
+ scsi_unregister_host(tpnt);
+ return 1;
+ }
+ /*
+ * The low-level driver's detect routine did not register
+ * a host itself, so do it on its behalf now.
+ */
+ if(scsi_register(tpnt, 0)==NULL)
+ {
+ printk(KERN_ERR "scsi: register failed.\n");
+ scsi_unregister_host(tpnt);
+ return 1;
+ }
+ }
+ tpnt->next = scsi_hosts; /* Add to the linked list */
+ scsi_hosts = tpnt;
+
+ /* Add the new driver to /proc/scsi */
+#ifdef CONFIG_PROC_FS
+ build_proc_dir_entries(tpnt);
+#endif
+
+#if 0
+ /*
+ * Add the kernel threads for each host adapter that will
+ * handle error correction.
+ */
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ if (shpnt->hostt == tpnt && shpnt->hostt->use_new_eh_code) {
+ DECLARE_MUTEX_LOCKED(sem);
+
+ shpnt->eh_notify = &sem;
+ kernel_thread((int (*)(void *)) scsi_error_handler,
+ (void *) shpnt, 0);
+
+ /*
+ * Now wait for the kernel error thread to initialize itself
+ * as it might be needed when we scan the bus.
+ */
+ down(&sem);
+ shpnt->eh_notify = NULL;
+ }
+ }
+#endif
+
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ if (shpnt->hostt == tpnt) {
+ if (tpnt->info) {
+ name = tpnt->info(shpnt);
+ } else {
+ name = tpnt->name;
+ }
+ printk(KERN_INFO "scsi%d : %s\n", /* And print a little message */
+ shpnt->host_no, name);
+ }
+ }
+
+ /* The next step is to call scan_scsis here. This generates the
+ * Scsi_Device entries.
+ */
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ if (shpnt->hostt == tpnt) {
+ scan_scsis(shpnt, 0, 0, 0, 0);
+ if (shpnt->select_queue_depths != NULL) {
+ (shpnt->select_queue_depths) (shpnt, shpnt->host_queue);
+ }
+ }
+ }
+
+ for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) {
+ if (sdtpnt->init && sdtpnt->dev_noticed)
+ (*sdtpnt->init) ();
+ }
+
+ /*
+ * Next we create the Scsi_Cmnd structures for this host
+ */
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next)
+ if (SDpnt->host->hostt == tpnt) {
+ for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next)
+ if (sdtpnt->attach)
+ (*sdtpnt->attach) (SDpnt);
+ if (SDpnt->attached) {
+ scsi_build_commandblocks(SDpnt);
+ if (0 == SDpnt->has_cmdblocks)
+ out_of_space = 1;
+ }
+ }
+ }
+
+ /*
+ * Now that we have all of the devices, resize the DMA pool,
+ * as required. */
+ if (!out_of_space)
+ scsi_resize_dma_pool();
+
+ /* This does any final handling that is required. */
+ for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) {
+ if (sdtpnt->finish && sdtpnt->nr_dev) {
+ (*sdtpnt->finish) ();
+ }
+ }
+ }
+#if defined(USE_STATIC_SCSI_MEMORY)
+ printk("SCSI memory: total %ldKb, used %ldKb, free %ldKb.\n",
+ (scsi_memory_upper_value - scsi_memory_lower_value) / 1024,
+ (scsi_init_memory_start - scsi_memory_lower_value) / 1024,
+ (scsi_memory_upper_value - scsi_init_memory_start) / 1024);
+#endif
+
+ if (out_of_space) {
+ scsi_unregister_host(tpnt); /* easiest way to clean up?? */
+ return 1;
+ } else
+ return 0;
+}
+
+/*
+ * Similarly, this entry point should be called by a loadable module if it
+ * is trying to remove a low level scsi driver from the system.
+ */
+static int scsi_unregister_host(Scsi_Host_Template * tpnt)
+{
+ int online_status;
+ int pcount0, pcount;
+ Scsi_Cmnd *SCpnt;
+ Scsi_Device *SDpnt;
+ Scsi_Device *SDpnt1;
+ struct Scsi_Device_Template *sdtpnt;
+ struct Scsi_Host *sh1;
+ struct Scsi_Host *shpnt;
+ char name[10]; /* host_no>=10^9? I don't think so. */
+
+#if 0
+ /* get the big kernel lock, so we don't race with open() */
+ lock_kernel();
+#endif
+
+ /*
+ * First verify that this host adapter is completely free with no pending
+ * commands
+ */
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ if (SDpnt->host->hostt == tpnt
+ && SDpnt->host->hostt->module
+ && GET_USE_COUNT(SDpnt->host->hostt->module))
+ goto err_out;
+ /*
+ * FIXME(eric) - We need to find a way to notify the
+ * low level driver that we are shutting down - via the
+ * special device entry that still needs to get added.
+ *
+ * Is detach interface below good enough for this?
+ */
+ }
+ }
+
+ /*
+ * FIXME(eric) put a spinlock on this. We force all of the devices offline
+ * to help prevent race conditions where other hosts/processors could try and
+ * get in and queue a command.
+ */
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ if (SDpnt->host->hostt == tpnt)
+ SDpnt->online = FALSE;
+
+ }
+ }
+
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ if (shpnt->hostt != tpnt) {
+ continue;
+ }
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ /*
+ * Loop over all of the commands associated with the device. If any of
+ * them are busy, then set the state back to inactive and bail.
+ */
+ for (SCpnt = SDpnt->device_queue; SCpnt;
+ SCpnt = SCpnt->next) {
+ online_status = SDpnt->online;
+ SDpnt->online = FALSE;
+ if (SCpnt->request.rq_status != RQ_INACTIVE) {
+ printk(KERN_ERR "SCSI device not inactive - rq_status=%d, target=%d, pid=%ld, state=%d, owner=%d.\n",
+ SCpnt->request.rq_status, SCpnt->target, SCpnt->pid,
+ SCpnt->state, SCpnt->owner);
+ for (SDpnt1 = shpnt->host_queue; SDpnt1;
+ SDpnt1 = SDpnt1->next) {
+ for (SCpnt = SDpnt1->device_queue; SCpnt;
+ SCpnt = SCpnt->next)
+ if (SCpnt->request.rq_status == RQ_SCSI_DISCONNECTING)
+ SCpnt->request.rq_status = RQ_INACTIVE;
+ }
+ SDpnt->online = online_status;
+ printk(KERN_ERR "Device busy???\n");
+ goto err_out;
+ }
+ /*
+ * No, this device is really free. Mark it as such, and
+ * continue on.
+ */
+ SCpnt->state = SCSI_STATE_DISCONNECTING;
+ SCpnt->request.rq_status = RQ_SCSI_DISCONNECTING; /* Mark as busy */
+ }
+ }
+ }
+ /* Next we detach the high level drivers from the Scsi_Device structures */
+
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ if (shpnt->hostt != tpnt) {
+ continue;
+ }
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next)
+ if (sdtpnt->detach)
+ (*sdtpnt->detach) (SDpnt);
+
+ /* If something still attached, punt */
+ if (SDpnt->attached) {
+ printk(KERN_ERR "Attached usage count = %d\n", SDpnt->attached);
+ goto err_out;
+ }
+#ifdef DEVFS_MUST_DIE
+ devfs_unregister (SDpnt->de);
+#endif
+ }
+ }
+
+#if 0
+ /*
+ * Next, kill the kernel error recovery thread for this host.
+ */
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ if (shpnt->hostt == tpnt
+ && shpnt->hostt->use_new_eh_code
+ && shpnt->ehandler != NULL) {
+ DECLARE_MUTEX_LOCKED(sem);
+
+ shpnt->eh_notify = &sem;
+ send_sig(SIGHUP, shpnt->ehandler, 1);
+ down(&sem);
+ shpnt->eh_notify = NULL;
+ }
+ }
+#endif
+
+ /* Next we free up the Scsi_Cmnd structures for this host */
+
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ if (shpnt->hostt != tpnt) {
+ continue;
+ }
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = shpnt->host_queue) {
+ scsi_release_commandblocks(SDpnt);
+
+ blk_cleanup_queue(&SDpnt->request_queue);
+ /* Next free up the Scsi_Device structures for this host */
+ shpnt->host_queue = SDpnt->next;
+ kfree((char *) SDpnt);
+
+ }
+ }
+
+ /* Next we go through and remove the instances of the individual hosts
+ * that were detected */
+
+ pcount0 = next_scsi_host;
+ for (shpnt = scsi_hostlist; shpnt; shpnt = sh1) {
+ sh1 = shpnt->next;
+ if (shpnt->hostt != tpnt)
+ continue;
+ pcount = next_scsi_host;
+ /* Remove the /proc/scsi directory entry */
+ sprintf(name,"%d",shpnt->host_no);
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry(name, tpnt->proc_dir);
+#endif
+ if (tpnt->release)
+ (*tpnt->release) (shpnt);
+ else {
+ /* This is the default case for the release function.
+ * It should do the right thing for most correctly
+ * written host adapters.
+ */
+ if (shpnt->irq)
+ free_irq(shpnt->irq, NULL);
+
+#if 0
+ if (shpnt->dma_channel != 0xff)
+ free_dma(shpnt->dma_channel);
+#endif
+ if (shpnt->io_port && shpnt->n_io_port)
+ release_region(shpnt->io_port, shpnt->n_io_port);
+ }
+ if (pcount == next_scsi_host)
+ scsi_unregister(shpnt);
+ tpnt->present--;
+ }
+
+ /*
+ * If there are absolutely no more hosts left, it is safe
+ * to completely nuke the DMA pool. The resize operation will
+ * do the right thing and free everything.
+ */
+ if (!scsi_hosts)
+ scsi_resize_dma_pool();
+
+ if (pcount0 != next_scsi_host)
+ printk(KERN_INFO "scsi : %d host%s left.\n", next_scsi_host,
+ (next_scsi_host == 1) ? "" : "s");
+
+#if defined(USE_STATIC_SCSI_MEMORY)
+ printk("SCSI memory: total %ldKb, used %ldKb, free %ldKb.\n",
+ (scsi_memory_upper_value - scsi_memory_lower_value) / 1024,
+ (scsi_init_memory_start - scsi_memory_lower_value) / 1024,
+ (scsi_memory_upper_value - scsi_init_memory_start) / 1024);
+#endif
+
+ /*
+ * Remove it from the linked list and /proc if all
+ * hosts were successfully removed (i.e. present == 0)
+ */
+ if (!tpnt->present) {
+ Scsi_Host_Template **SHTp = &scsi_hosts;
+ Scsi_Host_Template *SHT;
+
+ while ((SHT = *SHTp) != NULL) {
+ if (SHT == tpnt) {
+ *SHTp = SHT->next;
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry(tpnt->proc_name, proc_scsi);
+#endif
+ break;
+ }
+ SHTp = &SHT->next;
+ }
+ }
+ MOD_DEC_USE_COUNT;
+
+#if 0
+ unlock_kernel();
+#endif
+ return 0;
+
+err_out:
+
+#if 0
+ unlock_kernel();
+#endif
+ return -1;
+}
+
+static int scsi_unregister_device(struct Scsi_Device_Template *tpnt);
+
+/*
+ * This entry point should be called by a loadable module if it is trying
+ * to add a high level scsi driver to the system.
+ */
+static int scsi_register_device_module(struct Scsi_Device_Template *tpnt)
+{
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *shpnt;
+ int out_of_space = 0;
+
+ if (tpnt->next)
+ return 1;
+
+ scsi_register_device(tpnt);
+ /*
+ * First scan the devices that we know about, and see if we notice them.
+ */
+
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ if (tpnt->detect)
+ SDpnt->detected = (*tpnt->detect) (SDpnt);
+ }
+ }
+
+ /*
+ * If any of the devices would match this driver, then perform the
+ * init function.
+ */
+ if (tpnt->init && tpnt->dev_noticed) {
+ if ((*tpnt->init) ()) {
+ for (shpnt = scsi_hostlist; shpnt;
+ shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ SDpnt->detected = 0;
+ }
+ }
+ scsi_deregister_device(tpnt);
+ return 1;
+ }
+ }
+
+ /*
+ * Now actually connect the devices to the new driver.
+ */
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ SDpnt->attached += SDpnt->detected;
+ SDpnt->detected = 0;
+ if (tpnt->attach)
+ (*tpnt->attach) (SDpnt);
+ /*
+ * If this driver attached to the device and we don't have any
+ * command blocks for this device, allocate some.
+ */
+ if (SDpnt->attached && SDpnt->has_cmdblocks == 0) {
+ SDpnt->online = TRUE;
+ scsi_build_commandblocks(SDpnt);
+ if (0 == SDpnt->has_cmdblocks)
+ out_of_space = 1;
+ }
+ }
+ }
+
+ /*
+ * This does any final handling that is required.
+ */
+ if (tpnt->finish && tpnt->nr_dev)
+ (*tpnt->finish) ();
+ if (!out_of_space)
+ scsi_resize_dma_pool();
+ MOD_INC_USE_COUNT;
+
+ if (out_of_space) {
+ scsi_unregister_device(tpnt); /* easiest way to clean up?? */
+ return 1;
+ } else
+ return 0;
+}
+
+static int scsi_unregister_device(struct Scsi_Device_Template *tpnt)
+{
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *shpnt;
+
+#if 0
+ lock_kernel();
+#endif
+ /*
+ * If we are busy, this is not going to fly.
+ */
+ if (GET_USE_COUNT(tpnt->module) != 0)
+ goto error_out;
+
+ /*
+ * Next, detach the devices from the driver.
+ */
+
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt;
+ SDpnt = SDpnt->next) {
+ if (tpnt->detach)
+ (*tpnt->detach) (SDpnt);
+ if (SDpnt->attached == 0) {
+ SDpnt->online = FALSE;
+
+ /*
+ * Nobody is using this device any more. Free all of the
+ * command structures.
+ */
+ scsi_release_commandblocks(SDpnt);
+ }
+ }
+ }
+ /*
+ * Extract the template from the linked list.
+ */
+ scsi_deregister_device(tpnt);
+
+ MOD_DEC_USE_COUNT;
+#if 0
+ unlock_kernel();
+#endif
+
+ /*
+ * Final cleanup for the driver is done in the driver sources in the
+ * cleanup function.
+ */
+ return 0;
+error_out:
+#if 0
+ unlock_kernel();
+#endif
+ return -1;
+}
+
+
+/* This function should be called by drivers which need to register
+ * with the midlevel scsi system. As of 2.4.0-test9pre3 this is our
+ * main device/host registration function. /mathiasen
+ */
+int scsi_register_module(int module_type, void *ptr)
+{
+ switch (module_type) {
+ case MODULE_SCSI_HA:
+ return scsi_register_host((Scsi_Host_Template *) ptr);
+
+ /* Load upper level device handler of some kind */
+ case MODULE_SCSI_DEV:
+#ifdef CONFIG_KMOD
+ if (scsi_hosts == NULL)
+ request_module("scsi_hostadapter");
+#endif
+ return scsi_register_device_module((struct Scsi_Device_Template *) ptr);
+ /* The rest of these are not yet implemented */
+
+ /* Load constants.o */
+ case MODULE_SCSI_CONST:
+
+ /* Load specialized ioctl handler for some device. Intended for
+ * cdroms that have non-SCSI2 audio command sets. */
+ case MODULE_SCSI_IOCTL:
+
+ default:
+ return 1;
+ }
+}
+
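+/*
+ * Usage sketch: in stock 2.4 a low-level host driver registers from its
+ * module init (usually via the scsi_module.c wrapper). The mydriver_*
+ * and MYDRIVER_TEMPLATE names below are hypothetical.
+ */
+#if 0
+static Scsi_Host_Template driver_template = MYDRIVER_TEMPLATE;
+
+static int __init mydriver_init(void)
+{
+ return scsi_register_module(MODULE_SCSI_HA, &driver_template)
+ ? -ENODEV : 0;
+}
+#endif
+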
+/* Reverse the actions taken above
+ */
+int scsi_unregister_module(int module_type, void *ptr)
+{
+ int retval = 0;
+
+ switch (module_type) {
+ case MODULE_SCSI_HA:
+ retval = scsi_unregister_host((Scsi_Host_Template *) ptr);
+ break;
+ case MODULE_SCSI_DEV:
+ retval = scsi_unregister_device((struct Scsi_Device_Template *)ptr);
+ break;
+ /* The rest of these are not yet implemented. */
+ case MODULE_SCSI_CONST:
+ case MODULE_SCSI_IOCTL:
+ break;
+ default:;
+ }
+ return retval;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * Function: scsi_dump_status
+ *
+ * Purpose: Brain dump of scsi system, used for problem solving.
+ *
+ * Arguments: level - used to indicate level of detail.
+ *
+ * Notes: The level isn't used at all yet, but we need to find some way
+ * of sensibly logging varying degrees of information. A quick one-line
+ * display of each command, plus the status would be most useful.
+ *
+ * This does depend upon CONFIG_SCSI_LOGGING - I do want some way of turning
+ * it all off if the user wants a lean and mean kernel. It would probably
+ * also be useful to allow the user to specify one single host to be dumped.
+ * A second argument to the function would be useful for that purpose.
+ *
+ * FIXME - some formatting of the output into tables would be very handy.
+ */
+static void scsi_dump_status(int level)
+{
+#ifdef CONFIG_SCSI_LOGGING /* { */
+ int i;
+ struct Scsi_Host *shpnt;
+ Scsi_Cmnd *SCpnt;
+ Scsi_Device *SDpnt;
+ printk(KERN_INFO "Dump of scsi host parameters:\n");
+ i = 0;
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ printk(KERN_INFO " %d %d %d : %d %d\n",
+ shpnt->host_failed,
+ shpnt->host_busy,
+ atomic_read(&shpnt->host_active),
+ shpnt->host_blocked,
+ shpnt->host_self_blocked);
+ }
+
+ printk(KERN_INFO "\n\n");
+ printk(KERN_INFO "Dump of scsi command parameters:\n");
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ printk(KERN_INFO "h:c:t:l (dev sect nsect cnumsec sg) (ret all flg) (to/cmd to ito) cmd snse result\n");
+ for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ /* (0) h:c:t:l (dev sect nsect cnumsec sg) (ret all flg) (to/cmd to ito) cmd snse result %d %x */
+ printk(KERN_INFO "(%3d) %2d:%1d:%2d:%2d (%6s %4ld %4ld %4ld %4x %1d) (%1d %1d 0x%2x) (%4d %4d %4d) 0x%2.2x 0x%2.2x 0x%8.8x\n",
+ i++,
+
+ SCpnt->host->host_no,
+ SCpnt->channel,
+ SCpnt->target,
+ SCpnt->lun,
+
+ kdevname(SCpnt->request.rq_dev),
+ SCpnt->request.sector,
+ SCpnt->request.nr_sectors,
+ SCpnt->request.current_nr_sectors,
+ SCpnt->request.rq_status,
+ SCpnt->use_sg,
+
+ SCpnt->retries,
+ SCpnt->allowed,
+ SCpnt->flags,
+
+ SCpnt->timeout_per_command,
+ SCpnt->timeout,
+ SCpnt->internal_timeout,
+
+ SCpnt->cmnd[0],
+ SCpnt->sense_buffer[2],
+ SCpnt->result);
+ }
+ }
+ }
+
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+ for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ /* Now dump the request lists for each block device */
+ printk(KERN_INFO "Dump of pending block device requests\n");
+ for (i = 0; i < MAX_BLKDEV; i++) {
+ struct list_head * queue_head;
+
+ queue_head = &blk_dev[i].request_queue.queue_head;
+ if (!list_empty(queue_head)) {
+ struct request *req;
+ struct list_head * entry;
+
+ printk(KERN_INFO "%d: ", i);
+ entry = queue_head->next;
+ do {
+ req = blkdev_entry_to_request(entry);
+ printk("(%s %d %ld %ld %ld) ",
+ kdevname(req->rq_dev),
+ req->cmd,
+ req->sector,
+ req->nr_sectors,
+ req->current_nr_sectors);
+ } while ((entry = entry->next) != queue_head);
+ printk("\n");
+ }
+ }
+ }
+ }
+#endif /* CONFIG_SCSI_LOGGING */ /* } */
+}
+#endif /* CONFIG_PROC_FS */
+
+static int __init scsi_host_no_init (char *str)
+{
+ static int next_no = 0;
+ char *temp;
+
+ while (str) {
+ temp = str;
+ while (*temp && (*temp != ':') && (*temp != ','))
+ temp++;
+ if (!*temp)
+ temp = NULL;
+ else
+ *temp++ = 0;
+ scsi_host_no_insert(str, next_no);
+ str = temp;
+ next_no++;
+ }
+ return 1;
+}
+
+static char *scsihosts;
+
+MODULE_PARM(scsihosts, "s");
+MODULE_DESCRIPTION("SCSI core");
+MODULE_LICENSE("GPL");
+
+#ifndef MODULE
+int __init scsi_setup(char *str)
+{
+ scsihosts = str;
+ return 1;
+}
+
+__setup("scsihosts=", scsi_setup);
+#endif
+
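+/*
+ * Example: booting with "scsihosts=aha1542::ncr53c8xx" reserves host
+ * number 0 for aha1542 and host number 2 for ncr53c8xx, leaving host
+ * number 1 unused (the empty field between the colons). The driver
+ * names here are illustrative only.
+ */
+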
+static spinlock_t slock2 = SPIN_LOCK_UNLOCKED;
+
+static int __init init_scsi(void)
+{
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *generic;
+#endif
+
+ printk(KERN_INFO "SCSI subsystem driver " REVISION "\n");
+
+ {
+ unsigned long flags;
+
+ spin_lock_irqsave(&slock2, flags);
+ spin_unlock_irqrestore(&slock2, flags);
+ printk("SCSI start of day -- flags = %lx\n", flags);
+ }
+
+ if( scsi_init_minimal_dma_pool() != 0 )
+ {
+ return 1;
+ }
+
+#ifdef CONFIG_PROC_FS
+ /*
+ * This makes /proc/scsi and /proc/scsi/scsi visible.
+ */
+ proc_scsi = proc_mkdir("scsi", 0);
+ if (!proc_scsi) {
+ printk (KERN_ERR "cannot init /proc/scsi\n");
+ return -ENOMEM;
+ }
+ generic = create_proc_info_entry ("scsi/scsi", 0, 0, scsi_proc_info);
+ if (!generic) {
+ printk (KERN_ERR "cannot init /proc/scsi/scsi\n");
+ remove_proc_entry("scsi", 0);
+ return -ENOMEM;
+ }
+ generic->write_proc = proc_scsi_gen_write;
+#endif
+
+#ifdef DEVFS_MUST_DIE
+ scsi_devfs_handle = devfs_mk_dir (NULL, "scsi", NULL);
+#endif
+ if (scsihosts)
+ printk(KERN_INFO "scsi: host order: %s\n", scsihosts);
+ scsi_host_no_init (scsihosts);
+ /*
+ * This is where the processing takes place for most everything
+ * when commands are completed.
+ */
+ init_bh(SCSI_BH, scsi_bottom_half_handler);
+
+ {
+ unsigned long flags;
+
+ spin_lock_irqsave(&slock2, flags);
+ spin_unlock_irqrestore(&slock2, flags);
+ printk("SCSI end of day -- flags = %lx\n", flags);
+ }
+
+ return 0;
+}
+
+static void __exit exit_scsi(void)
+{
+ Scsi_Host_Name *shn, *shn2 = NULL;
+
+ remove_bh(SCSI_BH);
+
+#ifdef DEVFS_MUST_DIE
+ devfs_unregister (scsi_devfs_handle);
+#endif
+ for (shn = scsi_host_no_list;shn;shn = shn->next) {
+ if (shn->name)
+ kfree(shn->name);
+ if (shn2)
+ kfree (shn2);
+ shn2 = shn;
+ }
+ if (shn2)
+ kfree (shn2);
+
+#ifdef CONFIG_PROC_FS
+ /* No, we're not here anymore. Don't show the /proc/scsi files. */
+ remove_proc_entry ("scsi/scsi", 0);
+ remove_proc_entry ("scsi", 0);
+#endif
+
+ /*
+ * Free up the DMA pool.
+ */
+ scsi_resize_dma_pool();
+
+}
+
+module_init(init_scsi);
+module_exit(exit_scsi);
+
+/*
+ * Function: scsi_get_host_dev()
+ *
+ * Purpose: Create a Scsi_Device that points to the host adapter itself.
+ *
+ * Arguments: SHpnt - Host that needs a Scsi_Device
+ *
+ * Lock status: None assumed.
+ *
+ * Returns: The Scsi_Device or NULL
+ *
+ * Notes:
+ */
+Scsi_Device * scsi_get_host_dev(struct Scsi_Host * SHpnt)
+{
+ Scsi_Device * SDpnt;
+
+ /*
+ * Attach a single Scsi_Device to the Scsi_Host - this should
+ * be made to look like a "pseudo-device" that points to the
+ * HA itself. For the moment, we include it at the head of
+ * the host_queue itself - I don't think we want to show this
+ * to the HA in select_queue_depths(), as this would probably confuse
+ * matters.
+ * Note - this device is not accessible from any high-level
+ * drivers (including generics), which is probably not
+ * optimal. We can add hooks later to attach one if needed.
+ */
+ SDpnt = (Scsi_Device *) kmalloc(sizeof(Scsi_Device),
+ GFP_ATOMIC);
+ if(SDpnt == NULL)
+ return NULL;
+
+ memset(SDpnt, 0, sizeof(Scsi_Device));
+
+ SDpnt->host = SHpnt;
+ SDpnt->id = SHpnt->this_id;
+ SDpnt->type = -1;
+ SDpnt->queue_depth = 1;
+
+ scsi_build_commandblocks(SDpnt);
+
+ scsi_initialize_queue(SDpnt, SHpnt);
+
+ SDpnt->online = TRUE;
+
+#if 0
+ /*
+ * Initialize the object that we will use to wait for command blocks.
+ */
+ init_waitqueue_head(&SDpnt->scpnt_wait);
+#endif
+ return SDpnt;
+}
+
+/*
+ * Function: scsi_free_host_dev()
+ *
+ * Purpose: Free the Scsi_Device that points to the host adapter itself.
+ *
+ * Arguments: SDpnt - The pseudo-device to free
+ *
+ * Lock status: None assumed.
+ *
+ * Returns: Nothing
+ *
+ * Notes:
+ */
+void scsi_free_host_dev(Scsi_Device * SDpnt)
+{
+ if( (unsigned char) SDpnt->id != (unsigned char) SDpnt->host->this_id )
+ {
+ panic("Attempt to delete wrong device\n");
+ }
+
+ blk_cleanup_queue(&SDpnt->request_queue);
+
+ /*
+ * We only have a single SCpnt attached to this device. Free
+ * it now.
+ */
+ scsi_release_commandblocks(SDpnt);
+ kfree(SDpnt);
+}
+
+/*
+ * Function: scsi_reset_provider_done_command
+ *
+ * Purpose: Dummy done routine.
+ *
+ * Notes: Some low level drivers will call scsi_done and end up here,
+ * others won't bother.
+ * We don't want the bogus command used for the bus/device
+ * reset to find its way into the mid-layer so we intercept
+ * it here.
+ */
+static void
+scsi_reset_provider_done_command(Scsi_Cmnd *SCpnt)
+{
+}
+
+/*
+ * Function: scsi_reset_provider
+ *
+ * Purpose: Send requested reset to a bus or device at any phase.
+ *
+ * Arguments: device - device to send reset to
+ * flag - reset type (see scsi.h)
+ *
+ * Returns: SUCCESS/FAILURE.
+ *
+ * Notes: This is used by the SCSI Generic driver to provide
+ * Bus/Device reset capability.
+ */
+int
+scsi_reset_provider(Scsi_Device *dev, int flag)
+{
+ Scsi_Cmnd SC, *SCpnt = &SC;
+ int rtn;
+
+ memset(&SCpnt->eh_timeout, 0, sizeof(SCpnt->eh_timeout));
+ SCpnt->host = dev->host;
+ SCpnt->device = dev;
+ SCpnt->target = dev->id;
+ SCpnt->lun = dev->lun;
+ SCpnt->channel = dev->channel;
+ SCpnt->request.rq_status = RQ_SCSI_BUSY;
+ SCpnt->request.waiting = NULL;
+ SCpnt->use_sg = 0;
+ SCpnt->old_use_sg = 0;
+ SCpnt->old_cmd_len = 0;
+ SCpnt->underflow = 0;
+ SCpnt->transfersize = 0;
+ SCpnt->resid = 0;
+ SCpnt->serial_number = 0;
+ SCpnt->serial_number_at_timeout = 0;
+ SCpnt->host_scribble = NULL;
+ SCpnt->next = NULL;
+ SCpnt->state = SCSI_STATE_INITIALIZING;
+ SCpnt->owner = SCSI_OWNER_MIDLEVEL;
+
+ memset(&SCpnt->cmnd, '\0', sizeof(SCpnt->cmnd));
+
+ SCpnt->scsi_done = scsi_reset_provider_done_command;
+ SCpnt->done = NULL;
+ SCpnt->reset_chain = NULL;
+
+ SCpnt->buffer = NULL;
+ SCpnt->bufflen = 0;
+ SCpnt->request_buffer = NULL;
+ SCpnt->request_bufflen = 0;
+
+ SCpnt->internal_timeout = NORMAL_TIMEOUT;
+ SCpnt->abort_reason = DID_ABORT;
+
+ SCpnt->cmd_len = 0;
+
+ SCpnt->sc_data_direction = SCSI_DATA_UNKNOWN;
+ SCpnt->sc_request = NULL;
+ SCpnt->sc_magic = SCSI_CMND_MAGIC;
+
+ /*
+ * Sometimes the command can get back into the timer chain,
+ * so use the pid as an identifier.
+ */
+ SCpnt->pid = 0;
+
+ if (dev->host->hostt->use_new_eh_code) {
+ rtn = scsi_new_reset(SCpnt, flag);
+ } else {
+ /* The old error-handling path is compiled out in this port;
+ * fail the request rather than return an uninitialized rtn. */
+ rtn = FAILED;
+#if 0
+ unsigned long flags;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ rtn = scsi_old_reset(SCpnt, flag);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+#endif
+ }
+
+ scsi_delete_timer(SCpnt);
+ return rtn;
+}
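+
+/*
+ * Usage sketch (illustrative): a pass-through driver such as sg asks for
+ * a device-level reset with one of the SCSI_TRY_RESET_* flags defined in
+ * scsi.h, e.g.
+ *
+ * if (scsi_reset_provider(SDpnt, SCSI_TRY_RESET_DEVICE) != SUCCESS)
+ * printk("scsi: device reset failed\n");
+ */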
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/scsi.h b/xen/drivers/scsi/scsi.h
new file mode 100644
index 0000000000..338bca8f7b
--- /dev/null
+++ b/xen/drivers/scsi/scsi.h
@@ -0,0 +1,896 @@
+/*
+ * scsi.h Copyright (C) 1992 Drew Eckhardt
+ * Copyright (C) 1993, 1994, 1995, 1998, 1999 Eric Youngdale
+ * generic SCSI package header file by
+ * Initial versions: Drew Eckhardt
+ * Subsequent revisions: Eric Youngdale
+ *
+ * <drew@colorado.edu>
+ *
+ * Modified by Eric Youngdale eric@andante.org to
+ * add scatter-gather, multiple outstanding request, and other
+ * enhancements.
+ */
+
+#ifndef _SCSI_H
+#define _SCSI_H
+
+#include <xeno/config.h> /* for CONFIG_SCSI_LOGGING */
+/*#include <xeno/devfs_fs_kernel.h>*/
+/*#include <xeno/proc_fs.h>*/
+
+/*
+ * Some of the public constants are being moved to this file.
+ * We include it here so that what came from where is transparent.
+ */
+#include <scsi/scsi.h>
+
+/*#include <xeno/random.h>*/
+
+#include <asm/hardirq.h>
+#include <asm/scatterlist.h>
+#include <asm/io.h>
+
+/*
+ * These are the values that the SCpnt->sc_data_direction and
+ * SRpnt->sr_data_direction can take. They should be set by the
+ * command creator; SCSI_DATA_UNKNOWN is essentially the default,
+ * and is what you will see if nobody bothered to set a value.
+ */
+#define SCSI_DATA_UNKNOWN 0
+#define SCSI_DATA_WRITE 1
+#define SCSI_DATA_READ 2
+#define SCSI_DATA_NONE 3
+
+#ifdef CONFIG_PCI
+#include <xeno/pci.h>
+#if ((SCSI_DATA_UNKNOWN == PCI_DMA_BIDIRECTIONAL) && (SCSI_DATA_WRITE == PCI_DMA_TODEVICE) && (SCSI_DATA_READ == PCI_DMA_FROMDEVICE) && (SCSI_DATA_NONE == PCI_DMA_NONE))
+#define scsi_to_pci_dma_dir(scsi_dir) ((int)(scsi_dir))
+#else
+extern __inline__ int scsi_to_pci_dma_dir(unsigned char scsi_dir)
+{
+ if (scsi_dir == SCSI_DATA_UNKNOWN)
+ return PCI_DMA_BIDIRECTIONAL;
+ if (scsi_dir == SCSI_DATA_WRITE)
+ return PCI_DMA_TODEVICE;
+ if (scsi_dir == SCSI_DATA_READ)
+ return PCI_DMA_FROMDEVICE;
+ return PCI_DMA_NONE;
+}
+#endif
+#endif
+
+#if defined(CONFIG_SBUS) && !defined(CONFIG_SUN3) && !defined(CONFIG_SUN3X)
+#include <asm/sbus.h>
+#if ((SCSI_DATA_UNKNOWN == SBUS_DMA_BIDIRECTIONAL) && (SCSI_DATA_WRITE == SBUS_DMA_TODEVICE) && (SCSI_DATA_READ == SBUS_DMA_FROMDEVICE) && (SCSI_DATA_NONE == SBUS_DMA_NONE))
+#define scsi_to_sbus_dma_dir(scsi_dir) ((int)(scsi_dir))
+#else
+extern __inline__ int scsi_to_sbus_dma_dir(unsigned char scsi_dir)
+{
+ if (scsi_dir == SCSI_DATA_UNKNOWN)
+ return SBUS_DMA_BIDIRECTIONAL;
+ if (scsi_dir == SCSI_DATA_WRITE)
+ return SBUS_DMA_TODEVICE;
+ if (scsi_dir == SCSI_DATA_READ)
+ return SBUS_DMA_FROMDEVICE;
+ return SBUS_DMA_NONE;
+}
+#endif
+#endif
+
+/*
+ * Some defs, in case these are not defined elsewhere.
+ */
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#define MAX_SCSI_DEVICE_CODE 14
+extern const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE];
+
+#ifdef DEBUG
+#define SCSI_TIMEOUT (5*HZ)
+#else
+#define SCSI_TIMEOUT (2*HZ)
+#endif
+
+/*
+ * Used for debugging the new queueing code. We want to make sure
+ * that the lock state is consistent with design. Only do this in
+ * the user space simulator.
+ */
+#define ASSERT_LOCK(_LOCK, _COUNT)
+
+#if defined(CONFIG_SMP) && defined(CONFIG_USER_DEBUG)
+#undef ASSERT_LOCK
+#define ASSERT_LOCK(_LOCK,_COUNT) \
+ { if( (_LOCK)->lock != _COUNT ) \
+ panic("Lock count inconsistent %s %d\n", __FILE__, __LINE__); \
+ }
+#endif
+
+/*
+ * Use these to pick apart the status, message, host and driver bytes
+ * of a result word.
+ *
+ * These are set by:
+ *
+ * status_byte = set by the target device
+ * msg_byte = return status from the host adapter itself
+ * host_byte = set by the low-level driver to indicate status
+ * driver_byte = set by the mid-level
+ */
+#define status_byte(result) (((result) >> 1) & 0x1f)
+#define msg_byte(result) (((result) >> 8) & 0xff)
+#define host_byte(result) (((result) >> 16) & 0xff)
+#define driver_byte(result) (((result) >> 24) & 0xff)
+#define suggestion(result) (driver_byte(result) & SUGGEST_MASK)
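+
+/*
+ * Worked example: for result = (DID_ERROR << 16) | (CHECK_CONDITION << 1),
+ * host_byte(result) == DID_ERROR and status_byte(result) == CHECK_CONDITION
+ * (CHECK_CONDITION is the pre-shifted 0x01 value from <scsi/scsi.h>, which
+ * is why status_byte() shifts the raw status right by one).
+ */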
+
+#define sense_class(sense) (((sense) >> 4) & 0x7)
+#define sense_error(sense) ((sense) & 0xf)
+#define sense_valid(sense) ((sense) & 0x80)
+
+#define NEEDS_RETRY 0x2001
+#define SUCCESS 0x2002
+#define FAILED 0x2003
+#define QUEUED 0x2004
+#define SOFT_ERROR 0x2005
+#define ADD_TO_MLQUEUE 0x2006
+
+/*
+ * These are the values that scsi_cmd->state can take.
+ */
+#define SCSI_STATE_TIMEOUT 0x1000
+#define SCSI_STATE_FINISHED 0x1001
+#define SCSI_STATE_FAILED 0x1002
+#define SCSI_STATE_QUEUED 0x1003
+#define SCSI_STATE_UNUSED 0x1006
+#define SCSI_STATE_DISCONNECTING 0x1008
+#define SCSI_STATE_INITIALIZING 0x1009
+#define SCSI_STATE_BHQUEUE 0x100a
+#define SCSI_STATE_MLQUEUE 0x100b
+
+/*
+ * These are the values that the owner field can take.
+ * They are used as an indication of who the command belongs to.
+ */
+#define SCSI_OWNER_HIGHLEVEL 0x100
+#define SCSI_OWNER_MIDLEVEL 0x101
+#define SCSI_OWNER_LOWLEVEL 0x102
+#define SCSI_OWNER_ERROR_HANDLER 0x103
+#define SCSI_OWNER_BH_HANDLER 0x104
+#define SCSI_OWNER_NOBODY 0x105
+
+#define COMMAND_SIZE(opcode) scsi_command_size[((opcode) >> 5) & 7]
+
+#define IDENTIFY_BASE 0x80
+#define IDENTIFY(can_disconnect, lun) (IDENTIFY_BASE |\
+ ((can_disconnect) ? 0x40 : 0) |\
+ ((lun) & 0x07))
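+
+/*
+ * Worked examples: COMMAND_SIZE(READ_10) indexes opcode group 1
+ * (0x28 >> 5), giving a 10-byte CDB; IDENTIFY(1, 2) builds the identify
+ * message 0x80 | 0x40 | 0x02 == 0xc2 (disconnection allowed, LUN 2).
+ */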
+
+
+/*
+ * This defines the scsi logging feature. It is a means by which the
+ * user can select how much information they get about various goings on,
+ * and it can be really useful for fault tracing. The logging word is
+ * divided into 3-bit fields, each of which holds the loglevel for one
+ * event class. The division of things is somewhat arbitrary, and could
+ * be changed if it were really needed for any reason. The shift and bit
+ * counts below are the only place where this layout is specified. 3 bits
+ * is more than enough for a first go-around, since it gives 8 levels of
+ * logging (really 7, since 0 is always off). Cutting to 2 bits might be
+ * wise at some point.
+ */
+
+#define SCSI_LOG_ERROR_SHIFT 0
+#define SCSI_LOG_TIMEOUT_SHIFT 3
+#define SCSI_LOG_SCAN_SHIFT 6
+#define SCSI_LOG_MLQUEUE_SHIFT 9
+#define SCSI_LOG_MLCOMPLETE_SHIFT 12
+#define SCSI_LOG_LLQUEUE_SHIFT 15
+#define SCSI_LOG_LLCOMPLETE_SHIFT 18
+#define SCSI_LOG_HLQUEUE_SHIFT 21
+#define SCSI_LOG_HLCOMPLETE_SHIFT 24
+#define SCSI_LOG_IOCTL_SHIFT 27
+
+#define SCSI_LOG_ERROR_BITS 3
+#define SCSI_LOG_TIMEOUT_BITS 3
+#define SCSI_LOG_SCAN_BITS 3
+#define SCSI_LOG_MLQUEUE_BITS 3
+#define SCSI_LOG_MLCOMPLETE_BITS 3
+#define SCSI_LOG_LLQUEUE_BITS 3
+#define SCSI_LOG_LLCOMPLETE_BITS 3
+#define SCSI_LOG_HLQUEUE_BITS 3
+#define SCSI_LOG_HLCOMPLETE_BITS 3
+#define SCSI_LOG_IOCTL_BITS 3
+
+#ifdef CONFIG_SCSI_LOGGING
+
+#define SCSI_CHECK_LOGGING(SHIFT, BITS, LEVEL, CMD) \
+{ \
+ unsigned int mask; \
+ \
+ mask = (1 << (BITS)) - 1; \
+ if( ((scsi_logging_level >> (SHIFT)) & mask) > (LEVEL) ) \
+ { \
+ (CMD); \
+ } \
+}
+
+#define SCSI_SET_LOGGING(SHIFT, BITS, LEVEL) \
+{ \
+ unsigned int mask; \
+ \
+ mask = ((1 << (BITS)) - 1) << SHIFT; \
+ scsi_logging_level = ((scsi_logging_level & ~mask) \
+ | ((LEVEL << SHIFT) & mask)); \
+}
+
+
+
+#else
+
+/*
+ * With no logging enabled, stub these out so they don't do anything.
+ */
+#define SCSI_SET_LOGGING(SHIFT, BITS, LEVEL)
+
+#define SCSI_CHECK_LOGGING(SHIFT, BITS, LEVEL, CMD)
+#endif
+
+/*
+ * These are the macros that are actually used throughout the code to
+ * log events. If logging isn't enabled, they are no-ops and will be
+ * completely absent from the user's code.
+ *
+ * The 'set' versions of the macros are really intended to only be called
+ * from the /proc filesystem, and in production kernels this will be about
+ * all that is ever used. It could be useful in a debugging environment to
+ * bump the logging level when certain strange events are detected, however.
+ */
+#define SCSI_LOG_ERROR_RECOVERY(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_ERROR_SHIFT, SCSI_LOG_ERROR_BITS, LEVEL,CMD);
+#define SCSI_LOG_TIMEOUT(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_TIMEOUT_SHIFT, SCSI_LOG_TIMEOUT_BITS, LEVEL,CMD);
+#define SCSI_LOG_SCAN_BUS(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_SCAN_SHIFT, SCSI_LOG_SCAN_BITS, LEVEL,CMD);
+#define SCSI_LOG_MLQUEUE(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_MLQUEUE_SHIFT, SCSI_LOG_MLQUEUE_BITS, LEVEL,CMD);
+#define SCSI_LOG_MLCOMPLETE(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_MLCOMPLETE_SHIFT, SCSI_LOG_MLCOMPLETE_BITS, LEVEL,CMD);
+#define SCSI_LOG_LLQUEUE(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_LLQUEUE_SHIFT, SCSI_LOG_LLQUEUE_BITS, LEVEL,CMD);
+#define SCSI_LOG_LLCOMPLETE(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_LLCOMPLETE_SHIFT, SCSI_LOG_LLCOMPLETE_BITS, LEVEL,CMD);
+#define SCSI_LOG_HLQUEUE(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_HLQUEUE_SHIFT, SCSI_LOG_HLQUEUE_BITS, LEVEL,CMD);
+#define SCSI_LOG_HLCOMPLETE(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_HLCOMPLETE_SHIFT, SCSI_LOG_HLCOMPLETE_BITS, LEVEL,CMD);
+#define SCSI_LOG_IOCTL(LEVEL,CMD) \
+ SCSI_CHECK_LOGGING(SCSI_LOG_IOCTL_SHIFT, SCSI_LOG_IOCTL_BITS, LEVEL,CMD);
+
+
+#define SCSI_SET_ERROR_RECOVERY_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_ERROR_SHIFT, SCSI_LOG_ERROR_BITS, LEVEL);
+#define SCSI_SET_TIMEOUT_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_TIMEOUT_SHIFT, SCSI_LOG_TIMEOUT_BITS, LEVEL);
+#define SCSI_SET_SCAN_BUS_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_SCAN_SHIFT, SCSI_LOG_SCAN_BITS, LEVEL);
+#define SCSI_SET_MLQUEUE_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_MLQUEUE_SHIFT, SCSI_LOG_MLQUEUE_BITS, LEVEL);
+#define SCSI_SET_MLCOMPLETE_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_MLCOMPLETE_SHIFT, SCSI_LOG_MLCOMPLETE_BITS, LEVEL);
+#define SCSI_SET_LLQUEUE_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_LLQUEUE_SHIFT, SCSI_LOG_LLQUEUE_BITS, LEVEL);
+#define SCSI_SET_LLCOMPLETE_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_LLCOMPLETE_SHIFT, SCSI_LOG_LLCOMPLETE_BITS, LEVEL);
+#define SCSI_SET_HLQUEUE_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_HLQUEUE_SHIFT, SCSI_LOG_HLQUEUE_BITS, LEVEL);
+#define SCSI_SET_HLCOMPLETE_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_HLCOMPLETE_SHIFT, SCSI_LOG_HLCOMPLETE_BITS, LEVEL);
+#define SCSI_SET_IOCTL_LOGGING(LEVEL) \
+ SCSI_SET_LOGGING(SCSI_LOG_IOCTL_SHIFT, SCSI_LOG_IOCTL_BITS, LEVEL);
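+
+/*
+ * Worked example: SCSI_SET_TIMEOUT_LOGGING(2) stores 2 in bits 3..5 of
+ * scsi_logging_level. SCSI_LOG_TIMEOUT(1, printk("late\n")) then fires,
+ * because the stored level (2) exceeds the requested level (1), while
+ * SCSI_LOG_TIMEOUT(2, ...) stays quiet -- SCSI_CHECK_LOGGING() only
+ * emits when the stored level is strictly greater than LEVEL.
+ */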
+
+/*
+ * The result status word is built up in the following format:
+ * the low byte is the status returned by the SCSI command,
+ * with vendor specific bits masked.
+ *
+ * The next byte is the message which followed the SCSI status.
+ * This allows a stos to be used, since the Intel is a little
+ * endian machine.
+ *
+ * The third byte is a host return code, which is one of the
+ * DID_* values below; the top byte holds the driver_byte and
+ * suggestion bits.
+ *
+ * I.e.
+ * lsb msb
+ * status msg host code driver
+ *
+ * Our errors returned by OUR driver, NOT SCSI message. Or'd with
+ * SCSI message passed back to driver <IF any>.
+ */
+
+
+#define DID_OK 0x00 /* NO error */
+#define DID_NO_CONNECT 0x01 /* Couldn't connect before timeout period */
+#define DID_BUS_BUSY 0x02 /* BUS stayed busy through time out period */
+#define DID_TIME_OUT 0x03 /* TIMED OUT for other reason */
+#define DID_BAD_TARGET 0x04 /* BAD target. */
+#define DID_ABORT 0x05 /* Told to abort for some other reason */
+#define DID_PARITY 0x06 /* Parity error */
+#define DID_ERROR 0x07 /* Internal error */
+#define DID_RESET 0x08 /* Reset by somebody. */
+#define DID_BAD_INTR 0x09 /* Got an interrupt we weren't expecting. */
+#define DID_PASSTHROUGH 0x0a /* Force command past mid-layer */
+#define DID_SOFT_ERROR 0x0b /* The low-level driver just wants a retry */
+#define DRIVER_OK 0x00 /* Driver status */
+
+/*
+ * These indicate the error that occurred, and what is available.
+ */
+
+#define DRIVER_BUSY 0x01
+#define DRIVER_SOFT 0x02
+#define DRIVER_MEDIA 0x03
+#define DRIVER_ERROR 0x04
+
+#define DRIVER_INVALID 0x05
+#define DRIVER_TIMEOUT 0x06
+#define DRIVER_HARD 0x07
+#define DRIVER_SENSE 0x08
+
+#define SUGGEST_RETRY 0x10
+#define SUGGEST_ABORT 0x20
+#define SUGGEST_REMAP 0x30
+#define SUGGEST_DIE 0x40
+#define SUGGEST_SENSE 0x80
+#define SUGGEST_IS_OK 0xff
+
+#define DRIVER_MASK 0x0f
+#define SUGGEST_MASK 0xf0
+
+#define MAX_COMMAND_SIZE 16
+#define SCSI_SENSE_BUFFERSIZE 64
+
+/*
+ * SCSI command sets
+ */
+
+#define SCSI_UNKNOWN 0
+#define SCSI_1 1
+#define SCSI_1_CCS 2
+#define SCSI_2 3
+#define SCSI_3 4
+
+/*
+ * Every SCSI command starts with a one byte OP-code.
+ * The next byte's high three bits are the LUN of the
+ * device. Any multi-byte quantities are stored high byte
+ * first, and may have a 5 bit MSB in the same byte
+ * as the LUN.
+ */
+
+/*
+ * As the scsi do command functions are intelligent, and may need to
+ * redo a command, we need to keep track of the last command
+ * executed on each one.
+ */
+
+#define WAS_RESET 0x01
+#define WAS_TIMEDOUT 0x02
+#define WAS_SENSE 0x04
+#define IS_RESETTING 0x08
+#define IS_ABORTING 0x10
+#define ASKED_FOR_SENSE 0x20
+#define SYNC_RESET 0x40
+
+#if defined(__mc68000__) || defined(CONFIG_APUS)
+#include <asm/pgtable.h>
+#define CONTIGUOUS_BUFFERS(X,Y) \
+ (virt_to_phys((X)->b_data+(X)->b_size-1)+1==virt_to_phys((Y)->b_data))
+#else
+#define CONTIGUOUS_BUFFERS(X,Y) ((X->b_data+X->b_size) == Y->b_data)
+#endif
+
+
+/*
+ * This is the crap from the old error handling code. We have it in a special
+ * place so that we can more easily delete it later on.
+ */
+#include "scsi_obsolete.h"
+
+/*
+ * Add some typedefs so that we can prototype a bunch of the functions.
+ */
+typedef struct scsi_device Scsi_Device;
+typedef struct scsi_cmnd Scsi_Cmnd;
+typedef struct scsi_request Scsi_Request;
+
+#define SCSI_CMND_MAGIC 0xE25C23A5
+#define SCSI_REQ_MAGIC 0x75F6D354
+
+/*
+ * Here is where we prototype most of the mid-layer.
+ */
+
+/*
+ * Initializes all SCSI devices. This scans all scsi busses.
+ */
+
+extern unsigned int scsi_logging_level; /* What do we log? */
+extern unsigned int scsi_dma_free_sectors; /* How much room do we have left */
+extern unsigned int scsi_need_isa_buffer; /* True if some devices need indirection
+ * buffers */
+extern volatile int in_scan_scsis;
+extern const unsigned char scsi_command_size[8];
+
+
+/*
+ * These are the error handling functions defined in scsi_error.c
+ */
+extern void scsi_times_out(Scsi_Cmnd * SCpnt);
+extern void scsi_add_timer(Scsi_Cmnd * SCset, int timeout,
+ void (*complete) (Scsi_Cmnd *));
+extern int scsi_delete_timer(Scsi_Cmnd * SCset);
+extern void scsi_error_handler(void *host);
+extern int scsi_sense_valid(Scsi_Cmnd *);
+extern int scsi_decide_disposition(Scsi_Cmnd * SCpnt);
+extern int scsi_block_when_processing_errors(Scsi_Device *);
+extern void scsi_sleep(int);
+
+/*
+ * Prototypes for functions in scsicam.c
+ */
+extern int scsi_partsize(struct buffer_head *bh, unsigned long capacity,
+ unsigned int *cyls, unsigned int *hds,
+ unsigned int *secs);
+
+/*
+ * Prototypes for functions in scsi_dma.c
+ */
+void scsi_resize_dma_pool(void);
+int scsi_init_minimal_dma_pool(void);
+void *scsi_malloc(unsigned int);
+int scsi_free(void *, unsigned int);
+
+/*
+ * Prototypes for functions in scsi_merge.c
+ */
+extern void recount_segments(Scsi_Cmnd * SCpnt);
+extern void initialize_merge_fn(Scsi_Device * SDpnt);
+
+/*
+ * Prototypes for functions in scsi_queue.c
+ */
+extern int scsi_mlqueue_insert(Scsi_Cmnd * cmd, int reason);
+
+/*
+ * Prototypes for functions in scsi_lib.c
+ */
+extern int scsi_maybe_unblock_host(Scsi_Device * SDpnt);
+extern Scsi_Cmnd *scsi_end_request(Scsi_Cmnd * SCpnt, int uptodate,
+ int sectors);
+extern struct Scsi_Device_Template *scsi_get_request_dev(struct request *);
+extern int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt);
+extern int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int);
+extern void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors,
+ int block_sectors);
+extern void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt);
+extern void scsi_request_fn(request_queue_t * q);
+extern int scsi_starvation_completion(Scsi_Device * SDpnt);
+
+/*
+ * Prototypes for functions in scsi.c
+ */
+extern int scsi_dispatch_cmd(Scsi_Cmnd * SCpnt);
+extern void scsi_bottom_half_handler(void);
+extern void scsi_release_commandblocks(Scsi_Device * SDpnt);
+extern void scsi_build_commandblocks(Scsi_Device * SDpnt);
+extern void scsi_done(Scsi_Cmnd * SCpnt);
+extern void scsi_finish_command(Scsi_Cmnd *);
+extern int scsi_retry_command(Scsi_Cmnd *);
+extern Scsi_Cmnd *scsi_allocate_device(Scsi_Device *, int, int);
+extern void __scsi_release_command(Scsi_Cmnd *);
+extern void scsi_release_command(Scsi_Cmnd *);
+extern void scsi_do_cmd(Scsi_Cmnd *, const void *cmnd,
+ void *buffer, unsigned bufflen,
+ void (*done) (struct scsi_cmnd *),
+ int timeout, int retries);
+extern int scsi_dev_init(void);
+
+/*
+ * Newer request-based interfaces.
+ */
+extern Scsi_Request *scsi_allocate_request(Scsi_Device *);
+extern void scsi_release_request(Scsi_Request *);
+extern void scsi_wait_req(Scsi_Request *, const void *cmnd,
+ void *buffer, unsigned bufflen,
+ int timeout, int retries);
+
+extern void scsi_do_req(Scsi_Request *, const void *cmnd,
+ void *buffer, unsigned bufflen,
+ void (*done) (struct scsi_cmnd *),
+ int timeout, int retries);
+extern int scsi_insert_special_req(Scsi_Request * SRpnt, int);
+extern void scsi_init_cmd_from_req(Scsi_Cmnd *, Scsi_Request *);
+
+
+/*
+ * Prototypes for functions/data in hosts.c
+ */
+extern int max_scsi_hosts;
+
+/*
+ * Prototypes for functions in scsi_proc.c
+ */
+extern void proc_print_scsidevice(Scsi_Device *, char *, int *, int);
+extern struct proc_dir_entry *proc_scsi;
+
+/*
+ * Prototypes for functions in constants.c
+ */
+extern void print_command(unsigned char *);
+extern void print_sense(const char *, Scsi_Cmnd *);
+extern void print_req_sense(const char *, Scsi_Request *);
+extern void print_driverbyte(int scsiresult);
+extern void print_hostbyte(int scsiresult);
+extern void print_status (int status);
+
+/*
+ * The scsi_device struct contains what we know about each given scsi
+ * device.
+ *
+ * FIXME(eric) - one of the great regrets that I have is that I failed to define
+ * these structure elements as something like sdev_foo instead of foo. This would
+ * make it so much easier to grep through sources and so forth. I propose that
+ * all new elements that get added to these structures follow this convention.
+ * As time goes on and as people have the stomach for it, it should be possible to
+ * go back and retrofit at least some of the elements here with the prefix.
+ */
+
+struct scsi_device {
+/* private: */
+ /*
+ * This information is private to the scsi mid-layer. Wrapping it in a
+ * struct private is a way of marking it in a sort of C++ type of way.
+ */
+ struct scsi_device *next; /* Used for linked list */
+ struct scsi_device *prev; /* Used for linked list */
+#if 0
+ wait_queue_head_t scpnt_wait; /* Used to wait if
+ device is busy */
+#endif
+
+ struct Scsi_Host *host;
+ request_queue_t request_queue;
+ atomic_t device_active; /* commands checked out for device */
+ volatile unsigned short device_busy; /* commands actually active on low-level */
+ int (*scsi_init_io_fn) (Scsi_Cmnd *); /* Used to initialize
+ new request */
+ Scsi_Cmnd *device_queue; /* queue of SCSI Command structures */
+
+/* public: */
+ unsigned int id, lun, channel;
+
+ unsigned int manufacturer; /* Manufacturer of device, for using
+ * vendor-specific cmd's */
+ unsigned sector_size; /* size in bytes */
+
+ int attached; /* # of high level drivers attached to this */
+ int detected; /* Delta attached - don't use in drivers! */
+ int access_count; /* Count of open channels/mounts */
+
+ void *hostdata; /* available to low-level driver */
+#if 0
+ devfs_handle_t de; /* directory for the device */
+#endif
+ char type;
+ char scsi_level;
+ char vendor[8], model[16], rev[4];
+ unsigned char current_tag; /* current tag */
+ unsigned char sync_min_period; /* Not less than this period */
+ unsigned char sync_max_offset; /* Not greater than this offset */
+ unsigned char queue_depth; /* How deep a queue to use */
+
+ unsigned online:1;
+ unsigned writeable:1;
+ unsigned removable:1;
+ unsigned random:1;
+ unsigned has_cmdblocks:1;
+ unsigned changed:1; /* Data invalid due to media change */
+ unsigned busy:1; /* Used to prevent races */
+ unsigned lockable:1; /* Able to prevent media removal */
+ unsigned borken:1; /* Tell the Seagate driver to be
+ * painfully slow on this device */
+ unsigned tagged_supported:1; /* Supports SCSI-II tagged queuing */
+ unsigned tagged_queue:1; /* SCSI-II tagged queuing enabled */
+ unsigned disconnect:1; /* can disconnect */
+ unsigned soft_reset:1; /* Uses soft reset option */
+ unsigned sync:1; /* Negotiate for sync transfers */
+ unsigned wide:1; /* Negotiate for WIDE transfers */
+ unsigned single_lun:1; /* Indicates we should only allow I/O to
+ * one of the luns for the device at a
+ * time. */
+ unsigned was_reset:1; /* There was a bus reset on the bus for
+ * this device */
+ unsigned expecting_cc_ua:1; /* Expecting a CHECK_CONDITION/UNIT_ATTN
+ * because we did a bus reset. */
+ unsigned device_blocked:1; /* Device returned QUEUE_FULL. */
+ unsigned ten:1; /* support ten byte read / write */
+ unsigned remap:1; /* support remapping */
+ unsigned starved:1; /* unable to process commands because
+ host busy */
+
+ /* Flag to allow revalidate to succeed in sd_open */
+ int allow_revalidate;
+};
+
+
+/*
+ * The Scsi_Cmnd structure is used by scsi.c internally, and for communication
+ * with low level drivers that support multiple outstanding commands.
+ */
+typedef struct scsi_pointer {
+ char *ptr; /* data pointer */
+ int this_residual; /* left in this buffer */
+ struct scatterlist *buffer; /* which buffer */
+ int buffers_residual; /* how many buffers left */
+
+ dma_addr_t dma_handle;
+
+ volatile int Status;
+ volatile int Message;
+ volatile int have_data_in;
+ volatile int sent_command;
+ volatile int phase;
+} Scsi_Pointer;
+
+/*
+ * This is essentially a slimmed down version of Scsi_Cmnd. The point of
+ * having this is that requests that are injected into the queue as result
+ * of things like ioctls and character devices shouldn't be using a
+ * Scsi_Cmnd until such a time that the command is actually at the head
+ * of the queue and being sent to the driver.
+ */
+struct scsi_request {
+ int sr_magic;
+ int sr_result; /* Status code from lower level driver */
+ unsigned char sr_sense_buffer[SCSI_SENSE_BUFFERSIZE];
+ /* obtained by REQUEST SENSE when CHECK CONDITION is received
+ on original command (auto-sense) */
+
+ struct Scsi_Host *sr_host;
+ Scsi_Device *sr_device;
+ Scsi_Cmnd *sr_command;
+#define SMHHACK
+#ifdef SMHHACK
+ void *freeaddr;
+#endif
+ struct request sr_request; /* A copy of the command we are
+ working on */
+ unsigned sr_bufflen; /* Size of data buffer */
+ void *sr_buffer; /* Data buffer */
+ int sr_allowed;
+ unsigned char sr_data_direction;
+ unsigned char sr_cmd_len;
+ unsigned char sr_cmnd[MAX_COMMAND_SIZE];
+ void (*sr_done) (struct scsi_cmnd *); /* Mid-level done function */
+ int sr_timeout_per_command;
+ unsigned short sr_use_sg; /* Number of pieces of scatter-gather */
+ unsigned short sr_sglist_len; /* size of malloc'd scatter-gather list */
+ unsigned sr_underflow; /* Return error if less than
+ this amount is transferred */
+};
+
+/*
+ * FIXME(eric) - one of the great regrets that I have is that I failed to define
+ * these structure elements as something like sc_foo instead of foo. This would
+ * make it so much easier to grep through sources and so forth. I propose that
+ * all new elements that get added to these structures follow this convention.
+ * As time goes on and as people have the stomach for it, it should be possible to
+ * go back and retrofit at least some of the elements here with the prefix.
+ */
+struct scsi_cmnd {
+ int sc_magic;
+/* private: */
+ /*
+ * This information is private to the scsi mid-layer. Wrapping it in a
+ * struct private is a way of marking it in a sort of C++ type of way.
+ */
+ struct Scsi_Host *host;
+ unsigned short state;
+ unsigned short owner;
+ Scsi_Device *device;
+ Scsi_Request *sc_request;
+ struct scsi_cmnd *next;
+ struct scsi_cmnd *reset_chain;
+
+ int eh_state; /* Used for state tracking in error handlr */
+ void (*done) (struct scsi_cmnd *); /* Mid-level done function */
+ /*
+ A SCSI Command is assigned a nonzero serial_number when internal_cmnd
+ passes it to the driver's queue command function. The serial_number
+ is cleared when scsi_done is entered indicating that the command has
+ been completed. If a timeout occurs, the serial number at the moment
+ of timeout is copied into serial_number_at_timeout. By subsequently
+ comparing the serial_number and serial_number_at_timeout fields
+ during abort or reset processing, we can detect whether the command
+ has already completed. This also detects cases where the command has
+ completed and the SCSI Command structure has already been reused
+ for another command, so that we can avoid incorrectly aborting or
+ resetting the new command.
+ */
+
+ unsigned long serial_number;
+ unsigned long serial_number_at_timeout;
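+
+ /*
+ * Illustrative use (matching the abort/reset paths described above):
+ * if serial_number == 0, or
+ * serial_number != serial_number_at_timeout,
+ * the command completed (or was recycled) before the timeout handler
+ * ran, so it must not be aborted or reset.
+ */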
+
+ int retries;
+ int allowed;
+ int timeout_per_command;
+ int timeout_total;
+ int timeout;
+
+ /*
+ * We handle the timeout differently if it happens when a reset,
+ * abort, etc are in process.
+ */
+ unsigned volatile char internal_timeout;
+ struct scsi_cmnd *bh_next; /* To enumerate the commands waiting
+ to be processed. */
+
+/* public: */
+
+ unsigned int target;
+ unsigned int lun;
+ unsigned int channel;
+ unsigned char cmd_len;
+ unsigned char old_cmd_len;
+ unsigned char sc_data_direction;
+ unsigned char sc_old_data_direction;
+
+ /* These elements define the operation we are about to perform */
+ unsigned char cmnd[MAX_COMMAND_SIZE];
+ unsigned request_bufflen; /* Actual request size */
+
+ struct timer_list eh_timeout; /* Used to time out the command. */
+ void *request_buffer; /* Actual requested buffer */
+ void **bounce_buffers; /* Array of bounce buffers when using scatter-gather */
+
+ /* These elements define the operation we ultimately want to perform */
+ unsigned char data_cmnd[MAX_COMMAND_SIZE];
+ unsigned short old_use_sg; /* We save use_sg here when requesting
+ * sense info */
+ unsigned short use_sg; /* Number of pieces of scatter-gather */
+ unsigned short sglist_len; /* size of malloc'd scatter-gather list */
+ unsigned short abort_reason; /* If the mid-level code requests an
+ * abort, this is the reason. */
+ unsigned bufflen; /* Size of data buffer */
+ void *buffer; /* Data buffer */
+
+ unsigned underflow; /* Return error if less than
+ this amount is transferred */
+ unsigned old_underflow; /* save underflow here when reusing the
+ * command for error handling */
+
+ unsigned transfersize; /* How much we are guaranteed to
+ transfer with each SCSI transfer
+ (ie, between disconnect /
+ reconnects). Probably == sector
+ size */
+
+ int resid; /* Number of bytes requested to be
+ transferred less actual number
+ transferred (0 if not supported) */
+
+ struct request request; /* A copy of the command we are
+ working on */
+
+ unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE]; /* obtained by REQUEST SENSE
+ * when CHECK CONDITION is
+ * received on original command
+ * (auto-sense) */
+
+ unsigned flags;
+
+ /*
+ * Used to indicate that a command which has timed out also
+ * completed normally. Typically the completion function will
+ * do nothing but set this flag in this instance because the
+ * timeout handler is already running.
+ */
+ unsigned done_late:1;
+
+ /* Low-level done function - can be used by low-level driver to point
+ * to completion function. Not used by mid/upper level code. */
+ void (*scsi_done) (struct scsi_cmnd *);
+
+ /*
+ * The following fields can be written to by the host specific code.
+ * Everything else should be left alone.
+ */
+
+ Scsi_Pointer SCp; /* Scratchpad used by some host adapters */
+
+ unsigned char *host_scribble; /* The host adapter is allowed to
+ * call scsi_malloc and get some memory
+ * and hang it here. The host adapter
+ * is also expected to call scsi_free
+ * to release this memory. (The memory
+ * obtained by scsi_malloc is guaranteed
+ * to be at an address < 16Mb). */
+
+ int result; /* Status code from lower level driver */
+
+ unsigned char tag; /* SCSI-II queued command tag */
+ unsigned long pid; /* Process ID, starts at 0 */
+};
+
+/*
+ * Flag bit for the internal_timeout array
+ */
+#define NORMAL_TIMEOUT 0
+
+/*
+ * Definitions and prototypes used for scsi mid-level queue.
+ */
+#define SCSI_MLQUEUE_HOST_BUSY 0x1055
+#define SCSI_MLQUEUE_DEVICE_BUSY 0x1056
+
+#if 0
+#define SCSI_SLEEP(QUEUE, CONDITION) { \
+ if (CONDITION) { \
+ DECLARE_WAITQUEUE(wait, current); \
+ add_wait_queue(QUEUE, &wait); \
+ for(;;) { \
+ set_current_state(TASK_UNINTERRUPTIBLE); \
+ if (CONDITION) { \
+ if (in_interrupt()) \
+ panic("scsi: trying to call schedule() in interrupt" \
+ ", file %s, line %d.\n", __FILE__, __LINE__); \
+ schedule(); \
+ } \
+ else \
+ break; \
+ } \
+ remove_wait_queue(QUEUE, &wait);\
+ current->state = TASK_RUNNING; \
+ }; }
+#else
+#define SCSI_SLEEP(QUEUE, CONDITION) { if (CONDITION) { printk("SCSI_SLEEP!\n"); BUG(); } }
+#endif
+
+
+
+
+/*
+ * old style reset request from external source
+ * (private to sg.c and scsi_error.c, supplied by scsi_obsolete.c)
+ */
+#define SCSI_TRY_RESET_DEVICE 1
+#define SCSI_TRY_RESET_BUS 2
+#define SCSI_TRY_RESET_HOST 3
+
+extern int scsi_reset_provider(Scsi_Device *, int);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/scsi_dma.c b/xen/drivers/scsi/scsi_dma.c
new file mode 100644
index 0000000000..94c2118da0
--- /dev/null
+++ b/xen/drivers/scsi/scsi_dma.c
@@ -0,0 +1,455 @@
+/*
+ * scsi_dma.c Copyright (C) 2000 Eric Youngdale
+ *
+ * mid-level SCSI DMA bounce buffer allocator
+ *
+ */
+
+#define __NO_VERSION__
+#include <xeno/config.h>
+#include <xeno/module.h>
+#include <xeno/blk.h>
+
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+#ifdef CONFIG_KMOD
+#include <linux/kmod.h>
+#endif
+
+/*
+ * PAGE_SIZE must be a multiple of the sector size (512). True
+ * for all reasonably recent architectures (even the VAX...).
+ */
+#define SECTOR_SIZE 512
+#define SECTORS_PER_PAGE (PAGE_SIZE/SECTOR_SIZE)
+
+#if SECTORS_PER_PAGE <= 8
+typedef unsigned char FreeSectorBitmap;
+#elif SECTORS_PER_PAGE <= 32
+typedef unsigned int FreeSectorBitmap;
+#else
+#error You lose.
+#endif
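+
+/*
+ * Example: with 4KB pages SECTORS_PER_PAGE is 8, so FreeSectorBitmap is
+ * an unsigned char and each DMA page is tracked by a single byte, one
+ * bit per 512-byte sector.
+ */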
+
+/*
+ * Used for access to internal allocator used for DMA safe buffers.
+ */
+static spinlock_t allocator_request_lock = SPIN_LOCK_UNLOCKED;
+
+static FreeSectorBitmap *dma_malloc_freelist = NULL;
+static int need_isa_bounce_buffers;
+static unsigned int dma_sectors = 0;
+unsigned int scsi_dma_free_sectors = 0;
+unsigned int scsi_need_isa_buffer = 0;
+static unsigned char **dma_malloc_pages = NULL;
+
+/*
+ * Function: scsi_malloc
+ *
+ * Purpose: Allocate memory from the DMA-safe pool.
+ *
+ * Arguments: len - amount of memory we need.
+ *
+ * Lock status: No locks assumed to be held. This function is SMP-safe.
+ *
+ * Returns: Pointer to memory block.
+ *
+ * Notes: Prior to the new queue code, this function was not SMP-safe.
+ * This function can only allocate in units of sectors
+ * (i.e. 512 bytes).
+ *
+ * We cannot use the normal system allocator because we need
+ * to be able to guarantee that we can process a complete disk
+ * I/O request without touching the system allocator. Think
+ * about it - if the system were heavily swapping, and tried to
+ * write out a block of memory to disk, and the SCSI code needed
+ * to allocate more memory in order to be able to write the
+ * data to disk, you would wedge the system.
+ */
+void *scsi_malloc(unsigned int len)
+{
+ unsigned int nbits, mask;
+ unsigned long flags;
+
+ int i, j;
+ if (len % SECTOR_SIZE != 0 || len > PAGE_SIZE)
+ return NULL;
+
+ nbits = len >> 9;
+ mask = (1 << nbits) - 1;
+
+ spin_lock_irqsave(&allocator_request_lock, flags);
+
+ for (i = 0; i < dma_sectors / SECTORS_PER_PAGE; i++)
+ for (j = 0; j <= SECTORS_PER_PAGE - nbits; j++) {
+ if ((dma_malloc_freelist[i] & (mask << j)) == 0) {
+ dma_malloc_freelist[i] |= (mask << j);
+ scsi_dma_free_sectors -= nbits;
+#ifdef DEBUG
+ SCSI_LOG_MLQUEUE(3, printk("SMalloc: %d %p [From:%p]\n", len, dma_malloc_pages[i] + (j << 9), __builtin_return_address(0)));
+ printk("SMalloc: %d %p [From:%p]\n", len, dma_malloc_pages[i] + (j << 9), __builtin_return_address(0));
+#endif
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ return (void *) ((unsigned long) dma_malloc_pages[i] + (j << 9));
+ }
+ }
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ return NULL; /* Nope. No more */
+}
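+
+#if 0
+/*
+ * Usage sketch (illustrative, compiled out): lengths must be a multiple
+ * of SECTOR_SIZE and at most PAGE_SIZE, and the free must quote the same
+ * length as the matching allocation.
+ */
+static void example_dma_buffer_use(void)
+{
+ void *buf = scsi_malloc(2 * SECTOR_SIZE); /* two 512-byte sectors */
+ if (buf != NULL) {
+ /* ... use buf for DMA ... */
+ scsi_free(buf, 2 * SECTOR_SIZE); /* length must match */
+ }
+}
+#endif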
+
+/*
+ * Function: scsi_free
+ *
+ * Purpose: Free memory into the DMA-safe pool.
+ *
+ * Arguments: ptr - data block we are freeing.
+ * len - size of block we are freeing.
+ *
+ * Lock status: No locks assumed to be held. This function is SMP-safe.
+ *
+ * Returns: 0 on success; panics on an invalid free.
+ *
+ * Notes: This function *must* only be used to free memory
+ * allocated from scsi_malloc().
+ *
+ * Prior to the new queue code, this function was not SMP-safe.
+ * This function can only free in units of sectors
+ * (i.e. 512 bytes).
+ */
+int scsi_free(void *obj, unsigned int len)
+{
+ unsigned int page, sector, nbits, mask;
+ unsigned long flags;
+
+#ifdef DEBUG
+ unsigned long ret = 0;
+
+#ifdef __mips__
+ __asm__ __volatile__("move\t%0,$31":"=r"(ret));
+#else
+ ret = (unsigned long) __builtin_return_address(0);
+#endif
+ printk("scsi_free %p %d\n", obj, len);
+ SCSI_LOG_MLQUEUE(3, printk("SFree: %p %d\n", obj, len));
+#endif
+
+ spin_lock_irqsave(&allocator_request_lock, flags);
+
+ for (page = 0; page < dma_sectors / SECTORS_PER_PAGE; page++) {
+ unsigned long page_addr = (unsigned long) dma_malloc_pages[page];
+ if ((unsigned long) obj >= page_addr &&
+ (unsigned long) obj < page_addr + PAGE_SIZE) {
+ sector = (((unsigned long) obj) - page_addr) >> 9;
+
+ nbits = len >> 9;
+ mask = (1 << nbits) - 1;
+
+ if (sector + nbits > SECTORS_PER_PAGE)
+ panic("scsi_free:Bad memory alignment");
+
+ if ((dma_malloc_freelist[page] &
+ (mask << sector)) != (mask << sector)) {
+#ifdef DEBUG
+ printk("scsi_free(obj=%p, len=%d) called from %08lx\n",
+ obj, len, ret);
+#endif
+ panic("scsi_free:Trying to free unused memory");
+ }
+ scsi_dma_free_sectors += nbits;
+ dma_malloc_freelist[page] &= ~(mask << sector);
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ return 0;
+ }
+ }
+ panic("scsi_free:Bad offset");
+ return -1;
+}
+
+
+/*
+ * Function: scsi_resize_dma_pool
+ *
+ * Purpose: Ensure that the DMA pool is sufficiently large to be
+ * able to guarantee that we can always process I/O requests
+ * without calling the system allocator.
+ *
+ * Arguments: None.
+ *
+ * Lock status: No locks assumed to be held. This function is SMP-safe.
+ *
+ * Returns: Nothing
+ *
+ * Notes: Prior to the new queue code, this function was not SMP-safe.
+ * Go through the device list and recompute the most appropriate
+ * size for the dma pool. Then grab more memory (as required).
+ */
+void scsi_resize_dma_pool(void)
+{
+ int i, k;
+ unsigned long size;
+ unsigned long flags;
+ struct Scsi_Host *shpnt;
+ struct Scsi_Host *host = NULL;
+ Scsi_Device *SDpnt;
+ FreeSectorBitmap *new_dma_malloc_freelist = NULL;
+ unsigned int new_dma_sectors = 0;
+ unsigned int new_need_isa_buffer = 0;
+ unsigned char **new_dma_malloc_pages = NULL;
+ int out_of_space = 0;
+
+ spin_lock_irqsave(&allocator_request_lock, flags);
+
+ if (!scsi_hostlist) {
+ /*
+ * Free up the DMA pool.
+ */
+ if (scsi_dma_free_sectors != dma_sectors)
+ panic("SCSI DMA pool memory leak %d %d\n",
+ scsi_dma_free_sectors, dma_sectors);
+
+ for (i = 0; i < dma_sectors / SECTORS_PER_PAGE; i++)
+ free_pages((unsigned long) dma_malloc_pages[i], 0);
+ if (dma_malloc_pages)
+ kfree((char *) dma_malloc_pages);
+ dma_malloc_pages = NULL;
+ if (dma_malloc_freelist)
+ kfree((char *) dma_malloc_freelist);
+ dma_malloc_freelist = NULL;
+ dma_sectors = 0;
+ scsi_dma_free_sectors = 0;
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ return;
+ }
+ /* Next, check to see if we need to extend the DMA buffer pool */
+
+ new_dma_sectors = 2 * SECTORS_PER_PAGE; /* Base value we use */
+
+#if 0
+ if (__pa(high_memory) - 1 > ISA_DMA_THRESHOLD)
+ need_isa_bounce_buffers = 1;
+ else
+#endif
+ need_isa_bounce_buffers = 0;
+
+ if (scsi_devicelist)
+ for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next)
+ new_dma_sectors += SECTORS_PER_PAGE; /* Increment for each host */
+
+ for (host = scsi_hostlist; host; host = host->next) {
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ /*
+ * sd and sr drivers allocate scatterlists.
+ * sr drivers may allocate for each command 1x2048 or 2x1024 extra
+ * buffers for 2k sector size and 1k fs.
+ * sg driver allocates buffers < 4k.
+ * st driver does not need buffers from the dma pool.
+ * estimate 4k buffer/command for devices of unknown type (should panic).
+ */
+ if (SDpnt->type == TYPE_WORM || SDpnt->type == TYPE_ROM ||
+ SDpnt->type == TYPE_DISK || SDpnt->type == TYPE_MOD) {
+ int nents = host->sg_tablesize;
+#ifdef DMA_CHUNK_SIZE
+ /* If the architecture does DMA sg merging, make sure
+ we count with at least 64 entries even for HBAs
+ which handle very few sg entries. */
+ if (nents < 64) nents = 64;
+#endif
+ new_dma_sectors += ((nents *
+ sizeof(struct scatterlist) + 511) >> 9) *
+ SDpnt->queue_depth;
+ if (SDpnt->type == TYPE_WORM || SDpnt->type == TYPE_ROM)
+ new_dma_sectors += (2048 >> 9) * SDpnt->queue_depth;
+ } else if (SDpnt->type == TYPE_SCANNER ||
+ SDpnt->type == TYPE_PRINTER ||
+ SDpnt->type == TYPE_PROCESSOR ||
+ SDpnt->type == TYPE_COMM ||
+ SDpnt->type == TYPE_MEDIUM_CHANGER ||
+ SDpnt->type == TYPE_ENCLOSURE) {
+ new_dma_sectors += (4096 >> 9) * SDpnt->queue_depth;
+ } else {
+ if (SDpnt->type != TYPE_TAPE) {
+ printk("resize_dma_pool: unknown device type %d\n", SDpnt->type);
+ new_dma_sectors += (4096 >> 9) * SDpnt->queue_depth;
+ }
+ }
+
+ if (host->unchecked_isa_dma &&
+ need_isa_bounce_buffers &&
+ SDpnt->type != TYPE_TAPE) {
+ new_dma_sectors += (PAGE_SIZE >> 9) * host->sg_tablesize *
+ SDpnt->queue_depth;
+ new_need_isa_buffer++;
+ }
+ }
+ }
+
+#ifdef DEBUG_INIT
+ printk("resize_dma_pool: needed dma sectors = %d\n", new_dma_sectors);
+#endif
+
+ /* round up to a multiple of 16 sectors and mask to at most 0xfff0
+ * sectors (~32MB); e.g. a request for 17 sectors becomes 32: */
+ new_dma_sectors = (new_dma_sectors + 15) & 0xfff0;
+
+ /*
+ * We never shrink the buffers - this leads to
+ * race conditions that I would rather not even think
+ * about right now.
+ */
+#if 0 /* Why do this? No gain and risks out_of_space */
+ if (new_dma_sectors < dma_sectors)
+ new_dma_sectors = dma_sectors;
+#endif
+ if (new_dma_sectors <= dma_sectors) {
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ return; /* best to quit while we are in front */
+ }
+
+ for (k = 0; k < 20; ++k) { /* just in case */
+ out_of_space = 0;
+ size = (new_dma_sectors / SECTORS_PER_PAGE) *
+ sizeof(FreeSectorBitmap);
+ new_dma_malloc_freelist = (FreeSectorBitmap *)
+ kmalloc(size, GFP_ATOMIC);
+ if (new_dma_malloc_freelist) {
+ memset(new_dma_malloc_freelist, 0, size);
+ size = (new_dma_sectors / SECTORS_PER_PAGE) *
+ sizeof(*new_dma_malloc_pages);
+ new_dma_malloc_pages = (unsigned char **)
+ kmalloc(size, GFP_ATOMIC);
+ if (!new_dma_malloc_pages) {
+ size = (new_dma_sectors / SECTORS_PER_PAGE) *
+ sizeof(FreeSectorBitmap);
+ kfree((char *) new_dma_malloc_freelist);
+ out_of_space = 1;
+ } else {
+ memset(new_dma_malloc_pages, 0, size);
+ }
+ } else
+ out_of_space = 1;
+
+ if ((!out_of_space) && (new_dma_sectors > dma_sectors)) {
+ for (i = dma_sectors / SECTORS_PER_PAGE;
+ i < new_dma_sectors / SECTORS_PER_PAGE; i++) {
+ new_dma_malloc_pages[i] = (unsigned char *)
+ __get_free_pages(GFP_ATOMIC | GFP_DMA, 0);
+ if (!new_dma_malloc_pages[i])
+ break;
+ }
+ if (i != new_dma_sectors / SECTORS_PER_PAGE) { /* clean up */
+ int k = i;
+
+ out_of_space = 1;
+ for (i = 0; i < k; ++i)
+ free_pages((unsigned long) new_dma_malloc_pages[i], 0);
+ }
+ }
+ if (out_of_space) { /* try scaling down new_dma_sectors request */
+ printk("scsi::resize_dma_pool: WARNING, dma_sectors=%u, "
+ "wanted=%u, scaling\n", dma_sectors, new_dma_sectors);
+ if (new_dma_sectors < (8 * SECTORS_PER_PAGE))
+ break; /* pretty well hopeless ... */
+ new_dma_sectors = (new_dma_sectors * 3) / 4;
+ new_dma_sectors = (new_dma_sectors + 15) & 0xfff0;
+ if (new_dma_sectors <= dma_sectors)
+ break; /* stick with what we have got */
+ } else
+ break; /* found space ... */
+ } /* end of for loop */
+ if (out_of_space) {
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ scsi_need_isa_buffer = new_need_isa_buffer; /* some useful info */
+ printk(" WARNING, not enough memory, pool not expanded\n");
+ return;
+ }
+ /* When we dick with the actual DMA list, we need to
+ * protect things
+ */
+ if (dma_malloc_freelist) {
+ size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(FreeSectorBitmap);
+ memcpy(new_dma_malloc_freelist, dma_malloc_freelist, size);
+ kfree((char *) dma_malloc_freelist);
+ }
+ dma_malloc_freelist = new_dma_malloc_freelist;
+
+ if (dma_malloc_pages) {
+ size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(*dma_malloc_pages);
+ memcpy(new_dma_malloc_pages, dma_malloc_pages, size);
+ kfree((char *) dma_malloc_pages);
+ }
+ scsi_dma_free_sectors += new_dma_sectors - dma_sectors;
+ dma_malloc_pages = new_dma_malloc_pages;
+ dma_sectors = new_dma_sectors;
+ scsi_need_isa_buffer = new_need_isa_buffer;
+
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+
+#ifdef DEBUG_INIT
+ printk("resize_dma_pool: dma free sectors = %d\n", scsi_dma_free_sectors);
+ printk("resize_dma_pool: dma sectors = %d\n", dma_sectors);
+ printk("resize_dma_pool: need isa buffers = %d\n", scsi_need_isa_buffer);
+#endif
+}
+
+/*
+ * Function: scsi_init_minimal_dma_pool
+ *
+ * Purpose: Allocate a minimal (1-page) DMA pool.
+ *
+ * Arguments: None.
+ *
+ * Lock status: No locks assumed to be held. This function is SMP-safe.
+ *
+ * Returns: 0 on success, 1 if the pool could not be allocated.
+ *
+ * Notes:
+ */
+int scsi_init_minimal_dma_pool(void)
+{
+ unsigned long size;
+ unsigned long flags;
+ int has_space = 0;
+
+ spin_lock_irqsave(&allocator_request_lock, flags);
+
+ dma_sectors = PAGE_SIZE / SECTOR_SIZE;
+ scsi_dma_free_sectors = dma_sectors;
+ /*
+ * Set up a minimal DMA buffer list - this will be used during scan_scsis
+ * in some cases.
+ */
+
+ /* One bit per sector to indicate free/busy */
+ size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(FreeSectorBitmap);
+ dma_malloc_freelist = (FreeSectorBitmap *)
+ kmalloc(size, GFP_ATOMIC);
+ if (dma_malloc_freelist) {
+ memset(dma_malloc_freelist, 0, size);
+ /* One pointer per page for the page list */
+ size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(*dma_malloc_pages);
+ dma_malloc_pages = (unsigned char **) kmalloc(size, GFP_ATOMIC);
+ if (dma_malloc_pages) {
+ memset(dma_malloc_pages, 0, size);
+ dma_malloc_pages[0] = (unsigned char *)
+ __get_free_pages(GFP_ATOMIC | GFP_DMA, 0);
+ if (dma_malloc_pages[0])
+ has_space = 1;
+ }
+ }
+ if (!has_space) {
+ if (dma_malloc_freelist) {
+ kfree((char *) dma_malloc_freelist);
+ if (dma_malloc_pages)
+ kfree((char *) dma_malloc_pages);
+ }
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ printk("scsi::init_module: failed, out of memory\n");
+ return 1;
+ }
+
+ spin_unlock_irqrestore(&allocator_request_lock, flags);
+ return 0;
+}
diff --git a/xen/drivers/scsi/scsi_error.c b/xen/drivers/scsi/scsi_error.c
new file mode 100644
index 0000000000..6c043937be
--- /dev/null
+++ b/xen/drivers/scsi/scsi_error.c
@@ -0,0 +1,2063 @@
+/*
+ * scsi_error.c Copyright (C) 1997 Eric Youngdale
+ *
+ * SCSI error/timeout handling
+ * Initial versions: Eric Youngdale. Based upon conversations with
+ * Leonard Zubkoff and David Miller at Linux Expo,
+ * ideas originating from all over the place.
+ *
+ */
+
+#define __NO_VERSION__
+#include <xeno/module.h>
+
+#include <xeno/sched.h>
+#include <xeno/timer.h>
+/*#include <xeno/string.h>*/
+#include <xeno/slab.h>
+#include <xeno/ioport.h>
+#include <xeno/kernel.h>
+/*#include <xeno/stat.h>*/
+#include <xeno/blk.h>
+#include <xeno/interrupt.h>
+#include <xeno/delay.h>
+/*#include <xeno/smp_lock.h>*/
+
+#define __KERNEL_SYSCALLS__
+
+/*#include <xeno/unistd.h>*/
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+/*
+ * We must always allow SHUTDOWN_SIGS. Even if we are not a module,
+ * the host drivers that we are using may be loaded as modules, and
+ * when we unload these, we need to ensure that the error handler thread
+ * can be shut down.
+ *
+ * Note - when we unload a module, we send a SIGHUP. We mustn't
+ * enable SIGTERM, as this is how the init shuts things down when you
+ * go to single-user mode. For that matter, init also sends SIGKILL,
+ * so we mustn't enable that one either. We use SIGHUP instead. Other
+ * options would be SIGPWR, I suppose.
+ */
+#define SHUTDOWN_SIGS (sigmask(SIGHUP))
+
+#ifdef DEBUG
+#define SENSE_TIMEOUT SCSI_TIMEOUT
+#define ABORT_TIMEOUT SCSI_TIMEOUT
+#define RESET_TIMEOUT SCSI_TIMEOUT
+#else
+#define SENSE_TIMEOUT (10*HZ)
+#define RESET_TIMEOUT (2*HZ)
+#define ABORT_TIMEOUT (15*HZ)
+#endif
+
+#define STATIC
+
+/*
+ * These should *probably* be handled by the host itself.
+ * Since it is allowed to sleep, it probably should.
+ */
+#define BUS_RESET_SETTLE_TIME 5*HZ
+#define HOST_RESET_SETTLE_TIME 10*HZ
+
+
+static const char RCSid[] = "$Header: /mnt/ide/home/eric/CVSROOT/linux/drivers/scsi/scsi_error.c,v 1.10 1997/12/08 04:50:35 eric Exp $";
+
+STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt);
+STATIC int scsi_request_sense(Scsi_Cmnd *);
+STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout);
+STATIC int scsi_try_to_abort_command(Scsi_Cmnd *, int);
+STATIC int scsi_test_unit_ready(Scsi_Cmnd *);
+STATIC int scsi_try_bus_device_reset(Scsi_Cmnd *, int timeout);
+STATIC int scsi_try_bus_reset(Scsi_Cmnd *);
+STATIC int scsi_try_host_reset(Scsi_Cmnd *);
+STATIC int scsi_unit_is_ready(Scsi_Cmnd *);
+STATIC void scsi_eh_action_done(Scsi_Cmnd *, int);
+STATIC int scsi_eh_retry_command(Scsi_Cmnd *);
+STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt);
+STATIC void scsi_restart_operations(struct Scsi_Host *);
+STATIC void scsi_eh_finish_command(Scsi_Cmnd ** SClist, Scsi_Cmnd * SCpnt);
+
+
+/*
+ * Function: scsi_add_timer()
+ *
+ * Purpose: Start timeout timer for a single scsi command.
+ *
+ * Arguments: SCset - command that is about to start running.
+ * timeout - amount of time to allow this command to run.
+ * complete - timeout function to call if timer isn't
+ * canceled.
+ *
+ * Returns: Nothing
+ *
+ * Notes: This should be turned into an inline function.
+ *
+ * More Notes: Each scsi command has its own timer, and as it is added to
+ * the queue, we set up the timer. When the command completes,
+ * we cancel the timer. Pretty simple, really, especially
+ * compared to the old way of handling this crap.
+ */
+void scsi_add_timer(Scsi_Cmnd * SCset,
+ int timeout,
+ void (*complete) (Scsi_Cmnd *))
+{
+
+ /*
+ * If the clock was already running for this command, then
+ * first delete the timer. The timer handling code gets rather
+ * confused if we don't do this.
+ */
+ if (SCset->eh_timeout.function != NULL) {
+ del_timer(&SCset->eh_timeout);
+ }
+ SCset->eh_timeout.data = (unsigned long) SCset;
+ SCset->eh_timeout.expires = jiffies + timeout;
+ SCset->eh_timeout.function = (void (*)(unsigned long)) complete;
+
+ SCset->done_late = 0;
+
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Adding timer for command %p at %d (%p)\n", SCset, timeout, complete));
+
+ add_timer(&SCset->eh_timeout);
+
+}
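+
+/*
+ * Illustrative pairing: the mid-layer arms the timer when a command is
+ * dispatched, e.g. scsi_add_timer(SCpnt, SCpnt->timeout_per_command,
+ * scsi_times_out), and the completion path calls scsi_delete_timer();
+ * a return of 0 there means the timeout has already fired and the
+ * completion must defer to it (see scsi_eh_done() below).
+ */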
+
+/*
+ * Function: scsi_delete_timer()
+ *
+ * Purpose: Delete/cancel timer for a given function.
+ *
+ * Arguments: SCset - command that we are canceling timer for.
+ *
+ * Returns: 1 if we were able to detach the timer. 0 if we
+ * blew it, and the timer function has already started
+ * to run.
+ *
+ * Notes: This should be turned into an inline function.
+ */
+int scsi_delete_timer(Scsi_Cmnd * SCset)
+{
+ int rtn;
+
+ rtn = del_timer(&SCset->eh_timeout);
+
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Clearing timer for command %p %d\n", SCset, rtn));
+
+ SCset->eh_timeout.data = (unsigned long) NULL;
+ SCset->eh_timeout.function = NULL;
+
+ return rtn;
+}
+
+/*
+ * Function: scsi_times_out()
+ *
+ * Purpose: Timeout function for normal scsi commands.
+ *
+ * Arguments: SCpnt - command that is timing out.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: We do not need to lock this. There is the potential for
+ * a race only in that the normal completion handling might
+ * run, but if the normal completion function determines
+ * that the timer has already fired, then it mustn't do
+ * anything.
+ */
+void scsi_times_out(Scsi_Cmnd * SCpnt)
+{
+ /*
+ * Notify the low-level code that this operation failed and we are
+ * repossessing the command.
+ */
+#ifdef ERIC_neverdef
+ /*
+ * FIXME(eric)
+ * Allow the host adapter to push a queue ordering tag
+ * out to the bus to force the command in question to complete.
+ * If the host wants to do this, then we just restart the timer
+ * for the command. Before we really do this, some real thought
+ * as to the optimum way to handle this should be done. We *do*
+ * need to force ordering every so often to ensure that all requests
+ * do eventually complete, but I am not sure if this is the best way
+ * to actually go about it.
+ *
+ * Better yet, force a sync here, but don't block since we are in an
+ * interrupt.
+ */
+ if (SCpnt->host->hostt->eh_ordered_queue_tag) {
+ if ((*SCpnt->host->hostt->eh_ordered_queue_tag) (SCpnt)) {
+ scsi_add_timer(SCpnt, SCpnt->internal_timeout,
+ scsi_times_out);
+ return;
+ }
+ }
+ /*
+ * FIXME(eric) - add a second special interface to handle this
+ * case. Ideally that interface can also be used to request
+ * a queue ordering tag.
+ */
+ if (SCpnt->host->can_queue) {
+ SCpnt->host->hostt->queuecommand(SCpnt, NULL);
+ }
+#endif
+
+ /* Set the serial_number_at_timeout to the current serial_number */
+ SCpnt->serial_number_at_timeout = SCpnt->serial_number;
+
+ SCpnt->eh_state = FAILED;
+ SCpnt->state = SCSI_STATE_TIMEOUT;
+ SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+
+ SCpnt->host->in_recovery = 1;
+ SCpnt->host->host_failed++;
+
+ SCSI_LOG_TIMEOUT(3, printk("Command timed out active=%d busy=%d failed=%d\n",
+ atomic_read(&SCpnt->host->host_active),
+ SCpnt->host->host_busy,
+ SCpnt->host->host_failed));
+
+#if 0
+ /*
+ * If the host is having troubles, then look to see if this was the last
+ * command that might have failed. If so, wake up the error handler.
+ */
+ if( SCpnt->host->eh_wait == NULL ) {
+ panic("Error handler thread not present at %p %p %s %d",
+ SCpnt, SCpnt->host, __FILE__, __LINE__);
+ }
+ if (SCpnt->host->host_busy == SCpnt->host->host_failed) {
+ up(SCpnt->host->eh_wait);
+ }
+#endif
+}
+
+/*
+ * Function scsi_block_when_processing_errors
+ *
+ * Purpose: Prevent more commands from being queued while error recovery
+ * is taking place.
+ *
+ * Arguments: SDpnt - device on which we are performing recovery.
+ *
+ * Returns: FALSE The device was taken offline by error recovery.
+ * TRUE OK to proceed.
+ *
+ * Notes: We block until the host is out of error recovery, and then
+ * check to see whether the host or the device is offline.
+ */
+int scsi_block_when_processing_errors(Scsi_Device * SDpnt)
+{
+
+ SCSI_SLEEP(&SDpnt->host->host_wait, SDpnt->host->in_recovery);
+
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Open returning %d\n", SDpnt->online));
+
+ return SDpnt->online;
+}
+
+/*
+ * Function: scsi_eh_times_out()
+ *
+ * Purpose: Timeout function for error handling.
+ *
+ * Arguments: SCpnt - command that is timing out.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: During error handling, the kernel thread will be sleeping
+ * waiting for some action to complete on the device. Our only
+ * job is to record that it timed out, and to wake up the
+ * thread.
+ */
+STATIC
+void scsi_eh_times_out(Scsi_Cmnd * SCpnt)
+{
+ SCpnt->eh_state = SCSI_STATE_TIMEOUT;
+ SCSI_LOG_ERROR_RECOVERY(5, printk("In scsi_eh_times_out %p\n", SCpnt));
+
+#if 0
+ if (SCpnt->host->eh_action != NULL)
+ up(SCpnt->host->eh_action);
+ else
+#endif
+ printk("Missing scsi error handler thread\n");
+}
+
+
+/*
+ * Function: scsi_eh_done()
+ *
+ * Purpose: Completion function for error handling.
+ *
+ * Arguments: SCpnt - command that is timing out.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: During error handling, the kernel thread will be sleeping
+ * waiting for some action to complete on the device. Our only
+ * job is to record that the action completed, and to wake up the
+ * thread.
+ */
+STATIC
+void scsi_eh_done(Scsi_Cmnd * SCpnt)
+{
+ int rtn;
+
+ /*
+ * If the timeout handler is already running, then just set the
+ * flag which says we finished late, and return. We have no
+ * way of stopping the timeout handler from running, so we must
+ * always defer to it.
+ */
+ rtn = del_timer(&SCpnt->eh_timeout);
+ if (!rtn) {
+ SCpnt->done_late = 1;
+ return;
+ }
+
+ SCpnt->request.rq_status = RQ_SCSI_DONE;
+
+ SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+ SCpnt->eh_state = SUCCESS;
+
+ SCSI_LOG_ERROR_RECOVERY(5, printk("In eh_done %p result:%x\n", SCpnt,
+ SCpnt->result));
+
+#if 0
+ if (SCpnt->host->eh_action != NULL)
+ up(SCpnt->host->eh_action);
+#endif
+}
+
+/*
+ * Function: scsi_eh_action_done()
+ *
+ * Purpose: Completion function for error handling.
+ *
+ * Arguments: SCpnt - command that is timing out.
+ * answer - boolean that indicates whether operation succeeded.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: This callback is only used for abort and reset operations.
+ */
+STATIC
+void scsi_eh_action_done(Scsi_Cmnd * SCpnt, int answer)
+{
+ SCpnt->request.rq_status = RQ_SCSI_DONE;
+
+ SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+ SCpnt->eh_state = (answer ? SUCCESS : FAILED);
+#if 0
+ if (SCpnt->host->eh_action != NULL)
+ up(SCpnt->host->eh_action);
+#endif
+}
+
+/*
+ * Function: scsi_sense_valid()
+ *
+ * Purpose: Determine whether a host has automatically obtained sense
+ * information or not. If we have it, then give a recommendation
+ * as to what we should do next.
+ */
+int scsi_sense_valid(Scsi_Cmnd * SCpnt)
+{
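+ /*
+ * Byte 0 of the sense buffer is the response code; 0x70 (current
+ * error) and 0x71 (deferred error) are the formats we understand,
+ * so masking with 0x70 and shifting must yield 7 for valid data.
+ */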
+ if (((SCpnt->sense_buffer[0] & 0x70) >> 4) != 7) {
+ return FALSE;
+ }
+ return TRUE;
+}
+
+/*
+ * Function: scsi_eh_retry_command()
+ *
+ * Purpose: Retry the original command
+ *
+ * Returns: SUCCESS - we were able to get the sense data.
+ * FAILED - we were not able to get the sense data.
+ *
+ * Notes: This function will *NOT* return until the command either
+ * times out, or it completes.
+ */
+STATIC int scsi_eh_retry_command(Scsi_Cmnd * SCpnt)
+{
+ memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
+ sizeof(SCpnt->data_cmnd));
+ SCpnt->request_buffer = SCpnt->buffer;
+ SCpnt->request_bufflen = SCpnt->bufflen;
+ SCpnt->use_sg = SCpnt->old_use_sg;
+ SCpnt->cmd_len = SCpnt->old_cmd_len;
+ SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+ SCpnt->underflow = SCpnt->old_underflow;
+
+ scsi_send_eh_cmnd(SCpnt, SCpnt->timeout_per_command);
+
+ /*
+ * Hey, we are done. Let's look to see what happened.
+ */
+ return SCpnt->eh_state;
+}
+
+/*
+ * Function: scsi_request_sense()
+ *
+ * Purpose: Request sense data from a particular target.
+ *
+ * Returns: SUCCESS - we were able to get the sense data.
+ * FAILED - we were not able to get the sense data.
+ *
+ * Notes: Some hosts automatically obtain this information, others
+ * require that we obtain it on our own.
+ *
+ * This function will *NOT* return until the command either
+ * times out, or it completes.
+ */
+STATIC int scsi_request_sense(Scsi_Cmnd * SCpnt)
+{
+ static unsigned char generic_sense[6] =
+ {REQUEST_SENSE, 0, 0, 0, 255, 0};
+ unsigned char scsi_result0[256], *scsi_result = NULL;
+ int saved_result;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ memcpy((void *) SCpnt->cmnd, (void *) generic_sense,
+ sizeof(generic_sense));
+
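+ /* Pre-SCSI-3 devices expect the LUN in bits 5-7 of CDB byte 1. */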
+ if (SCpnt->device->scsi_level <= SCSI_2)
+ SCpnt->cmnd[1] = SCpnt->lun << 5;
+
+ scsi_result = (!SCpnt->host->hostt->unchecked_isa_dma)
+ ? &scsi_result0[0] : kmalloc(512, GFP_ATOMIC | GFP_DMA);
+
+ if (scsi_result == NULL) {
+ printk("cannot allocate scsi_result in scsi_request_sense.\n");
+ return FAILED;
+ }
+ /*
+ * Zero the sense buffer. Some host adapters always automatically request
+ * sense, so it is not a good idea for SCpnt->request_buffer and
+ * SCpnt->sense_buffer point to the same address (DB).
+ * 0 is not a valid sense code.
+ */
+ memset((void *) SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer));
+ memset((void *) scsi_result, 0, 256);
+
+ saved_result = SCpnt->result;
+ SCpnt->request_buffer = scsi_result;
+ SCpnt->request_bufflen = 256;
+ SCpnt->use_sg = 0;
+ SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+ SCpnt->sc_data_direction = SCSI_DATA_READ;
+ SCpnt->underflow = 0;
+
+ scsi_send_eh_cmnd(SCpnt, SENSE_TIMEOUT);
+
+ /* Last chance to have valid sense data */
+ if (!scsi_sense_valid(SCpnt))
+ memcpy((void *) SCpnt->sense_buffer,
+ SCpnt->request_buffer,
+ sizeof(SCpnt->sense_buffer));
+
+ if (scsi_result != &scsi_result0[0] && scsi_result != NULL)
+ kfree(scsi_result);
+
+ /*
+ * When we eventually call scsi_finish, we really wish to complete
+ * the original request, so let's restore the original data. (DB)
+ */
+ memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
+ sizeof(SCpnt->data_cmnd));
+ SCpnt->result = saved_result;
+ SCpnt->request_buffer = SCpnt->buffer;
+ SCpnt->request_bufflen = SCpnt->bufflen;
+ SCpnt->use_sg = SCpnt->old_use_sg;
+ SCpnt->cmd_len = SCpnt->old_cmd_len;
+ SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+ SCpnt->underflow = SCpnt->old_underflow;
+
+ /*
+ * Hey, we are done. Let's look to see what happened.
+ */
+ return SCpnt->eh_state;
+}
+
+/*
+ * Function: scsi_test_unit_ready()
+ *
+ * Purpose: Run a TEST UNIT READY command to see whether the device is still responding.
+ *
+ */
+STATIC int scsi_test_unit_ready(Scsi_Cmnd * SCpnt)
+{
+ static unsigned char tur_command[6] =
+ {TEST_UNIT_READY, 0, 0, 0, 0, 0};
+
+ memcpy((void *) SCpnt->cmnd, (void *) tur_command,
+ sizeof(tur_command));
+
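+ /* As above: pre-SCSI-3 devices expect the LUN in bits 5-7 of CDB byte 1. */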
+ if (SCpnt->device->scsi_level <= SCSI_2)
+ SCpnt->cmnd[1] = SCpnt->lun << 5;
+
+ /*
+ * Zero the sense buffer. The SCSI spec mandates that any
+ * untransferred sense data should be interpreted as being zero.
+ */
+ memset((void *) SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer));
+
+ SCpnt->request_buffer = NULL;
+ SCpnt->request_bufflen = 0;
+ SCpnt->use_sg = 0;
+ SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+ SCpnt->underflow = 0;
+ SCpnt->sc_data_direction = SCSI_DATA_NONE;
+
+ scsi_send_eh_cmnd(SCpnt, SENSE_TIMEOUT);
+
+ /*
+ * When we eventually call scsi_finish, we really wish to complete
+ * the original request, so let's restore the original data. (DB)
+ */
+ memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
+ sizeof(SCpnt->data_cmnd));
+ SCpnt->request_buffer = SCpnt->buffer;
+ SCpnt->request_bufflen = SCpnt->bufflen;
+ SCpnt->use_sg = SCpnt->old_use_sg;
+ SCpnt->cmd_len = SCpnt->old_cmd_len;
+ SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+ SCpnt->underflow = SCpnt->old_underflow;
+
+ /*
+ * Hey, we are done. Let's look to see what happened.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3,
+ printk("scsi_test_unit_ready: SCpnt %p eh_state %x\n",
+ SCpnt, SCpnt->eh_state));
+ return SCpnt->eh_state;
+}
+
+/*
+ * This would normally need to get the IO request lock,
+ * but as it doesn't actually touch anything that needs
+ * to be locked we can avoid the lock here..
+ */
+STATIC
+void scsi_sleep_done(struct semaphore *sem)
+{
+#if 0
+ if (sem != NULL) {
+ up(sem);
+ }
+#endif
+}
+
+void scsi_sleep(int timeout)
+{
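+ /*
+ * NB: with the semaphore code compiled out in this port, we never
+ * actually block here - the timer below is armed and then deleted
+ * straight away, so the requested delay is effectively a no-op.
+ */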
+#if 0
+ DECLARE_MUTEX_LOCKED(sem);
+#endif
+ struct timer_list timer;
+
+ init_timer(&timer);
+#if 0
+ timer.data = (unsigned long) &sem;
+#else
+ timer.data = 0xDEADBEEF;
+#endif
+ timer.expires = jiffies + timeout;
+ timer.function = (void (*)(unsigned long)) scsi_sleep_done;
+
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Sleeping for timer tics %d\n", timeout));
+
+ add_timer(&timer);
+
+#if 0
+ down(&sem);
+#endif
+ del_timer(&timer);
+}
+
+/*
+ * Function: scsi_send_eh_cmnd
+ *
+ * Purpose: Send a command out to a device as part of error recovery.
+ *
+ * Notes: The initialization of the structures is quite a bit different
+ * in this case, and furthermore, there is a different completion
+ * handler.
+ */
+STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout)
+{
+ unsigned long flags;
+ struct Scsi_Host *host;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ host = SCpnt->host;
+
+ retry:
+ /*
+ * We will use a queued command if possible, otherwise we will emulate the
+ * queuing and calling of completion function ourselves.
+ */
+ SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+
+ if (host->can_queue) {
+#if 0
+ DECLARE_MUTEX_LOCKED(sem);
+#endif
+
+ SCpnt->eh_state = SCSI_STATE_QUEUED;
+
+ scsi_add_timer(SCpnt, timeout, scsi_eh_times_out);
+
+#if 0
+ /*
+ * Set up the semaphore so we wait for the command to complete.
+ */
+ SCpnt->host->eh_action = &sem;
+#endif
+ SCpnt->request.rq_status = RQ_SCSI_BUSY;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ host->hostt->queuecommand(SCpnt, scsi_eh_done);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+#if 0
+ down(&sem);
+#endif
+
+ SCpnt->host->eh_action = NULL;
+
+ /*
+ * See if timeout. If so, tell the host to forget about it.
+ * In other words, we don't want a callback any more.
+ */
+ if (SCpnt->eh_state == SCSI_STATE_TIMEOUT) {
+ SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+
+ /*
+ * As far as the low level driver is
+ * concerned, this command is still active, so
+ * we must give the low level driver a chance
+ * to abort it. (DB)
+ *
+ * FIXME(eric) - we are not tracking whether we could
+ * abort a timed out command or not. Not sure how
+ * we should treat them differently anyways.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+ if (SCpnt->host->hostt->eh_abort_handler)
+ SCpnt->host->hostt->eh_abort_handler(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ SCpnt->request.rq_status = RQ_SCSI_DONE;
+ SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+
+ SCpnt->eh_state = FAILED;
+ }
+ SCSI_LOG_ERROR_RECOVERY(5, printk("send_eh_cmnd: %p eh_state:%x\n",
+ SCpnt, SCpnt->eh_state));
+ } else {
+ int temp;
+
+ /*
+ * We damn well had better never use this code. There is no timeout
+ * protection here: we could end up waiting in the actual low-level
+ * driver, and we have no way to wake it up.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+ temp = host->hostt->command(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ SCpnt->result = temp;
+ /* Fall through to code below to examine status. */
+ SCpnt->eh_state = SUCCESS;
+ }
+
+ /*
+ * Now examine the actual status codes to see whether the command actually
+ * did complete normally.
+ */
+ if (SCpnt->eh_state == SUCCESS) {
+ int ret = scsi_eh_completed_normally(SCpnt);
+ SCSI_LOG_ERROR_RECOVERY(3,
+ printk("scsi_send_eh_cmnd: scsi_eh_completed_normally %x\n", ret));
+ switch (ret) {
+ case SUCCESS:
+ SCpnt->eh_state = SUCCESS;
+ break;
+ case NEEDS_RETRY:
+ goto retry;
+ case FAILED:
+ default:
+ SCpnt->eh_state = FAILED;
+ break;
+ }
+ } else {
+ SCpnt->eh_state = FAILED;
+ }
+}
+
+/*
+ * Function: scsi_unit_is_ready()
+ *
+ * Purpose: Called after TEST_UNIT_READY is run, to test to see if
+ * the unit responded in a way that indicates it is ready.
+ */
+STATIC int scsi_unit_is_ready(Scsi_Cmnd * SCpnt)
+{
+ if (SCpnt->result) {
+ if (((driver_byte(SCpnt->result) & DRIVER_SENSE) ||
+ (status_byte(SCpnt->result) & CHECK_CONDITION)) &&
+ ((SCpnt->sense_buffer[0] & 0x70) >> 4) == 7) {
+ if (((SCpnt->sense_buffer[2] & 0xf) != NOT_READY) &&
+ ((SCpnt->sense_buffer[2] & 0xf) != UNIT_ATTENTION) &&
+ ((SCpnt->sense_buffer[2] & 0xf) != ILLEGAL_REQUEST)) {
+ return 0;
+ }
+ }
+ }
+ return 1;
+}
+
+/*
+ * Function: scsi_eh_finish_command
+ *
+ * Purpose: Handle a command that we are finished with WRT error handling.
+ *
+ * Arguments: SClist - pointer to list into which we are putting completed commands.
+ * SCpnt - command that is completing
+ *
+ * Notes: We don't want to use the normal command completion while we
+ * are still handling errors - it may cause other commands to be queued,
+ * and that would disturb what we are doing. Thus we really want to keep
+ * a list of pending commands for final completion, and once we
+ * are ready to leave error handling we handle completion for real.
+ */
+STATIC void scsi_eh_finish_command(Scsi_Cmnd ** SClist, Scsi_Cmnd * SCpnt)
+{
+ SCpnt->state = SCSI_STATE_BHQUEUE;
+ SCpnt->bh_next = *SClist;
+ /*
+ * Set this back so that the upper level can correctly free up
+ * things.
+ */
+ SCpnt->use_sg = SCpnt->old_use_sg;
+ SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+ SCpnt->underflow = SCpnt->old_underflow;
+ *SClist = SCpnt;
+}
+
+/*
+ * Function: scsi_try_to_abort_command
+ *
+ * Purpose: Ask host adapter to abort a running command.
+ *
+ * Returns: FAILED Operation failed or not supported.
+ * SUCCESS Succeeded.
+ *
+ * Notes: This function will not return until the user's completion
+ * function has been called. There is no timeout on this
+ * operation. If the author of the low-level driver wishes
+ * this operation to be timed, they can provide this facility
+ * themselves. Helper functions in scsi_error.c can be supplied
+ * to make this easier to do.
+ *
+ * Notes: It may be possible to combine this with all of the reset
+ * handling to eliminate a lot of code duplication. I don't
+ * know what makes more sense at the moment - this is just a
+ * prototype.
+ */
+STATIC int scsi_try_to_abort_command(Scsi_Cmnd * SCpnt, int timeout)
+{
+ int rtn;
+ unsigned long flags;
+
+ SCpnt->eh_state = FAILED; /* Until we come up with something better */
+
+ if (SCpnt->host->hostt->eh_abort_handler == NULL) {
+ return FAILED;
+ }
+ /*
+ * scsi_done was called just after the command timed out and before
+ * we had a chance to process it. (DB)
+ */
+ if (SCpnt->serial_number == 0)
+ return SUCCESS;
+
+ SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ rtn = SCpnt->host->hostt->eh_abort_handler(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return rtn;
+}
+
+/*
+ * Function: scsi_try_bus_device_reset
+ *
+ * Purpose: Ask host adapter to perform a bus device reset for a given
+ * device.
+ *
+ * Returns: FAILED Operation failed or not supported.
+ * SUCCESS Succeeded.
+ *
+ * Notes: There is no timeout for this operation. If this operation is
+ * unreliable for a given host, then the host itself needs to put a
+ * timer on it, and set the host back to a consistent state prior
+ * to returning.
+ */
+STATIC int scsi_try_bus_device_reset(Scsi_Cmnd * SCpnt, int timeout)
+{
+ unsigned long flags;
+ int rtn;
+
+ SCpnt->eh_state = FAILED; /* Until we come up with something better */
+
+ if (SCpnt->host->hostt->eh_device_reset_handler == NULL) {
+ return FAILED;
+ }
+ SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ rtn = SCpnt->host->hostt->eh_device_reset_handler(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ if (rtn == SUCCESS)
+ SCpnt->eh_state = SUCCESS;
+
+ return SCpnt->eh_state;
+}
+
+/*
+ * Function: scsi_try_bus_reset
+ *
+ * Purpose: Ask host adapter to perform a bus reset for a host.
+ *
+ * Returns: FAILED Operation failed or not supported.
+ * SUCCESS Succeeded.
+ *
+ * Notes:
+ */
+STATIC int scsi_try_bus_reset(Scsi_Cmnd * SCpnt)
+{
+ unsigned long flags;
+ int rtn;
+
+ SCpnt->eh_state = FAILED; /* Until we come up with something better */
+ SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+ SCpnt->serial_number_at_timeout = SCpnt->serial_number;
+
+ if (SCpnt->host->hostt->eh_bus_reset_handler == NULL) {
+ return FAILED;
+ }
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ rtn = SCpnt->host->hostt->eh_bus_reset_handler(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ if (rtn == SUCCESS)
+ SCpnt->eh_state = SUCCESS;
+
+ /*
+ * If we had a successful bus reset, mark the command blocks to expect
+ * a condition code of unit attention.
+ */
+ scsi_sleep(BUS_RESET_SETTLE_TIME);
+ if (SCpnt->eh_state == SUCCESS) {
+ Scsi_Device *SDloop;
+ for (SDloop = SCpnt->host->host_queue; SDloop; SDloop = SDloop->next) {
+ if (SCpnt->channel == SDloop->channel) {
+ SDloop->was_reset = 1;
+ SDloop->expecting_cc_ua = 1;
+ }
+ }
+ }
+ return SCpnt->eh_state;
+}
+
+/*
+ * Function: scsi_try_host_reset
+ *
+ * Purpose: Ask host adapter to reset itself, and the bus.
+ *
+ * Returns: FAILED Operation failed or not supported.
+ * SUCCESS Succeeded.
+ *
+ * Notes:
+ */
+STATIC int scsi_try_host_reset(Scsi_Cmnd * SCpnt)
+{
+ unsigned long flags;
+ int rtn;
+
+ SCpnt->eh_state = FAILED; /* Until we come up with something better */
+ SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+ SCpnt->serial_number_at_timeout = SCpnt->serial_number;
+
+ if (SCpnt->host->hostt->eh_host_reset_handler == NULL) {
+ return FAILED;
+ }
+ spin_lock_irqsave(&io_request_lock, flags);
+ rtn = SCpnt->host->hostt->eh_host_reset_handler(SCpnt);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ if (rtn == SUCCESS)
+ SCpnt->eh_state = SUCCESS;
+
+ /*
+ * If we had a successful host reset, mark the command blocks to expect
+ * a condition code of unit attention.
+ */
+ scsi_sleep(HOST_RESET_SETTLE_TIME);
+ if (SCpnt->eh_state == SUCCESS) {
+ Scsi_Device *SDloop;
+ for (SDloop = SCpnt->host->host_queue; SDloop; SDloop = SDloop->next) {
+ SDloop->was_reset = 1;
+ SDloop->expecting_cc_ua = 1;
+ }
+ }
+ return SCpnt->eh_state;
+}
+
+/*
+ * Function: scsi_decide_disposition
+ *
+ * Purpose: Examine a command block that has come back from the low-level
+ * and figure out what to do next.
+ *
+ * Returns: SUCCESS - pass on to upper level.
+ * FAILED - pass on to error handler thread.
+ * RETRY - command should be retried.
+ * SOFTERR - command succeeded, but we need to log
+ * a soft error.
+ *
+ * Notes: This is *ONLY* called when we are examining the status
+ * after sending out the actual data command. Any commands
+ * that are queued for error recovery (i.e. TEST_UNIT_READY)
+ * do *NOT* come through here.
+ *
+ * NOTE - When this routine returns FAILED, it means the error
+ * handler thread is woken. In cases where the error code
+ * indicates an error that doesn't require the error handler
+ * thread (i.e. we don't need to abort/reset), then this function
+ * should return SUCCESS.
+ */
+int scsi_decide_disposition(Scsi_Cmnd * SCpnt)
+{
+ int rtn;
+
+ /*
+ * If the device is offline, then we clearly just pass the result back
+ * up to the top level.
+ */
+ if (SCpnt->device->online == FALSE) {
+ SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: device offline - report as SUCCESS\n"));
+ return SUCCESS;
+ }
+ /*
+ * First check the host byte, to see if there is anything in there
+ * that would indicate what we need to do.
+ */
+
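+ /*
+ * The 32-bit result word packs driver, host, message and SCSI status
+ * fields; the driver_byte()/host_byte()/msg_byte()/status_byte()
+ * macros extract the individual fields.
+ */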
+ switch (host_byte(SCpnt->result)) {
+ case DID_PASSTHROUGH:
+ /*
+ * No matter what, pass this through to the upper layer.
+ * Nuke this special code so that it looks like we are saying
+ * DID_OK.
+ */
+ SCpnt->result &= 0xff00ffff;
+ return SUCCESS;
+ case DID_OK:
+ /*
+ * Looks good. Drop through, and check the next byte.
+ */
+ break;
+ case DID_NO_CONNECT:
+ case DID_BAD_TARGET:
+ case DID_ABORT:
+ /*
+ * Note - this means that we just report the status back to the
+ * top level driver, not that we actually think that it indicates
+ * success.
+ */
+ return SUCCESS;
+ /*
+ * When the low level driver returns DID_SOFT_ERROR,
+ * it is responsible for keeping an internal retry counter
+ * in order to avoid endless loops (DB)
+ *
+ * Actually this is a bug in this function: we should
+ * be mindful of the maximum number of retries specified
+ * and not get stuck in a loop.
+ */
+ case DID_SOFT_ERROR:
+ goto maybe_retry;
+
+ case DID_ERROR:
+ if (msg_byte(SCpnt->result) == COMMAND_COMPLETE &&
+ status_byte(SCpnt->result) == RESERVATION_CONFLICT)
+ /*
+ * execute reservation conflict processing code
+ * lower down
+ */
+ break;
+ /* FALLTHROUGH */
+
+ case DID_BUS_BUSY:
+ case DID_PARITY:
+ goto maybe_retry;
+ case DID_TIME_OUT:
+ /*
+ * When we scan the bus, we get timeout messages for
+ * these commands if there is no device available.
+ * Other hosts report DID_NO_CONNECT for the same thing.
+ */
+ if ((SCpnt->cmnd[0] == TEST_UNIT_READY ||
+ SCpnt->cmnd[0] == INQUIRY)) {
+ return SUCCESS;
+ } else {
+ return FAILED;
+ }
+ case DID_RESET:
+ /*
+ * In the normal case where we haven't initiated a reset, this is
+ * a failure.
+ */
+ if (SCpnt->flags & IS_RESETTING) {
+ SCpnt->flags &= ~IS_RESETTING;
+ goto maybe_retry;
+ }
+ return SUCCESS;
+ default:
+ return FAILED;
+ }
+
+ /*
+ * Next, check the message byte.
+ */
+ if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) {
+ return FAILED;
+ }
+ /*
+ * Now, check the status byte to see if this indicates anything special.
+ */
+ switch (status_byte(SCpnt->result)) {
+ case QUEUE_FULL:
+ /*
+ * The case of trying to send too many commands to a tagged queueing
+ * device.
+ */
+ return ADD_TO_MLQUEUE;
+ case GOOD:
+ case COMMAND_TERMINATED:
+ return SUCCESS;
+ case CHECK_CONDITION:
+ rtn = scsi_check_sense(SCpnt);
+ if (rtn == NEEDS_RETRY) {
+ goto maybe_retry;
+ }
+ return rtn;
+ case CONDITION_GOOD:
+ case INTERMEDIATE_GOOD:
+ case INTERMEDIATE_C_GOOD:
+ /*
+ * Who knows? FIXME(eric)
+ */
+ return SUCCESS;
+ case BUSY:
+ goto maybe_retry;
+
+ case RESERVATION_CONFLICT:
+ printk("scsi%d (%d,%d,%d) : RESERVATION CONFLICT\n",
+ SCpnt->host->host_no, SCpnt->channel,
+ SCpnt->device->id, SCpnt->device->lun);
+ return SUCCESS; /* causes immediate I/O error */
+ default:
+ return FAILED;
+ }
+ return FAILED;
+
+ maybe_retry:
+
+ if ((++SCpnt->retries) < SCpnt->allowed) {
+ return NEEDS_RETRY;
+ } else {
+ /*
+ * No more retries - report this one back to upper level.
+ */
+ return SUCCESS;
+ }
+}
+
+/*
+ * Function: scsi_eh_completed_normally
+ *
+ * Purpose: Examine a command block that has come back from the low-level
+ * and figure out what to do next.
+ *
+ * Returns: SUCCESS - pass on to upper level.
+ * FAILED - pass on to error handler thread.
+ * RETRY - command should be retried.
+ * SOFTERR - command succeeded, but we need to log
+ * a soft error.
+ *
+ * Notes: This is *ONLY* called when we are examining the status
+ * of commands queued during error recovery. The main
+ * difference here is that we don't allow for the possibility
+ * of retries here, and we are a lot more restrictive about what
+ * we consider acceptable.
+ */
+STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt)
+{
+ /*
+ * First check the host byte, to see if there is anything in there
+ * that would indicate what we need to do.
+ */
+ if (host_byte(SCpnt->result) == DID_RESET) {
+ if (SCpnt->flags & IS_RESETTING) {
+ /*
+ * OK, this is normal. We don't know whether in fact the
+ * command in question really needs to be rerun or not -
+ * if this was the original data command then the answer is yes,
+ * otherwise we just flag it as success.
+ */
+ SCpnt->flags &= ~IS_RESETTING;
+ return NEEDS_RETRY;
+ }
+ /*
+ * Rats. We are already in the error handler, so we now get to try
+ * and figure out what to do next. If the sense is valid, we have
+ * a pretty good idea of what to do. If not, we mark it as failed.
+ */
+ return scsi_check_sense(SCpnt);
+ }
+ if (host_byte(SCpnt->result) != DID_OK) {
+ return FAILED;
+ }
+ /*
+ * Next, check the message byte.
+ */
+ if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) {
+ return FAILED;
+ }
+ /*
+ * Now, check the status byte to see if this indicates anything special.
+ */
+ switch (status_byte(SCpnt->result)) {
+ case GOOD:
+ case COMMAND_TERMINATED:
+ return SUCCESS;
+ case CHECK_CONDITION:
+ return scsi_check_sense(SCpnt);
+ case CONDITION_GOOD:
+ case INTERMEDIATE_GOOD:
+ case INTERMEDIATE_C_GOOD:
+ /*
+ * Who knows? FIXME(eric)
+ */
+ return SUCCESS;
+ case BUSY:
+ case QUEUE_FULL:
+ case RESERVATION_CONFLICT:
+ default:
+ return FAILED;
+ }
+ return FAILED;
+}
+
+/*
+ * Function: scsi_check_sense
+ *
+ * Purpose: Examine sense information - give suggestion as to what
+ * we should do with it.
+ */
+STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt)
+{
+ if (!scsi_sense_valid(SCpnt)) {
+ return FAILED;
+ }
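+ /*
+ * Bits 5-7 of sense byte 2 are the filemark/EOM/ILI flags; the low
+ * nibble is the sense key that the switch below dispatches on.
+ */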
+ if (SCpnt->sense_buffer[2] & 0xe0)
+ return SUCCESS;
+
+ switch (SCpnt->sense_buffer[2] & 0xf) {
+ case NO_SENSE:
+ return SUCCESS;
+ case RECOVERED_ERROR:
+ return /* SOFT_ERROR */ SUCCESS;
+
+ case ABORTED_COMMAND:
+ return NEEDS_RETRY;
+ case NOT_READY:
+ case UNIT_ATTENTION:
+ /*
+ * If we are expecting a CC/UA because of a bus reset that we
+ * performed, treat this just as a retry. Otherwise this is
+ * information that we should pass up to the upper-level driver
+ * so that we can deal with it there.
+ */
+ if (SCpnt->device->expecting_cc_ua) {
+ SCpnt->device->expecting_cc_ua = 0;
+ return NEEDS_RETRY;
+ }
+ /*
+ * If the device is in the process of becoming ready, we
+ * should retry.
+ */
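+ /* Bytes 12 and 13 of the sense buffer are the ASC/ASCQ pair;
+ * 0x04/0x01 means "logical unit is in the process of becoming ready". */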
+ if ((SCpnt->sense_buffer[12] == 0x04) &&
+ (SCpnt->sense_buffer[13] == 0x01)) {
+ return NEEDS_RETRY;
+ }
+ return SUCCESS;
+
+ /* these three are not supported */
+ case COPY_ABORTED:
+ case VOLUME_OVERFLOW:
+ case MISCOMPARE:
+ return SUCCESS;
+
+ case MEDIUM_ERROR:
+ return NEEDS_RETRY;
+
+ case ILLEGAL_REQUEST:
+ case BLANK_CHECK:
+ case DATA_PROTECT:
+ case HARDWARE_ERROR:
+ default:
+ return SUCCESS;
+ }
+}
+
+
+/*
+ * Function: scsi_restart_operations
+ *
+ * Purpose: Restart IO operations to the specified host.
+ *
+ * Arguments: host - host that we are restarting
+ *
+ * Lock status: Assumed that locks are not held upon entry.
+ *
+ * Returns: Nothing
+ *
+ * Notes: When we entered the error handler, we blocked all further
+ * I/O to this device. We need to 'reverse' this process.
+ */
+STATIC void scsi_restart_operations(struct Scsi_Host *host)
+{
+ Scsi_Device *SDpnt;
+ unsigned long flags;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ /*
+ * Next free up anything directly waiting upon the host. This will be
+ * requests for character device operations, and also for ioctls to queued
+ * block devices.
+ */
+ SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: Waking up host to restart\n"));
+#if 0
+ wake_up(&host->host_wait);
+#endif
+
+ /*
+ * Finally we need to re-initiate requests that may be pending. We will
+ * have had everything blocked while error handling is taking place, and
+ * now that error recovery is done, we will need to ensure that these
+ * requests are started.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ request_queue_t *q;
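+ /*
+ * If the host itself is saturated or blocked there is no point
+ * in scanning the remaining devices, hence the break below.
+ */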
+ if ((host->can_queue > 0 && (host->host_busy >= host->can_queue))
+ || (host->host_blocked)
+ || (host->host_self_blocked)
+ || (SDpnt->device_blocked)) {
+ break;
+ }
+ q = &SDpnt->request_queue;
+ q->request_fn(q);
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * Function: scsi_unjam_host
+ *
+ * Purpose: Attempt to fix a host which has a command that failed for
+ * some reason.
+ *
+ * Arguments: host - host that needs unjamming.
+ *
+ * Returns: Nothing
+ *
+ * Notes: When we come in here, we *know* that all commands on the
+ * bus have either completed, failed or timed out. We also
+ * know that no further commands are being sent to the host,
+ * so things are relatively quiet and we have freedom to
+ * fiddle with things as we wish.
+ *
+ * Additional note: This is only the *default* implementation. It is possible
+ * for individual drivers to supply their own version of this
+ * function, and if the maintainer wishes to do this, it is
+ * strongly suggested that this function be taken as a template
+ * and modified. This function was designed to correctly handle
+ * problems for about 95% of the different cases out there, and
+ * it should always provide at least a reasonable amount of error
+ * recovery.
+ *
+ * Note3: Any command marked 'FAILED' or 'TIMEOUT' must eventually
+ * have scsi_finish_command() called for it. We do all of
+ * the retry stuff here, so when we restart the host after we
+ * return it should have an empty queue.
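+ *
+ * The default strategy below escalates in stages: request sense
+ * data, try to abort timed-out commands, then a bus device reset,
+ * then a bus reset, then a host reset, and as a last resort take
+ * the offending devices offline.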
+ */
+STATIC int scsi_unjam_host(struct Scsi_Host *host)
+{
+ int devices_failed;
+ int numfailed;
+ int ourrtn;
+ int rtn = FALSE;
+ int result;
+ Scsi_Cmnd *SCloop;
+ Scsi_Cmnd *SCpnt;
+ Scsi_Device *SDpnt;
+ Scsi_Device *SDloop;
+ Scsi_Cmnd *SCdone;
+ int timed_out;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ SCdone = NULL;
+
+ /*
+ * First, protect against any sort of race condition. If any of the outstanding
+ * commands are in states that indicate that we are not yet blocked (i.e. we are
+ * not in a quiet state) then we got woken up in error. If we ever end up here,
+ * we need to re-examine some of the assumptions.
+ */
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->state == SCSI_STATE_FAILED
+ || SCpnt->state == SCSI_STATE_TIMEOUT
+ || SCpnt->state == SCSI_STATE_INITIALIZING
+ || SCpnt->state == SCSI_STATE_UNUSED) {
+ continue;
+ }
+ /*
+ * Rats. Something is still floating around out there. This could
+ * be because the upper level drivers are still frobbing
+ * commands that might have succeeded. There are two outcomes. One is that
+ * the command block will eventually be freed, and the other one is that
+ * the command will be queued and will be finished along the way.
+ */
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler prematurely woken - commands still active (%p %x %d)\n", SCpnt, SCpnt->state, SCpnt->target));
+
+/*
+ * panic("SCSI Error handler woken too early\n");
+ *
+ * This is no longer a problem, since now the code cares only about
+ * SCSI_STATE_TIMEOUT and SCSI_STATE_FAILED.
+ * Other states are useful only to release active commands when devices are
+ * set offline. If (host->host_active == host->host_busy) we can safely assume
+ * that there are no commands in a state other than TIMEOUT or FAILED. (DB)
+ *
+ * FIXME:
+ * It is not easy to correctly release commands according to their state when
+ * devices are set offline and the state is neither TIMEOUT nor FAILED.
+ * When a device is set offline, we can be left with a command with
+ * rq_status=RQ_SCSI_BUSY, owner=SCSI_OWNER_HIGHLEVEL,
+ * state=SCSI_STATE_INITIALIZING, and then the driver module cannot be released.
+ * (DB, 17 May 1998)
+ */
+ }
+ }
+
+ /*
+ * Next, see if we need to request sense information. if so,
+ * then get it now, so we have a better idea of what to do.
+ * FIXME(eric) this has the unfortunate side effect that if a host
+ * adapter does not automatically request sense information, we end
+ * up shutting it down before we request it. All hosts should be doing this
+ * anyway, so for now all I have to say is tough noogies if you end up in here.
+ * On second thought, this is probably a good idea. We *really* want to give
+ * authors an incentive to automatically request this.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we need to request sense\n"));
+
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->state != SCSI_STATE_FAILED || scsi_sense_valid(SCpnt)) {
+ continue;
+ }
+ SCSI_LOG_ERROR_RECOVERY(2, printk("scsi_unjam_host: Requesting sense for %d\n",
+ SCpnt->target));
+ rtn = scsi_request_sense(SCpnt);
+ if (rtn != SUCCESS) {
+ continue;
+ }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("Sense requested for %p - result %x\n",
+ SCpnt, SCpnt->result));
+ SCSI_LOG_ERROR_RECOVERY(3, print_sense("bh", SCpnt));
+
+ result = scsi_decide_disposition(SCpnt);
+
+ /*
+ * If the result was normal, then just pass it along to the
+ * upper level.
+ */
+ if (result == SUCCESS) {
+ SCpnt->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCpnt);
+ }
+ if (result != NEEDS_RETRY) {
+ continue;
+ }
+ /*
+ * We only come in here if we want to retry a
+ * command. The test to see whether the command
+ * should be retried should be keeping track of the
+ * number of tries, so we don't end up looping, of
+ * course.
+ */
+ SCpnt->state = NEEDS_RETRY;
+ rtn = scsi_eh_retry_command(SCpnt);
+ if (rtn != SUCCESS) {
+ continue;
+ }
+ /*
+ * We eventually hand this one back to the top level.
+ */
+ SCpnt->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCpnt);
+ }
+ }
+
+ /*
+ * Go through the list of commands and figure out where we stand and how bad things
+ * really are.
+ */
+ numfailed = 0;
+ timed_out = 0;
+ devices_failed = 0;
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ unsigned int device_error = 0;
+
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->state == SCSI_STATE_FAILED) {
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d failed\n",
+ SCpnt->target));
+ numfailed++;
+ device_error++;
+ }
+ if (SCpnt->state == SCSI_STATE_TIMEOUT) {
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d timedout\n",
+ SCpnt->target));
+ timed_out++;
+ device_error++;
+ }
+ }
+ if (device_error > 0) {
+ devices_failed++;
+ }
+ }
+
+ SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d+%d commands on %d devices require eh work\n",
+ numfailed, timed_out, devices_failed));
+
+ if (host->host_failed == 0) {
+ ourrtn = TRUE;
+ goto leave;
+ }
+ /*
+ * Next, try and see whether or not it makes sense to try and abort
+ * the running command. This only works out to be the case if we have
+ * one command that has timed out. If the command simply failed, it
+ * makes no sense to try and abort the command, since as far as the
+ * host adapter is concerned, it isn't running.
+ */
+
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try abort\n"));
+
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ rtn = scsi_try_to_abort_command(SCloop, ABORT_TIMEOUT);
+ if (rtn == SUCCESS) {
+ rtn = scsi_test_unit_ready(SCloop);
+
+ if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+ rtn = scsi_eh_retry_command(SCloop);
+
+ if (rtn == SUCCESS) {
+ SCloop->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * If we have corrected all of the problems, then we are done.
+ */
+ if (host->host_failed == 0) {
+ ourrtn = TRUE;
+ goto leave;
+ }
+ /*
+ * Either the abort wasn't appropriate, or it didn't succeed.
+ * Now try a bus device reset. Still, look to see whether we have
+ * multiple devices that are jammed or not - if we have multiple devices,
+ * it makes no sense to try BUS_DEVICE_RESET - we really would need
+ * to try a BUS_RESET instead.
+ *
+ * Does this make sense - should we try BDR on each device individually?
+ * Yes, definitely.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try BDR\n"));
+
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->state == SCSI_STATE_FAILED
+ || SCloop->state == SCSI_STATE_TIMEOUT) {
+ break;
+ }
+ }
+
+ if (SCloop == NULL) {
+ continue;
+ }
+ /*
+ * OK, we have a device that is having problems. Try and send
+ * a bus device reset to it.
+ *
+ * FIXME(eric) - make sure we handle the case where multiple
+ * commands to the same device have failed. They all must
+ * get properly restarted.
+ */
+ rtn = scsi_try_bus_device_reset(SCloop, RESET_TIMEOUT);
+
+ if (rtn == SUCCESS) {
+ rtn = scsi_test_unit_ready(SCloop);
+
+ if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+ rtn = scsi_eh_retry_command(SCloop);
+
+ if (rtn == SUCCESS) {
+ SCloop->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ }
+ }
+
+ if (host->host_failed == 0) {
+ ourrtn = TRUE;
+ goto leave;
+ }
+ /*
+ * If we ended up here, we have serious problems. The only thing left
+ * to try is a full bus reset. If someone has grabbed the bus and isn't
+ * letting go, then perhaps this will help.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Try hard bus reset\n"));
+
+ /*
+ * We really want to loop over the various channels, and do this on
+ * a channel by channel basis. We should also check to see if any
+ * of the failed commands are on soft_reset devices, and if so, skip
+ * the reset.
+ */
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ next_device:
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->state != SCSI_STATE_FAILED
+ && SCpnt->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ /*
+ * We have a failed command. Make sure there are no other failed
+ * commands on the same channel that are timed out and implement a
+ * soft reset.
+ */
+ for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
+ for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->channel != SCpnt->channel) {
+ continue;
+ }
+ if (SCloop->state != SCSI_STATE_FAILED
+ && SCloop->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ if (SDloop->soft_reset && SCloop->state == SCSI_STATE_TIMEOUT) {
+ /*
+ * If this device uses the soft reset option, and this
+ * is one of the devices acting up, then our only
+ * option is to wait a bit, since the command is
+ * supposedly still running.
+ *
+ * FIXME(eric) - right now we will just end up falling
+ * through to the 'take device offline' case.
+ *
+ * FIXME(eric) - It is possible that the command completed
+ * *after* the error recovery procedure started, and if this
+ * is the case, we are worrying about nothing here.
+ */
+
+ scsi_sleep(1 * HZ);
+ goto next_device;
+ }
+ }
+ }
+
+ /*
+ * We now know that we are able to perform a reset for the
+ * bus that SCpnt points to. There are no soft-reset devices
+ * with outstanding timed out commands.
+ */
+ rtn = scsi_try_bus_reset(SCpnt);
+ if (rtn == SUCCESS) {
+ for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
+ for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->channel != SCpnt->channel) {
+ continue;
+ }
+ if (SCloop->state != SCSI_STATE_FAILED
+ && SCloop->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ rtn = scsi_test_unit_ready(SCloop);
+
+ if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+ rtn = scsi_eh_retry_command(SCloop);
+
+ if (rtn == SUCCESS) {
+ SCpnt->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ /*
+ * If the bus reset worked, but we are still unable to
+ * talk to the device, take it offline.
+ * FIXME(eric) - is this really the correct thing to do?
+ */
+ if (rtn != SUCCESS) {
+ printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after bus reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
+
+ SDloop->online = FALSE;
+ SDloop->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (host->host_failed == 0) {
+ ourrtn = TRUE;
+ goto leave;
+ }
+ /*
+ * If we ended up here, we have serious problems. The only thing left
+ * to try is a full host reset - perhaps the firmware on the device
+ * crashed, or something like that.
+ *
+ * It is assumed that a successful host reset will cause *all* information
+ * about the command to be flushed from both the host adapter *and* the
+ * device.
+ *
+ * FIXME(eric) - it isn't clear that devices that implement the soft reset
+ * option can ever be cleared except via cycling the power. The problem is
+ * that sending the host reset command will cause the host to forget
+ * about the pending command, but the device won't forget. For now, we
+ * skip the host reset option if any of the failed devices are configured
+ * to use the soft reset option.
+ */
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ next_device2:
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->state != SCSI_STATE_FAILED
+ && SCpnt->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ if (SDpnt->soft_reset && SCpnt->state == SCSI_STATE_TIMEOUT) {
+ /*
+ * If this device uses the soft reset option, and this
+ * is one of the devices acting up, then our only
+ * option is to wait a bit, since the command is
+ * supposedly still running.
+ *
+ * FIXME(eric) - right now we will just end up falling
+ * through to the 'take device offline' case.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3,
+ printk("scsi_unjam_host: Unable to try hard host reset\n"));
+
+ /*
+ * Due to the spinlock, we will never get out of this
+ * loop without a proper wait. (DB)
+ */
+ scsi_sleep(1 * HZ);
+
+ goto next_device2;
+ }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Try hard host reset\n"));
+
+ /*
+ * FIXME(eric) - we need to obtain a valid SCpnt to perform this call.
+ */
+ rtn = scsi_try_host_reset(SCpnt);
+ if (rtn == SUCCESS) {
+ /*
+ * FIXME(eric) we assume that all commands are flushed from the
+ * controller. We should get a DID_RESET for all of the commands
+ * that were pending. We should ignore these so that we can
+ * guarantee that we are in a consistent state.
+ *
+ * I believe this to be the case right now, but this needs to be
+ * tested.
+ */
+ for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
+ for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->state != SCSI_STATE_FAILED
+ && SCloop->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ rtn = scsi_test_unit_ready(SCloop);
+
+ if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+ rtn = scsi_eh_retry_command(SCloop);
+
+ if (rtn == SUCCESS) {
+ SCpnt->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ if (rtn != SUCCESS) {
+ printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after host reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
+ SDloop->online = FALSE;
+ SDloop->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * If we solved all of the problems, then let's rev up the engines again.
+ */
+ if (host->host_failed == 0) {
+ ourrtn = TRUE;
+ goto leave;
+ }
+ /*
+ * If the HOST RESET failed, then for now we assume that the entire host
+ * adapter is too hosed to be of any use. For our purposes, however, it is
+ * easier to simply take the devices offline that correspond to commands
+ * that failed.
+ */
+ SCSI_LOG_ERROR_RECOVERY(1, printk("scsi_unjam_host: Take device offline\n"));
+
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->state == SCSI_STATE_FAILED || SCloop->state == SCSI_STATE_TIMEOUT) {
+ SDloop = SCloop->device;
+ if (SDloop->online == TRUE) {
+ printk(KERN_INFO "scsi: device set offline - command error recover failed: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
+ SDloop->online = FALSE;
+ }
+
+ /*
+ * This should pass the failure up to the top level driver, and
+ * it will have to try and do something intelligent with it.
+ */
+ SCloop->host->host_failed--;
+
+ if (SCloop->state == SCSI_STATE_TIMEOUT) {
+ SCloop->result |= (DRIVER_TIMEOUT << 24);
+ }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("Finishing command for device %d %x\n",
+ SDloop->id, SCloop->result));
+
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ }
+
+ if (host->host_failed != 0) {
+ panic("scsi_unjam_host: Miscount of number of failed commands.\n");
+ }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Returning\n"));
+
+ ourrtn = FALSE;
+
+ leave:
+
+ /*
+ * We should have a list of commands that we 'finished' during the course of
+ * error recovery. This should be the same as the list of commands that timed out
+ * or failed. We are currently holding these things in a linked list - we didn't
+ * put them in the bottom half queue because we wanted to keep things quiet while
+ * we were working on recovery, and passing them up to the top level could easily
+ * cause the top level to try and queue something else again.
+ *
+ * Start by marking that the host is no longer in error recovery.
+ */
+ host->in_recovery = 0;
+
+ /*
+ * Take the list of commands, and stick them in the bottom half queue.
+ * The current implementation of scsi_done will do this for us - if need
+ * be we can create a special version of this function to do the
+ * same job for us.
+ */
+ for (SCpnt = SCdone; SCpnt != NULL; SCpnt = SCdone) {
+ SCdone = SCpnt->bh_next;
+ SCpnt->bh_next = NULL;
+ /*
+ * Oh, this is a vile hack. scsi_done() expects a timer
+ * to be running on the command. If there isn't, it assumes
+ * that the command has actually timed out, and a timer
+ * handler is running. That may well be how we got into
+ * this fix, but right now things are stable. We add
+ * a timer back again so that we can report completion.
+ * scsi_done() will immediately remove said timer from
+ * the command, and then process it.
+ */
+ scsi_add_timer(SCpnt, 100, scsi_eh_times_out);
+ scsi_done(SCpnt);
+ }
+
+ return (ourrtn);
+}
+
+
+/*
+ * Function: scsi_error_handler
+ *
+ * Purpose: Handle errors/timeouts of scsi commands, try and clean up
+ * and unjam the bus, and restart things.
+ *
+ * Arguments: host - host for which we are running.
+ *
+ * Returns: Never returns.
+ *
+ * Notes: This is always run in the context of a kernel thread. The
+ * idea is that we start this thing up when the kernel starts
+ * up (one per host that we detect), and it immediately goes to
+ * sleep and waits for some event (i.e. failure). When this
+ * takes place, we have the job of trying to unjam the bus
+ * and restarting things.
+ *
+ */
+void scsi_error_handler(void *data)
+{
+ struct Scsi_Host *host = (struct Scsi_Host *) data;
+ int rtn;
+#if 0
+ DECLARE_MUTEX_LOCKED(sem);
+
+ /*
+ * We only listen to signals if the HA was loaded as a module.
+ * If the HA was compiled into the kernel, then we don't listen
+ * to any signals.
+ */
+ if( host->loaded_as_module ) {
+ siginitsetinv(&current->blocked, SHUTDOWN_SIGS);
+ } else {
+ siginitsetinv(&current->blocked, 0);
+ }
+
+ lock_kernel();
+
+ /*
+ * Flush resources
+ */
+
+ daemonize();
+ reparent_to_init();
+
+ /*
+ * Set the name of this process.
+ */
+
+ sprintf(current->comm, "scsi_eh_%d", host->host_no);
+
+ host->eh_wait = &sem;
+#else
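+ /* The semaphore machinery is compiled out in this port; poison the
+ * pointer, presumably so any stray dereference faults loudly. */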
+ host->eh_wait = (void *)0xDEADBEEF;
+#endif
+ host->ehandler = current;
+
+#if 0
+ unlock_kernel();
+#endif
+
+ /*
+ * Wake up the thread that created us.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent %d\n", host->eh_notify->count.counter));
+
+#if 0
+ up(host->eh_notify);
+#endif
+
+ while (1) {
+ /*
+ * If we get a signal, it means we are supposed to go
+ * away and die. This typically happens if the user is
+ * trying to unload a module.
+ */
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler sleeping\n"));
+
+#if 0
+ /*
+ * Note - we always use down_interruptible with the semaphore
+ * even if the module was loaded as part of the kernel. The
+ * reason is that down() will cause this thread to be counted
+ * in the load average as a running process, while down_interruptible()
+ * doesn't. Given that we need to allow this
+ * thread to die if the driver was loaded as a module, using
+ * semaphores isn't unreasonable.
+ */
+ down_interruptible(&sem);
+#endif
+ if( host->loaded_as_module ) {
+ if (signal_pending(current))
+ break;
+ }
+
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler waking up\n"));
+
+ host->eh_active = 1;
+
+ /*
+ * We have a host that is failing for some reason. Figure out
+ * what we need to do to get it up and online again (if we can).
+ * If we fail, we end up taking the thing offline.
+ */
+ if (host->hostt->eh_strategy_handler != NULL) {
+ rtn = host->hostt->eh_strategy_handler(host);
+ } else {
+ rtn = scsi_unjam_host(host);
+ }
+
+ host->eh_active = 0;
+
+ /*
+ * Note - if the above fails completely, the action is to take
+ * individual devices offline and flush the queue of any
+ * outstanding requests that may have been pending. When we
+ * restart, we restart any I/O to any other devices on the bus
+ * which are still online.
+ */
+ scsi_restart_operations(host);
+
+ }
+
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler exiting\n"));
+
+ /*
+ * Make sure that nobody tries to wake us up again.
+ */
+ host->eh_wait = NULL;
+
+ /*
+ * Knock this down too. From this point on, the host is flying
+ * without a pilot. If this is because the module is being unloaded,
+ * that's fine. If the user sent a signal to this thing, we are
+ * potentially in real danger.
+ */
+ host->in_recovery = 0;
+ host->eh_active = 0;
+ host->ehandler = NULL;
+
+#if 0
+ /*
+ * If anyone is waiting for us to exit (i.e. someone trying to unload
+ * a driver), then wake up that process to let them know we are on
+ * the way out the door. This may be overkill - I *think* that we
+ * could probably just unload the driver and send the signal, and when
+ * the error handling thread wakes up that it would just exit without
+ * needing to touch any memory associated with the driver itself.
+ */
+ if (host->eh_notify != NULL)
+ up(host->eh_notify);
+#endif
+}
+
+/*
+ * Function: scsi_new_reset
+ *
+ * Purpose: Send requested reset to a bus or device at any phase.
+ *
+ * Arguments: SCpnt - command ptr to send reset with (usually a dummy)
+ * flag - reset type (see scsi.h)
+ *
+ * Returns: SUCCESS/FAILURE.
+ *
+ * Notes: This is used by the SCSI Generic driver to provide
+ * Bus/Device reset capability.
+ */
+int
+scsi_new_reset(Scsi_Cmnd *SCpnt, int flag)
+{
+ int rtn;
+
+ switch(flag) {
+ case SCSI_TRY_RESET_DEVICE:
+ rtn = scsi_try_bus_device_reset(SCpnt, 0);
+ if (rtn == SUCCESS)
+ break;
+ /* FALLTHROUGH */
+ case SCSI_TRY_RESET_BUS:
+ rtn = scsi_try_bus_reset(SCpnt);
+ if (rtn == SUCCESS)
+ break;
+ /* FALLTHROUGH */
+ case SCSI_TRY_RESET_HOST:
+ rtn = scsi_try_host_reset(SCpnt);
+ break;
+ default:
+ rtn = FAILED;
+ }
+
+ return rtn;
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/scsi_ioctl.c b/xen/drivers/scsi/scsi_ioctl.c
new file mode 100644
index 0000000000..7f1df6e8f1
--- /dev/null
+++ b/xen/drivers/scsi/scsi_ioctl.c
@@ -0,0 +1,538 @@
+/*
+ * Changes:
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 08/23/2000
+ * - get rid of some verify_areas and use __copy*user and __get/put_user
+ * for the ones that remain
+ */
+#define __NO_VERSION__
+#include <xeno/module.h>
+
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/page.h>
+
+/* #include <linux/interrupt.h> */
+/* #include <linux/errno.h> */
+/* #include <linux/kernel.h> */
+#include <xeno/sched.h>
+/* #include <linux/mm.h> */
+/* #include <linux/string.h> */
+
+#include <xeno/blk.h>
+#include "scsi.h"
+#include "hosts.h"
+#include <scsi/scsi_ioctl.h>
+
+#define NORMAL_RETRIES 5
+#define IOCTL_NORMAL_TIMEOUT (10 * HZ)
+#define FORMAT_UNIT_TIMEOUT (2 * 60 * 60 * HZ)
+#define START_STOP_TIMEOUT (60 * HZ)
+#define MOVE_MEDIUM_TIMEOUT (5 * 60 * HZ)
+#define READ_ELEMENT_STATUS_TIMEOUT (5 * 60 * HZ)
+#define READ_DEFECT_DATA_TIMEOUT (60 * HZ ) /* ZIP-250 on parallel port takes as long! */
+
+#define MAX_BUF PAGE_SIZE
+
+/*
+ * If we are told to probe a host, we will return 0 if the host is not
+ * present, 1 if the host is present, and will return an identifying
+ * string at *arg, if arg is non-null, filling to the length stored at
+ * (int *) arg
+ */
+
+static int ioctl_probe(struct Scsi_Host *host, void *buffer)
+{
+ unsigned int len, slen;
+ const char *string;
+ int temp = host->hostt->present;
+
+ if (temp && buffer) {
+ if (get_user(len, (unsigned int *) buffer))
+ return -EFAULT;
+
+ if (host->hostt->info)
+ string = host->hostt->info(host);
+ else
+ string = host->hostt->name;
+ if (string) {
+ slen = strlen(string);
+ if (len > slen)
+ len = slen + 1;
+ if (copy_to_user(buffer, string, len))
+ return -EFAULT;
+ }
+ }
+ return temp;
+}
+
+/*
+ * The SCSI_IOCTL_SEND_COMMAND ioctl sends a command out to the SCSI host.
+ * The IOCTL_NORMAL_TIMEOUT and NORMAL_RETRIES variables are used.
+ *
+ * dev is the SCSI device struct ptr, *(int *) arg is the length of the
+ * input data, if any, not including the command string & counts,
+ * *((int *)arg + 1) is the output buffer size in bytes.
+ *
+ * *(char *) ((int *) arg)[2] the actual command byte.
+ *
+ * Note that if more than MAX_BUF bytes are requested to be transferred,
+ * the ioctl will fail with error EINVAL. MAX_BUF can be increased in
+ * the future by increasing the size that scsi_malloc will accept.
+ *
+ * This size *does not* include the initial lengths that were passed.
+ *
+ * The SCSI command is read from the memory location immediately after the
+ * length words, and the input data is right after the command. The SCSI
+ * routines know the command size based on the opcode decode.
+ *
+ * The output area is then filled in starting from the command byte.
+ */
+
+static int ioctl_internal_command(Scsi_Device * dev, char *cmd,
+ int timeout, int retries)
+{
+ int result;
+ Scsi_Request *SRpnt;
+ Scsi_Device *SDpnt;
+
+
+ SCSI_LOG_IOCTL(1, printk("Trying ioctl with scsi command %d\n", cmd[0]));
+ if (NULL == (SRpnt = scsi_allocate_request(dev))) {
+ printk("SCSI internal ioctl failed, no memory\n");
+ return -ENOMEM;
+ }
+
+ SRpnt->sr_data_direction = SCSI_DATA_NONE;
+ scsi_wait_req(SRpnt, cmd, NULL, 0, timeout, retries);
+
+ SCSI_LOG_IOCTL(2, printk("Ioctl returned 0x%x\n", SRpnt->sr_result));
+
+ if (driver_byte(SRpnt->sr_result) != 0)
+ switch (SRpnt->sr_sense_buffer[2] & 0xf) {
+ case ILLEGAL_REQUEST:
+ if (cmd[0] == ALLOW_MEDIUM_REMOVAL)
+ dev->lockable = 0;
+ else
+ printk("SCSI device (ioctl) reports ILLEGAL REQUEST.\n");
+ break;
+ case NOT_READY: /* This happens if there is no disc in the drive */
+ if (dev->removable && (cmd[0] != TEST_UNIT_READY)) {
+ printk(KERN_INFO "Device not ready. Make sure there is a disc in the drive.\n");
+ break;
+ }
+ case UNIT_ATTENTION:
+ if (dev->removable) {
+ dev->changed = 1;
+ SRpnt->sr_result = 0; /* This is no longer considered an error */
+ /* gag this error, VFS will log it anyway /axboe */
+ /* printk(KERN_INFO "Disc change detected.\n"); */
+ break;
+ }
+ default: /* Fall through for non-removable media */
+ printk("SCSI error: host %d id %d lun %d return code = %x\n",
+ dev->host->host_no,
+ dev->id,
+ dev->lun,
+ SRpnt->sr_result);
+ printk("\tSense class %x, sense error %x, extended sense %x\n",
+ sense_class(SRpnt->sr_sense_buffer[0]),
+ sense_error(SRpnt->sr_sense_buffer[0]),
+ SRpnt->sr_sense_buffer[2] & 0xf);
+
+ }
+
+ result = SRpnt->sr_result;
+
+ SCSI_LOG_IOCTL(2, printk("IOCTL Releasing command\n"));
+ SDpnt = SRpnt->sr_device;
+ scsi_release_request(SRpnt);
+ SRpnt = NULL;
+
+ return result;
+}
+
+/*
+ * This interface is deprecated - users should use the SCSI generic (sg)
+ * interface instead, as this is a more flexible approach to performing
+ * generic SCSI commands on a device.
+ *
+ * The structure that we are passed should look like:
+ *
+ * struct sdata {
+ * unsigned int inlen; [i] Length of data to be written to device
+ * unsigned int outlen; [i] Length of data to be read from device
+ * unsigned char cmd[x]; [i] SCSI command (6 <= x <= 12).
+ * [o] Data read from device starts here.
+ * [o] On error, sense buffer starts here.
+ * unsigned char wdata[y]; [i] Data written to device starts here.
+ * };
+ * Notes:
+ * - The SCSI command length is determined by examining the 1st byte
+ * of the given command. There is no way to override this.
+ * - Data transfers are limited to PAGE_SIZE (4K on i386, 8K on alpha).
+ * - The length (x + y) must be at least OMAX_SB_LEN bytes long to
+ * accommodate the sense buffer when an error occurs.
+ * The sense buffer is truncated to OMAX_SB_LEN (16) bytes so that
+ * old code will not be surprised.
+ * - If a Unix error occurs (e.g. ENOMEM) then the user will receive
+ * a negative return and the Unix error code in 'errno'.
+ * If the SCSI command succeeds then 0 is returned.
+ * Positive numbers returned are the compacted SCSI error codes (4
+ * bytes in one int) where the lowest byte is the SCSI status.
+ * See the drivers/scsi/scsi.h file for more information on this.
+ *
+ */
+#define OMAX_SB_LEN 16 /* Old sense buffer length */
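+
+/*
+ * Illustrative user-space sketch (not part of this driver; the file
+ * descriptor and headers are assumptions for the example): issuing a
+ * 6-byte INQUIRY through this interface.
+ *
+ *     #include <sys/ioctl.h>
+ *     #include <string.h>
+ *
+ *     struct { unsigned int inlen, outlen; unsigned char data[96]; } s;
+ *     unsigned char inq[6] = { 0x12, 0, 0, 0, 96, 0 };   INQUIRY
+ *
+ *     s.inlen  = 0;             no data is written to the device
+ *     s.outlen = 96;            expect up to 96 bytes back
+ *     memcpy(s.data, inq, 6);   command bytes precede the data area
+ *     if (ioctl(fd, SCSI_IOCTL_SEND_COMMAND, &s) == 0)
+ *             ...               s.data now holds the INQUIRY response
+ */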
+
+int scsi_ioctl_send_command(Scsi_Device * dev, Scsi_Ioctl_Command * sic)
+{
+ char *buf;
+ unsigned char cmd[MAX_COMMAND_SIZE];
+ char *cmd_in;
+ Scsi_Request *SRpnt;
+ Scsi_Device *SDpnt;
+ unsigned char opcode;
+ unsigned int inlen, outlen, cmdlen;
+ unsigned int needed, buf_needed;
+ int timeout, retries, result;
+ int data_direction;
+
+ if (!sic)
+ return -EINVAL;
+ /*
+ * Verify that we can read at least this much.
+ */
+ if (verify_area(VERIFY_READ, sic, sizeof(Scsi_Ioctl_Command)))
+ return -EFAULT;
+
+ if(__get_user(inlen, &sic->inlen))
+ return -EFAULT;
+
+ if(__get_user(outlen, &sic->outlen))
+ return -EFAULT;
+
+ /*
+ * We do not transfer more than MAX_BUF with this interface.
+ * If the user needs to transfer more data than this, they
+ * should use scsi_generics (sg) instead.
+ */
+ if (inlen > MAX_BUF)
+ return -EINVAL;
+ if (outlen > MAX_BUF)
+ return -EINVAL;
+
+ cmd_in = sic->data;
+ if(get_user(opcode, cmd_in))
+ return -EFAULT;
+
+ needed = buf_needed = (inlen > outlen ? inlen : outlen);
+ if (buf_needed) {
+ buf_needed = (buf_needed + 511) & ~511;
+ if (buf_needed > MAX_BUF)
+ buf_needed = MAX_BUF;
+ buf = (char *) scsi_malloc(buf_needed);
+ if (!buf)
+ return -ENOMEM;
+ memset(buf, 0, buf_needed);
+ if( inlen == 0 ) {
+ data_direction = SCSI_DATA_READ;
+ } else if (outlen == 0 ) {
+ data_direction = SCSI_DATA_WRITE;
+ } else {
+ /*
+ * Can this ever happen?
+ */
+ data_direction = SCSI_DATA_UNKNOWN;
+ }
+
+ } else {
+ buf = NULL;
+ data_direction = SCSI_DATA_NONE;
+ }
+
+ /*
+ * Obtain the command from the user's address space.
+ */
+ cmdlen = COMMAND_SIZE(opcode);
+
+ result = -EFAULT;
+
+ if (verify_area(VERIFY_READ, cmd_in, cmdlen + inlen))
+ goto error;
+
+ if(__copy_from_user(cmd, cmd_in, cmdlen))
+ goto error;
+
+ /*
+ * Obtain the data to be sent to the device (if any).
+ */
+
+ if(copy_from_user(buf, cmd_in + cmdlen, inlen))
+ goto error;
+
+ /*
+ * Set the lun field to the correct value.
+ */
+ if (dev->scsi_level <= SCSI_2)
+ cmd[1] = (cmd[1] & 0x1f) | (dev->lun << 5);
+
+ switch (opcode) {
+ case FORMAT_UNIT:
+ timeout = FORMAT_UNIT_TIMEOUT;
+ retries = 1;
+ break;
+ case START_STOP:
+ timeout = START_STOP_TIMEOUT;
+ retries = NORMAL_RETRIES;
+ break;
+ case MOVE_MEDIUM:
+ timeout = MOVE_MEDIUM_TIMEOUT;
+ retries = NORMAL_RETRIES;
+ break;
+ case READ_ELEMENT_STATUS:
+ timeout = READ_ELEMENT_STATUS_TIMEOUT;
+ retries = NORMAL_RETRIES;
+ break;
+ case READ_DEFECT_DATA:
+ timeout = READ_DEFECT_DATA_TIMEOUT;
+ retries = 1;
+ break;
+ default:
+ timeout = IOCTL_NORMAL_TIMEOUT;
+ retries = NORMAL_RETRIES;
+ break;
+ }
+
+#ifndef DEBUG_NO_CMD
+
+ SRpnt = scsi_allocate_request(dev);
+ if( SRpnt == NULL )
+ {
+ result = -EINTR;
+ goto error;
+ }
+
+ SRpnt->sr_data_direction = data_direction;
+ scsi_wait_req(SRpnt, cmd, buf, needed, timeout, retries);
+
+ /*
+ * If there was an error condition, pass the info back to the user.
+ */
+
+ result = SRpnt->sr_result;
+
+ if (SRpnt->sr_result) {
+ int sb_len = sizeof(SRpnt->sr_sense_buffer);
+
+ sb_len = (sb_len > OMAX_SB_LEN) ? OMAX_SB_LEN : sb_len;
+ if (copy_to_user(cmd_in, SRpnt->sr_sense_buffer, sb_len))
+ result = -EFAULT;
+ } else {
+ if (copy_to_user(cmd_in, buf, outlen))
+ result = -EFAULT;
+ }
+
+ SDpnt = SRpnt->sr_device;
+ scsi_release_request(SRpnt);
+ SRpnt = NULL;
+
+error:
+ if (buf)
+ scsi_free(buf, buf_needed);
+
+ return result;
+#else
+ {
+ int i;
+ printk("scsi_ioctl : device %d. command = ", dev->id);
+ for (i = 0; i < cmdlen; ++i)
+ printk("%02x ", cmd[i]);
+ printk("\nbuffer =");
+ for (i = 0; i < 20; ++i)
+ printk("%02x ", buf[i]);
+ printk("\n");
+ printk("inlen = %d, outlen = %d, cmdlen = %d\n",
+ inlen, outlen, cmdlen);
+ printk("buffer = %d, cmd_in = %d\n", buffer, cmd_in);
+ }
+ return 0;
+#endif
+}
+
+/*
+ * The scsi_ioctl_get_pci() function places into arg the value
+ * pci_dev::slot_name (8 characters) for the PCI device (if any).
+ * Returns: 0 on success
+ * -ENXIO if there isn't a PCI device pointer
+ * (could be because the SCSI driver hasn't been
+ * updated yet, or because it isn't a SCSI
+ * device)
+ * any copy_to_user() error on failure there
+ */
+static int
+scsi_ioctl_get_pci(Scsi_Device * dev, void *arg)
+{
+
+ if (!dev->host->pci_dev)
+ return -ENXIO;
+ if(copy_to_user(arg, dev->host->pci_dev->slot_name,
+ sizeof(dev->host->pci_dev->slot_name)))
+ return -EFAULT;
+ return 0;
+}
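+
+/*
+ * Illustrative user-space sketch (fd is an assumption): the buffer
+ * must be at least as large as pci_dev::slot_name (8 bytes), and the
+ * result is not guaranteed to be NUL-terminated:
+ *
+ *     char slot[8];
+ *     if (ioctl(fd, SCSI_IOCTL_GET_PCI, slot) == 0)
+ *             printf("%.8s\n", slot);
+ */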
+
+
+/*
+ * the scsi_ioctl() function differs from most ioctls in that it does
+ * not take a major/minor number as the dev field. Rather, it takes
+ * a pointer to a scsi_devices[] element, a structure.
+ */
+int scsi_ioctl(Scsi_Device * dev, int cmd, void *arg)
+{
+ char scsi_cmd[MAX_COMMAND_SIZE];
+ char cmd_byte1;
+
+ /* No idea how this happens.... */
+ if (!dev)
+ return -ENXIO;
+
+ /*
+ * If we are in the middle of error recovery, don't let anyone
+ * else try and use this device. Also, if error recovery fails, it
+ * may try and take the device offline, in which case all further
+ * access to the device is prohibited.
+ */
+ if (!scsi_block_when_processing_errors(dev)) {
+ return -ENODEV;
+ }
+ cmd_byte1 = (dev->scsi_level <= SCSI_2) ? (dev->lun << 5) : 0;
+
+ switch (cmd) {
+ case SCSI_IOCTL_GET_IDLUN:
+ if (verify_area(VERIFY_WRITE, arg, sizeof(Scsi_Idlun)))
+ return -EFAULT;
+
+ __put_user((dev->id & 0xff)
+ + ((dev->lun & 0xff) << 8)
+ + ((dev->channel & 0xff) << 16)
+ + ((dev->host->host_no & 0xff) << 24),
+ &((Scsi_Idlun *) arg)->dev_id);
+ __put_user(dev->host->unique_id, &((Scsi_Idlun *) arg)->host_unique_id);
+ return 0;
+ case SCSI_IOCTL_GET_BUS_NUMBER:
+ return put_user(dev->host->host_no, (int *) arg);
+ case SCSI_IOCTL_TAGGED_ENABLE:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (!dev->tagged_supported)
+ return -EINVAL;
+ dev->tagged_queue = 1;
+ dev->current_tag = 1;
+ return 0;
+ case SCSI_IOCTL_TAGGED_DISABLE:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (!dev->tagged_supported)
+ return -EINVAL;
+ dev->tagged_queue = 0;
+ dev->current_tag = 0;
+ return 0;
+ case SCSI_IOCTL_PROBE_HOST:
+ return ioctl_probe(dev->host, arg);
+ case SCSI_IOCTL_SEND_COMMAND:
+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
+ return -EACCES;
+ return scsi_ioctl_send_command((Scsi_Device *) dev,
+ (Scsi_Ioctl_Command *) arg);
+ case SCSI_IOCTL_DOORLOCK:
+ if (!dev->removable || !dev->lockable)
+ return 0;
+ scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL;
+ scsi_cmd[1] = cmd_byte1;
+ scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+ scsi_cmd[4] = SCSI_REMOVAL_PREVENT;
+ return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+ IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
+ case SCSI_IOCTL_DOORUNLOCK:
+ if (!dev->removable || !dev->lockable)
+ return 0;
+ scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL;
+ scsi_cmd[1] = cmd_byte1;
+ scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+ scsi_cmd[4] = SCSI_REMOVAL_ALLOW;
+ return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+ IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
+ case SCSI_IOCTL_TEST_UNIT_READY:
+ scsi_cmd[0] = TEST_UNIT_READY;
+ scsi_cmd[1] = cmd_byte1;
+ scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+ scsi_cmd[4] = 0;
+ return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+ IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
+ case SCSI_IOCTL_START_UNIT:
+ scsi_cmd[0] = START_STOP;
+ scsi_cmd[1] = cmd_byte1;
+ scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+ scsi_cmd[4] = 1;
+ return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+ START_STOP_TIMEOUT, NORMAL_RETRIES);
+ case SCSI_IOCTL_STOP_UNIT:
+ scsi_cmd[0] = START_STOP;
+ scsi_cmd[1] = cmd_byte1;
+ scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+ scsi_cmd[4] = 0;
+ return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+ START_STOP_TIMEOUT, NORMAL_RETRIES);
+ case SCSI_IOCTL_GET_PCI:
+ return scsi_ioctl_get_pci(dev, arg);
+ default:
+ if (dev->host->hostt->ioctl)
+ return dev->host->hostt->ioctl(dev, cmd, arg);
+ return -EINVAL;
+ }
+ return -EINVAL;
+}
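+
+/*
+ * Illustrative sketch of unpacking SCSI_IOCTL_GET_IDLUN from user
+ * space (the field layout mirrors the packing above):
+ *
+ *     Scsi_Idlun idlun;
+ *     ioctl(fd, SCSI_IOCTL_GET_IDLUN, &idlun);
+ *     int id      =  idlun.dev_id        & 0xff;
+ *     int lun     = (idlun.dev_id >> 8)  & 0xff;
+ *     int channel = (idlun.dev_id >> 16) & 0xff;
+ *     int host    = (idlun.dev_id >> 24) & 0xff;
+ */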
+
+/*
+ * Just like scsi_ioctl, but callable from kernel space: it widens the
+ * address limit (fs) around the call so that the user-copy routines
+ * accept kernel pointers.
+ */
+
+int kernel_scsi_ioctl(Scsi_Device * dev, int cmd, void *arg)
+{
+ mm_segment_t oldfs;
+ int tmp;
+ oldfs = get_fs();
+ set_fs(get_ds());
+ tmp = scsi_ioctl(dev, cmd, arg);
+ set_fs(oldfs);
+ return tmp;
+}
+
+/*
+ * Overrides for Emacs so that we almost follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/scsi_lib.c b/xen/drivers/scsi/scsi_lib.c
new file mode 100644
index 0000000000..8c32bf547f
--- /dev/null
+++ b/xen/drivers/scsi/scsi_lib.c
@@ -0,0 +1,1201 @@
+/*
+ * scsi_lib.c Copyright (C) 1999 Eric Youngdale
+ *
+ * SCSI queueing library.
+ * Initial versions: Eric Youngdale (eric@andante.org).
+ * Based upon conversations with large numbers
+ * of people at Linux Expo.
+ */
+
+/*
+ * The fundamental purpose of this file is to contain a library of utility
+ * routines that can be used by low-level drivers. Ultimately the idea
+ * is that there should be a sufficiently rich number of functions that it
+ * would be possible for a driver author to fashion a queueing function for
+ * a low-level driver if they wished. Note however that this file also
+ * contains the "default" versions of these functions, as we don't want to
+ * go through and retrofit queueing functions into all 30 some-odd drivers.
+ */
+
+#define __NO_VERSION__
+#include <xeno/module.h>
+
+#include <xeno/sched.h>
+#include <xeno/timer.h>
+/* #include <xeno/string.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/ioport.h> */
+/* #include <xeno/kernel.h> */
+/* #include <xeno/stat.h> */
+#include <xeno/blk.h>
+/* #include <xeno/interrupt.h> */
+/* #include <xeno/delay.h> */
+/* #include <xeno/smp_lock.h> */
+/* #include <xeno/completion.h> */
+
+
+#define __KERNEL_SYSCALLS__
+
+/* #include <xeno/unistd.h> */
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+#include <scsi/scsi_ioctl.h>
+
+#define SPECIAL XEN_BLOCK_SPECIAL
+
+/*
+ * This entire source file deals with the new queueing code.
+ */
+
+/*
+ * Function: __scsi_insert_special()
+ *
+ * Purpose: worker for scsi_insert_special_*()
+ *
+ * Arguments: q - request queue where request should be inserted
+ * rq - request to be inserted
+ * data - private data
+ * at_head - insert request at head or tail of queue
+ *
+ * Lock status: Assumed that io_request_lock is not held upon entry.
+ *
+ * Returns: Nothing
+ */
+static void __scsi_insert_special(request_queue_t *q, struct request *rq,
+ void *data, int at_head)
+{
+ unsigned long flags;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ rq->cmd = SPECIAL;
+ rq->special = data;
+ rq->q = NULL;
+ rq->nr_segments = 0;
+ rq->elevator_sequence = 0;
+
+ /*
+ * We have the option of inserting the head or the tail of the queue.
+ * Typically we use the tail for new ioctls and so forth. We use the
+ * head of the queue for things like a QUEUE_FULL message from a
+ * device, or a host that is unable to accept a particular command.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+
+ if (at_head)
+ list_add(&rq->queue, &q->queue_head);
+ else
+ list_add_tail(&rq->queue, &q->queue_head);
+
+ q->request_fn(q);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+
+/*
+ * Function: scsi_insert_special_cmd()
+ *
+ * Purpose: Insert pre-formed command into request queue.
+ *
+ * Arguments: SCpnt - command that is ready to be queued.
+ * at_head - boolean. True if we should insert at head
+ * of queue, false if we should insert at tail.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns: Nothing
+ *
+ * Notes: This function is called from character device and from
+ * ioctl types of functions where the caller knows exactly
+ * what SCSI command needs to be issued. The idea is that
+ * we merely inject the command into the queue (at the head
+ * for now), and then call the queue request function to actually
+ * process it.
+ */
+int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int at_head)
+{
+ request_queue_t *q = &SCpnt->device->request_queue;
+
+ __scsi_insert_special(q, &SCpnt->request, SCpnt, at_head);
+ return 0;
+}
+
+/*
+ * Function: scsi_insert_special_req()
+ *
+ * Purpose: Insert pre-formed request into request queue.
+ *
+ * Arguments: SRpnt - request that is ready to be queued.
+ * at_head - boolean. True if we should insert at head
+ * of queue, false if we should insert at tail.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns: Nothing
+ *
+ * Notes: This function is called from character device and from
+ * ioctl types of functions where the caller knows exactly
+ * what SCSI command needs to be issued. The idea is that
+ * we merely inject the command into the queue (at the head
+ * for now), and then call the queue request function to actually
+ * process it.
+ */
+int scsi_insert_special_req(Scsi_Request * SRpnt, int at_head)
+{
+ request_queue_t *q = &SRpnt->sr_device->request_queue;
+
+ __scsi_insert_special(q, &SRpnt->sr_request, SRpnt, at_head);
+ return 0;
+}
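+
+/*
+ * For illustration, this is in essence how the stock Linux 2.4
+ * mid-layer's scsi_wait_req() path feeds a request through here
+ * (a sketch; the completion plumbing is elided in this tree):
+ *
+ *     DECLARE_COMPLETION(wait);
+ *     SRpnt->sr_request.waiting = &wait;
+ *     scsi_insert_special_req(SRpnt, 0);     queue at the tail
+ *     wait_for_completion(&wait);
+ *     SRpnt->sr_request.waiting = NULL;
+ */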
+
+/*
+ * Function: scsi_init_cmd_errh()
+ *
+ * Purpose: Initialize SCpnt fields related to error handling.
+ *
+ * Arguments: SCpnt - command that is ready to be queued.
+ *
+ * Returns: Nothing
+ *
+ * Notes: This function has the job of initializing a number of
+ * fields related to error handling. Typically this will
+ * be called once for each command, as required.
+ */
+int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt)
+{
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ SCpnt->owner = SCSI_OWNER_MIDLEVEL;
+ SCpnt->reset_chain = NULL;
+ SCpnt->serial_number = 0;
+ SCpnt->serial_number_at_timeout = 0;
+ SCpnt->flags = 0;
+ SCpnt->retries = 0;
+
+ SCpnt->abort_reason = 0;
+
+ memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer);
+
+ if (SCpnt->cmd_len == 0)
+ SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+
+ /*
+ * We need saved copies of a number of fields - this is because
+ * error handling may need to overwrite these with different values
+ * to run different commands, and once error handling is complete,
+ * we will need to restore these values prior to running the actual
+ * command.
+ */
+ SCpnt->old_use_sg = SCpnt->use_sg;
+ SCpnt->old_cmd_len = SCpnt->cmd_len;
+ SCpnt->sc_old_data_direction = SCpnt->sc_data_direction;
+ SCpnt->old_underflow = SCpnt->underflow;
+ memcpy((void *) SCpnt->data_cmnd,
+ (const void *) SCpnt->cmnd, sizeof(SCpnt->cmnd));
+ SCpnt->buffer = SCpnt->request_buffer;
+ SCpnt->bufflen = SCpnt->request_bufflen;
+
+ SCpnt->reset_chain = NULL;
+
+ SCpnt->internal_timeout = NORMAL_TIMEOUT;
+ SCpnt->abort_reason = 0;
+
+ return 1;
+}
+
+/*
+ * Function: scsi_queue_next_request()
+ *
+ * Purpose: Handle post-processing of completed commands.
+ *
+ * Arguments: SCpnt - command that may need to be requeued.
+ *
+ * Returns: Nothing
+ *
+ * Notes: After command completion, there may be blocks left
+ * over which weren't finished by the previous command;
+ * this can be for a number of reasons - the main one is
+ * that a medium error occurred, and the sectors after
+ * the bad block need to be re-read.
+ *
+ * If SCpnt is NULL, it means that the previous command
+ * was completely finished, and we should simply start
+ * a new command, if possible.
+ *
+ * This is where a lot of special case code has begun to
+ * accumulate. It doesn't really affect readability or
+ * anything, but it might be considered architecturally
+ * inelegant. If more of these special cases start to
+ * accumulate, I am thinking along the lines of implementing
+ * an atexit() like technology that gets run when commands
+ * complete. I am not convinced that it is worth the
+ * added overhead, however. Right now as things stand,
+ * there are simple conditional checks, and most hosts
+ * would skip past.
+ *
+ * Another possible solution would be to tailor different
+ * handler functions, sort of like what we did in scsi_merge.c.
+ * This is probably a better solution, but the number of different
+ * permutations grows as 2**N, and if too many more special cases
+ * get added, we start to get screwed.
+ */
+void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt)
+{
+ int all_clear;
+ unsigned long flags;
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *SHpnt;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ if (SCpnt != NULL) {
+
+ /*
+ * For some reason, we are not done with this request.
+ * This happens for I/O errors in the middle of the request,
+ * in which case we need to request the blocks that come after
+ * the bad sector.
+ */
+ SCpnt->request.special = (void *) SCpnt;
+ list_add(&SCpnt->request.queue, &q->queue_head);
+ }
+
+ /*
+ * Just hit the requeue function for the queue.
+ */
+ q->request_fn(q);
+
+ SDpnt = (Scsi_Device *) q->queuedata;
+ SHpnt = SDpnt->host;
+
+ /*
+ * If this is a single-lun device, and we are currently finished
+ * with this device, then see if we need to get another device
+ * started. FIXME(eric) - if this function gets too cluttered
+ * with special case code, then spin off separate versions and
+ * use function pointers to pick the right one.
+ */
+ if (SDpnt->single_lun
+ && list_empty(&q->queue_head)
+ && SDpnt->device_busy == 0) {
+ request_queue_t *q;
+
+ for (SDpnt = SHpnt->host_queue;
+ SDpnt;
+ SDpnt = SDpnt->next) {
+ if (((SHpnt->can_queue > 0)
+ && (SHpnt->host_busy >= SHpnt->can_queue))
+ || (SHpnt->host_blocked)
+ || (SHpnt->host_self_blocked)
+ || (SDpnt->device_blocked)) {
+ break;
+ }
+ q = &SDpnt->request_queue;
+ q->request_fn(q);
+ }
+ }
+
+ /*
+ * Now see whether there are other devices on the bus which
+ * might be starved. If so, hit the request function. If we
+ * don't find any, then it is safe to reset the flag. If we
+ * find any device that is starved, it isn't safe to reset the
+ * flag as the queue function releases the lock and thus some
+ * other device might have become starved along the way.
+ */
+ all_clear = 1;
+ if (SHpnt->some_device_starved) {
+ for (SDpnt = SHpnt->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ request_queue_t *q;
+ if ((SHpnt->can_queue > 0 &&(SHpnt->host_busy >= SHpnt->can_queue))
+ || (SHpnt->host_blocked)
+ || (SHpnt->host_self_blocked)) {
+ break;
+ }
+ if (SDpnt->device_blocked || !SDpnt->starved) {
+ continue;
+ }
+ q = &SDpnt->request_queue;
+ q->request_fn(q);
+ all_clear = 0;
+ }
+ if (SDpnt == NULL && all_clear) {
+ SHpnt->some_device_starved = 0;
+ }
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * Function: __scsi_end_request()
+ *
+ * Purpose: Post-processing of completed commands called from interrupt
+ * handler or a bottom-half handler.
+ *
+ * Arguments: SCpnt - command that is complete.
+ * uptodate - 1 if I/O indicates success, 0 for I/O error.
+ * sectors - number of sectors we want to mark.
+ * requeue - if set, requeue any leftover blocks of the request.
+ * frequeue - if set, call the queue request function once the
+ * command block has been released.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns: The command block if leftover blocks remain, or NULL once
+ * the request has been fully completed and released.
+ *
+ * Notes: This is called for block device requests in order to
+ * mark some number of sectors as complete.
+ *
+ * We are guaranteeing that the request queue will be goosed
+ * at some point during this call.
+ */
+static Scsi_Cmnd *__scsi_end_request(Scsi_Cmnd * SCpnt,
+ int uptodate,
+ int sectors,
+ int requeue,
+ int frequeue)
+{
+ struct request *req;
+ struct buffer_head *bh;
+ Scsi_Device * SDpnt;
+ int nsect;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ req = &SCpnt->request;
+ req->errors = 0;
+ if (!uptodate) {
+ printk(" I/O error: dev %s, sector %lu\n",
+ kdevname(req->rq_dev), req->sector);
+ }
+ do {
+ if ((bh = req->bh) != NULL) {
+ nsect = bh->b_size >> 9;
+ blk_finished_io(nsect);
+ req->bh = bh->b_reqnext;
+ bh->b_reqnext = NULL;
+ sectors -= nsect;
+ bh->b_end_io(bh, uptodate);
+ if ((bh = req->bh) != NULL) {
+ req->hard_sector += nsect;
+ req->hard_nr_sectors -= nsect;
+ req->sector += nsect;
+ req->nr_sectors -= nsect;
+
+ req->current_nr_sectors = bh->b_size >> 9;
+ if (req->nr_sectors < req->current_nr_sectors) {
+ req->nr_sectors = req->current_nr_sectors;
+ printk("scsi_end_request: buffer-list destroyed\n");
+ }
+ }
+ }
+ } while (sectors && bh);
+
+ /*
+ * If there are blocks left over at the end, set up the command
+ * to queue the remainder of them.
+ */
+ if (req->bh) {
+ request_queue_t *q;
+
+ if( !requeue )
+ {
+ return SCpnt;
+ }
+
+ q = &SCpnt->device->request_queue;
+
+ req->buffer = bh->b_data;
+ /*
+ * Bleah. Leftovers again. Stick the leftovers in
+ * the front of the queue, and goose the queue again.
+ */
+ scsi_queue_next_request(q, SCpnt);
+ return SCpnt;
+ }
+#if 0
+ /*
+ * This request is done. If there is someone blocked waiting for this
+ * request, wake them up. Typically used to wake up processes trying
+ * to swap a page into memory.
+ */
+ if (req->waiting != NULL) {
+ complete(req->waiting);
+ }
+#endif
+ req_finished_io(req);
+ add_blkdev_randomness(MAJOR(req->rq_dev));
+
+ SDpnt = SCpnt->device;
+
+ /*
+ * This will goose the queue request function at the end, so we don't
+ * need to worry about launching another command.
+ */
+ __scsi_release_command(SCpnt);
+
+ if( frequeue ) {
+ request_queue_t *q;
+
+ q = &SDpnt->request_queue;
+ scsi_queue_next_request(q, NULL);
+ }
+ return NULL;
+}
+
+/*
+ * Function: scsi_end_request()
+ *
+ * Purpose: Post-processing of completed commands called from interrupt
+ * handler or a bottom-half handler.
+ *
+ * Arguments: SCpnt - command that is complete.
+ * uptodate - 1 if I/O indicates success, 0 for I/O error.
+ * sectors - number of sectors we want to mark.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns: The command block if leftover blocks remain (they are
+ * requeued automatically), or NULL when the request is complete.
+ *
+ * Notes: This is called for block device requests in order to
+ * mark some number of sectors as complete.
+ *
+ * We are guaranteeing that the request queue will be goosed
+ * at some point during this call.
+ */
+Scsi_Cmnd *scsi_end_request(Scsi_Cmnd * SCpnt, int uptodate, int sectors)
+{
+ return __scsi_end_request(SCpnt, uptodate, sectors, 1, 1);
+}
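+
+/*
+ * Typical use from an upper-level driver's completion path (sketch):
+ *
+ *     SCpnt = scsi_end_request(SCpnt, 1, good_sectors);
+ *     if (SCpnt == NULL)
+ *             return;        request fully completed and released
+ *     ...                    SCpnt still describes the unfinished
+ *                            tail, which has been requeued for us
+ */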
+
+/*
+ * Function: scsi_release_buffers()
+ *
+ * Purpose: Completion processing for block device I/O requests.
+ *
+ * Arguments: SCpnt - command that we are bailing.
+ *
+ * Lock status: Assumed that no lock is held upon entry.
+ *
+ * Returns: Nothing
+ *
+ * Notes: In the event that an upper level driver rejects a
+ * command, we must release resources allocated during
+ * the __init_io() function. Primarily this would involve
+ * the scatter-gather table, and potentially any bounce
+ * buffers.
+ */
+static void scsi_release_buffers(Scsi_Cmnd * SCpnt)
+{
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ /*
+ * Free up any indirection buffers we allocated for DMA purposes.
+ */
+ if (SCpnt->use_sg) {
+ struct scatterlist *sgpnt;
+ void **bbpnt;
+ int i;
+
+ sgpnt = (struct scatterlist *) SCpnt->request_buffer;
+ bbpnt = SCpnt->bounce_buffers;
+
+ if (bbpnt) {
+ for (i = 0; i < SCpnt->use_sg; i++) {
+ if (bbpnt[i])
+ scsi_free(sgpnt[i].address, sgpnt[i].length);
+ }
+ }
+ scsi_free(SCpnt->request_buffer, SCpnt->sglist_len);
+ } else {
+ if (SCpnt->request_buffer != SCpnt->request.buffer) {
+ scsi_free(SCpnt->request_buffer, SCpnt->request_bufflen);
+ }
+ }
+
+ /*
+ * Zero these out. They now point to freed memory, and it is
+ * dangerous to hang onto the pointers.
+ */
+ SCpnt->buffer = NULL;
+ SCpnt->bufflen = 0;
+ SCpnt->request_buffer = NULL;
+ SCpnt->request_bufflen = 0;
+}
+
+/*
+ * Function: scsi_io_completion()
+ *
+ * Purpose: Completion processing for block device I/O requests.
+ *
+ * Arguments: SCpnt - command that is finished.
+ *
+ * Lock status: Assumed that no lock is held upon entry.
+ *
+ * Returns: Nothing
+ *
+ * Notes: This function is matched in terms of capabilities to
+ * the function that created the scatter-gather list.
+ * In other words, if there are no bounce buffers
+ * (the normal case for most drivers), we don't need
+ * the logic to deal with cleaning up afterwards.
+ */
+void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors,
+ int block_sectors)
+{
+ int result = SCpnt->result;
+ int this_count = SCpnt->bufflen >> 9;
+ request_queue_t *q = &SCpnt->device->request_queue;
+
+ /*
+ * We must do one of several things here:
+ *
+ * Call scsi_end_request. This will finish off the specified
+ * number of sectors. If we are done, the command block will
+ * be released, and the queue function will be goosed. If we
+ * are not done, then scsi_end_request will directly goose
+ * the queue.
+ *
+ * We can just use scsi_queue_next_request() here. This
+ * would be used if we just wanted to retry, for example.
+ *
+ */
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ /*
+ * Free up any indirection buffers we allocated for DMA purposes.
+ * For the case of a READ, we need to copy the data out of the
+ * bounce buffer and into the real buffer.
+ */
+ if (SCpnt->use_sg) {
+ struct scatterlist *sgpnt;
+ void **bbpnt;
+ int i;
+
+ sgpnt = (struct scatterlist *) SCpnt->buffer;
+ bbpnt = SCpnt->bounce_buffers;
+
+ if (bbpnt) {
+ for (i = 0; i < SCpnt->use_sg; i++) {
+ if (bbpnt[i]) {
+ if (SCpnt->request.cmd == READ) {
+ memcpy(bbpnt[i],
+ sgpnt[i].address,
+ sgpnt[i].length);
+ }
+ scsi_free(sgpnt[i].address, sgpnt[i].length);
+ }
+ }
+ }
+ scsi_free(SCpnt->buffer, SCpnt->sglist_len);
+ } else {
+ if (SCpnt->buffer != SCpnt->request.buffer) {
+ if (SCpnt->request.cmd == READ) {
+ memcpy(SCpnt->request.buffer, SCpnt->buffer,
+ SCpnt->bufflen);
+ }
+ scsi_free(SCpnt->buffer, SCpnt->bufflen);
+ }
+ }
+
+ /*
+ * Zero these out. They now point to freed memory, and it is
+ * dangerous to hang onto the pointers.
+ */
+ SCpnt->buffer = NULL;
+ SCpnt->bufflen = 0;
+ SCpnt->request_buffer = NULL;
+ SCpnt->request_bufflen = 0;
+
+ /*
+ * Next deal with any sectors which we were able to correctly
+ * handle.
+ */
+ if (good_sectors > 0) {
+ SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d sectors done.\n",
+ SCpnt->request.nr_sectors,
+ good_sectors));
+ SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n ", SCpnt->use_sg));
+
+ SCpnt->request.errors = 0;
+ /*
+ * If multiple sectors are requested in one buffer, then
+ * they will have been finished off by the first command.
+ * If not, then we have a multi-buffer command.
+ *
+ * If block_sectors != 0, it means we had a medium error
+ * of some sort, and that we want to mark some number of
+ * sectors as not uptodate. Thus we want to inhibit
+ * requeueing right here - we will requeue down below
+ * when we handle the bad sectors.
+ */
+ SCpnt = __scsi_end_request(SCpnt,
+ 1,
+ good_sectors,
+ result == 0,
+ 1);
+
+ /*
+ * If the command completed without error, then either finish off the
+ * rest of the command, or start a new one.
+ */
+ if (result == 0 || SCpnt == NULL ) {
+ return;
+ }
+ }
+ /*
+ * Now, if we were good little boys and girls, Santa left us a request
+ * sense buffer. We can extract information from this, so we
+ * can choose a block to remap, etc.
+ */
+ if (driver_byte(result) != 0) {
+ if (suggestion(result) == SUGGEST_REMAP) {
+#ifdef REMAP
+ /*
+ * Not yet implemented. A read will fail after being remapped,
+ * a write will call the strategy routine again.
+ */
+ if (SCpnt->device->remap) {
+ result = 0;
+ }
+#endif
+ }
+ if ((SCpnt->sense_buffer[0] & 0x7f) == 0x70) {
+ /*
+ * If the device is in the process of becoming ready,
+ * retry.
+ */
+ if (SCpnt->sense_buffer[12] == 0x04 &&
+ SCpnt->sense_buffer[13] == 0x01) {
+ scsi_queue_next_request(q, SCpnt);
+ return;
+ }
+ if ((SCpnt->sense_buffer[2] & 0xf) == UNIT_ATTENTION) {
+ if (SCpnt->device->removable) {
+ /* detected disc change. set a bit
+ * and quietly refuse further access.
+ */
+ SCpnt->device->changed = 1;
+ SCpnt = scsi_end_request(SCpnt, 0, this_count);
+ return;
+ } else {
+ /*
+ * Must have been a power glitch, or a
+ * bus reset. Could not have been a
+ * media change, so we just retry the
+ * request and see what happens.
+ */
+ scsi_queue_next_request(q, SCpnt);
+ return;
+ }
+ }
+ }
+ /* If we had an ILLEGAL REQUEST returned, then we may have
+ * performed an unsupported command. Typically this would be a
+ * ten byte read where only a six byte read is supported. Also,
+ * on a system where READ CAPACITY failed, we may have read
+ * past the end of the disk.
+ */
+
+ switch (SCpnt->sense_buffer[2]) {
+ case ILLEGAL_REQUEST:
+ if (SCpnt->device->ten) {
+ SCpnt->device->ten = 0;
+ /*
+ * This will cause a retry with a 6-byte
+ * command.
+ */
+ scsi_queue_next_request(q, SCpnt);
+ result = 0;
+ } else {
+ SCpnt = scsi_end_request(SCpnt, 0, this_count);
+ return;
+ }
+ break;
+ case NOT_READY:
+ printk(KERN_INFO "Device %s not ready.\n",
+ kdevname(SCpnt->request.rq_dev));
+ SCpnt = scsi_end_request(SCpnt, 0, this_count);
+ return;
+ break;
+ case MEDIUM_ERROR:
+ case VOLUME_OVERFLOW:
+ printk("scsi%d: ERROR on channel %d, id %d, lun %d, CDB: ",
+ SCpnt->host->host_no, (int) SCpnt->channel,
+ (int) SCpnt->target, (int) SCpnt->lun);
+ print_command(SCpnt->cmnd);
+ print_sense("sd", SCpnt);
+ SCpnt = scsi_end_request(SCpnt, 0, block_sectors);
+ return;
+ default:
+ break;
+ }
+ } /* driver byte != 0 */
+ if (host_byte(result) == DID_RESET) {
+ /*
+ * Third party bus reset or reset for error
+ * recovery reasons. Just retry the request
+ * and see what happens.
+ */
+ scsi_queue_next_request(q, SCpnt);
+ return;
+ }
+ if (result) {
+ struct Scsi_Device_Template *STpnt;
+
+ STpnt = scsi_get_request_dev(&SCpnt->request);
+ printk("SCSI %s error : host %d channel %d id %d lun %d return code = %x\n",
+ (STpnt ? STpnt->name : "device"),
+ SCpnt->device->host->host_no,
+ SCpnt->device->channel,
+ SCpnt->device->id,
+ SCpnt->device->lun, result);
+
+ if (driver_byte(result) & DRIVER_SENSE)
+ print_sense("sd", SCpnt);
+ /*
+ * Mark a single buffer as not uptodate. Queue the remainder.
+ * We sometimes get this cruft in the event that a medium error
+ * isn't properly reported.
+ */
+ SCpnt = scsi_end_request(SCpnt, 0, SCpnt->request.current_nr_sectors);
+ return;
+ }
+}
+
+/*
+ * Function: scsi_get_request_dev()
+ *
+ * Purpose: Find the upper-level driver that is responsible for this
+ * request
+ *
+ * Arguments: request - I/O request we are preparing to queue.
+ *
+ * Lock status: No locks assumed to be held, but as it happens the
+ * io_request_lock is held when this is called.
+ *
+ * Returns: The matching Scsi_Device_Template, or NULL if no
+ * upper-level driver claims this major number.
+ *
+ * Notes: The requests in the request queue may have originated
+ * from any block device driver. We need to find out which
+ * one so that we can later form the appropriate command.
+ */
+struct Scsi_Device_Template *scsi_get_request_dev(struct request *req)
+{
+ struct Scsi_Device_Template *spnt;
+ kdev_t dev = req->rq_dev;
+ int major = MAJOR(dev);
+
+ ASSERT_LOCK(&io_request_lock, 1);
+
+ for (spnt = scsi_devicelist; spnt; spnt = spnt->next) {
+ /*
+ * Search for a block device driver that supports this
+ * major.
+ */
+ if (spnt->blk && spnt->major == major) {
+ return spnt;
+ }
+ /*
+ * I am still not entirely satisfied with this solution,
+ * but it is good enough for now. Disks have a number of
+ * major numbers associated with them, the primary
+ * 8, which we test above, and a secondary range of 7
+ * different consecutive major numbers. If this ever
+ * becomes insufficient, then we could add another function
+ * to the structure, and generalize this completely.
+ */
+ if( spnt->min_major != 0
+ && spnt->max_major != 0
+ && major >= spnt->min_major
+ && major <= spnt->max_major )
+ {
+ return spnt;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Function: scsi_request_fn()
+ *
+ * Purpose: Generic version of request function for SCSI hosts.
+ *
+ * Arguments: q - Pointer to actual queue.
+ *
+ * Returns: Nothing
+ *
+ * Lock status: IO request lock assumed to be held when called.
+ *
+ * Notes: The theory is that this function is something which individual
+ * drivers could also supply if they wished to. The problem
+ * is that we have 30 some odd low-level drivers in the kernel
+ * tree already, and it would be most difficult to retrofit
+ * this crap into all of them. Thus this function has the job
+ * of acting as a generic queue manager for all of those existing
+ * drivers.
+ */
+void scsi_request_fn(request_queue_t * q)
+{
+ struct request *req;
+ Scsi_Cmnd *SCpnt;
+ Scsi_Request *SRpnt;
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *SHpnt;
+ struct Scsi_Device_Template *STpnt;
+
+ ASSERT_LOCK(&io_request_lock, 1);
+
+ SDpnt = (Scsi_Device *) q->queuedata;
+ if (!SDpnt) {
+ panic("Missing device");
+ }
+ SHpnt = SDpnt->host;
+
+ /*
+ * To start with, we keep looping until the queue is empty, or until
+ * the host is no longer able to accept any more requests.
+ */
+ for (;;) {
+ /*
+ * Check this again - each time we loop through we will have
+ * released the lock and grabbed it again, so each time
+ * we need to check to see if the queue is plugged or not.
+ */
+ if (SHpnt->in_recovery || q->plugged)
+ return;
+
+ /*
+ * If the device cannot accept another request, then quit.
+ */
+ if (SDpnt->device_blocked) {
+ break;
+ }
+ if ((SHpnt->can_queue > 0 && (SHpnt->host_busy >= SHpnt->can_queue))
+ || (SHpnt->host_blocked)
+ || (SHpnt->host_self_blocked)) {
+ /*
+ * If we are unable to process any commands at all for
+ * this device, then we consider it to be starved.
+ * What this means is that there are no outstanding
+ * commands for this device and hence we need a
+ * little help getting it started again
+ * once the host isn't quite so busy.
+ */
+ if (SDpnt->device_busy == 0) {
+ SDpnt->starved = 1;
+ SHpnt->some_device_starved = 1;
+ }
+ break;
+ } else {
+ SDpnt->starved = 0;
+ }
+
+ /*
+ * FIXME(eric)
+ * I am not sure where the best place to do this is. We need
+ * to hook in a place where we are likely to come if in user
+ * space. Technically the error handling thread should be
+ * doing this crap, but the error handler isn't used by
+ * most hosts.
+ */
+ if (SDpnt->was_reset) {
+ /*
+ * We need to relock the door, but we might
+ * be in an interrupt handler. Only do this
+ * from user space, since we do not want to
+ * sleep from an interrupt.
+ *
+ * FIXME(eric) - have the error handler thread do
+ * this work.
+ */
+ SDpnt->was_reset = 0;
+ if (SDpnt->removable && !in_interrupt()) {
+ spin_unlock_irq(&io_request_lock);
+ scsi_ioctl(SDpnt, SCSI_IOCTL_DOORLOCK, 0);
+ spin_lock_irq(&io_request_lock);
+ continue;
+ }
+ }
+
+ /*
+ * If we couldn't find a request that could be queued, then we
+ * can also quit.
+ */
+ if (list_empty(&q->queue_head))
+ break;
+
+ /*
+ * Take the next request from the head of the queue; all of the
+ * requests in this queue are for the same device.
+ */
+ req = blkdev_entry_next_request(&q->queue_head);
+
+ /*
+ * Find the actual device driver associated with this command.
+ * The SPECIAL requests are things like character device or
+ * ioctls, which did not originate from ll_rw_blk. Note that
+ * the special field is also used to indicate the SCpnt for
+ * the remainder of a partially fulfilled request that can
+ * come up when there is a medium error. We have to treat
+ * these two cases differently. We differentiate by looking
+ * at request.cmd, as this tells us the real story.
+ */
+ if (req->cmd == SPECIAL) {
+ STpnt = NULL;
+ SCpnt = (Scsi_Cmnd *) req->special;
+ SRpnt = (Scsi_Request *) req->special;
+
+ if( SRpnt->sr_magic == SCSI_REQ_MAGIC ) {
+ SCpnt = scsi_allocate_device(SRpnt->sr_device,
+ FALSE, FALSE);
+ if( !SCpnt ) {
+ break;
+ }
+ scsi_init_cmd_from_req(SCpnt, SRpnt);
+ }
+
+ } else {
+ SRpnt = NULL;
+ STpnt = scsi_get_request_dev(req);
+ if (!STpnt) {
+ panic("Unable to find device associated with request");
+ }
+ /*
+ * Now try and find a command block that we can use.
+ */
+ if( req->special != NULL ) {
+ SCpnt = (Scsi_Cmnd *) req->special;
+ /*
+ * We need to recount the number of
+ * scatter-gather segments here - the
+ * normal case code assumes this to be
+ * correct, as it would be a performance
+ * loss to always recount. Handling
+ * errors is always unusual, of course.
+ */
+ recount_segments(SCpnt);
+ } else {
+ SCpnt = scsi_allocate_device(SDpnt, FALSE, FALSE);
+ }
+ /*
+ * If so, we are ready to do something. Bump the count
+ * while the queue is locked and then break out of the
+ * loop. Otherwise loop around and try another request.
+ */
+ if (!SCpnt) {
+ break;
+ }
+ }
+
+ /*
+ * Now bump the usage count for both the host and the
+ * device.
+ */
+ SHpnt->host_busy++;
+ SDpnt->device_busy++;
+
+ /*
+ * Finally, before we release the lock, we copy the
+ * request to the command block, and remove the
+ * request from the request list. Note that we always
+ * operate on the queue head - there is absolutely no
+ * reason to search the list, because all of the commands
+ * in this queue are for the same device.
+ */
+ blkdev_dequeue_request(req);
+
+ if (req != &SCpnt->request && req != &SRpnt->sr_request ) {
+ memcpy(&SCpnt->request, req, sizeof(struct request));
+
+ /*
+ * We have copied the data out of the request block -
+ * it is now in a field in SCpnt. Release the request
+ * block.
+ */
+ blkdev_release_request(req);
+ }
+ /*
+ * Now it is finally safe to release the lock. We are
+ * not going to noodle the request list until this
+ * request has been queued and we loop back to queue
+ * another.
+ */
+ req = NULL;
+ spin_unlock_irq(&io_request_lock);
+
+ if (SCpnt->request.cmd != SPECIAL) {
+ /*
+ * This will do a couple of things:
+ * 1) Fill in the actual SCSI command.
+ * 2) Fill in any other upper-level specific fields
+ * (timeout).
+ *
+ * If this returns 0, it means that the request failed
+ * (reading past end of disk, reading offline device,
+ * etc). This won't actually talk to the device, but
+ * some kinds of consistency checking may cause the
+ * request to be rejected immediately.
+ */
+ if (STpnt == NULL) {
+ /* req was consumed above, so look the driver up
+ * from the copy held in the command block. */
+ STpnt = scsi_get_request_dev(&SCpnt->request);
+ }
+ /*
+ * This sets up the scatter-gather table (allocating if
+ * required). Hosts that need bounce buffers will also
+ * get those allocated here.
+ */
+ if (!SDpnt->scsi_init_io_fn(SCpnt)) {
+ SCpnt = __scsi_end_request(SCpnt, 0,
+ SCpnt->request.nr_sectors, 0, 0);
+ if( SCpnt != NULL )
+ {
+ panic("Should not have leftover blocks\n");
+ }
+ spin_lock_irq(&io_request_lock);
+ SHpnt->host_busy--;
+ SDpnt->device_busy--;
+ continue;
+ }
+ /*
+ * Initialize the actual SCSI command for this request.
+ */
+ if (!STpnt->init_command(SCpnt)) {
+ scsi_release_buffers(SCpnt);
+ SCpnt = __scsi_end_request(SCpnt, 0,
+ SCpnt->request.nr_sectors, 0, 0);
+ if( SCpnt != NULL )
+ {
+ panic("Should not have leftover blocks\n");
+ }
+ spin_lock_irq(&io_request_lock);
+ SHpnt->host_busy--;
+ SDpnt->device_busy--;
+ continue;
+ }
+ }
+ /*
+ * Finally, initialize any error handling parameters, and set up
+ * the timers for timeouts.
+ */
+ scsi_init_cmd_errh(SCpnt);
+
+ /*
+ * Dispatch the command to the low-level driver.
+ */
+ scsi_dispatch_cmd(SCpnt);
+
+ /*
+ * Now we need to grab the lock again. We are about to mess
+ * with the request queue and try to find another command.
+ */
+ spin_lock_irq(&io_request_lock);
+ }
+}
+
+/*
+ * Function: scsi_block_requests()
+ *
+ * Purpose: Utility function used by low-level drivers to prevent further
+ * commands from being queued to the device.
+ *
+ * Arguments: SHpnt - Host in question
+ *
+ * Returns: Nothing
+ *
+ * Lock status: No locks are assumed held.
+ *
+ * Notes: There is no timer nor any other means by which the requests
+ * get unblocked other than the low-level driver calling
+ * scsi_unblock_requests().
+ */
+void scsi_block_requests(struct Scsi_Host * SHpnt)
+{
+ SHpnt->host_self_blocked = TRUE;
+}
+
+/*
+ * Function: scsi_unblock_requests()
+ *
+ * Purpose: Utility function used by low-level drivers to allow further
+ * commands to be queued to the device.
+ *
+ * Arguments: SHpnt - Host in question
+ *
+ * Returns: Nothing
+ *
+ * Lock status: No locks are assumed held.
+ *
+ * Notes: There is no timer nor any other means by which the requests
+ * get unblocked other than the low-level driver calling
+ * scsi_unblock_requests().
+ *
+ * This is done as an API function so that changes to the
+ * internals of the scsi mid-layer won't require wholesale
+ * changes to drivers that use this feature.
+ */
+void scsi_unblock_requests(struct Scsi_Host * SHpnt)
+{
+ Scsi_Device *SDloop;
+
+ SHpnt->host_self_blocked = FALSE;
+ /* Now that we are unblocked, try to start the queues. */
+ for (SDloop = SHpnt->host_queue; SDloop; SDloop = SDloop->next)
+ scsi_queue_next_request(&SDloop->request_queue, NULL);
+}
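+
+/*
+ * Illustrative low-level driver usage (sketch): quiesce the host
+ * around an operation that must not see new commands:
+ *
+ *     scsi_block_requests(SHpnt);
+ *     ... reset the adapter, download firmware, etc. ...
+ *     scsi_unblock_requests(SHpnt);
+ */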
+
+/*
+ * Function: scsi_report_bus_reset()
+ *
+ * Purpose: Utility function used by low-level drivers to report that
+ * they have observed a bus reset on the bus being handled.
+ *
+ * Arguments: SHpnt - Host in question
+ * channel - channel on which reset was observed.
+ *
+ * Returns: Nothing
+ *
+ * Lock status: No locks are assumed held.
+ *
+ * Notes: This only needs to be called if the reset is one which
+ * originates from an unknown location. Resets originated
+ * by the mid-level itself don't need to call this, but there
+ * should be no harm.
+ *
+ * The main purpose of this is to make sure that a CHECK_CONDITION
+ * is properly treated.
+ */
+void scsi_report_bus_reset(struct Scsi_Host * SHpnt, int channel)
+{
+ Scsi_Device *SDloop;
+ for (SDloop = SHpnt->host_queue; SDloop; SDloop = SDloop->next) {
+ if (channel == SDloop->channel) {
+ SDloop->was_reset = 1;
+ SDloop->expecting_cc_ua = 1;
+ }
+ }
+}
+
+/*
+ * FIXME(eric) - these are empty stubs for the moment. I need to re-implement
+ * host blocking from scratch. The theory is that hosts that wish to block
+ * will register/deregister using these functions instead of the old way
+ * of setting the wish_block flag.
+ *
+ * The details of the implementation remain to be settled, however the
+ * stubs are here now so that the actual drivers will properly compile.
+ */
+void scsi_register_blocked_host(struct Scsi_Host * SHpnt)
+{
+}
+
+void scsi_deregister_blocked_host(struct Scsi_Host * SHpnt)
+{
+}
diff --git a/xen/drivers/scsi/scsi_merge.c b/xen/drivers/scsi/scsi_merge.c
new file mode 100644
index 0000000000..92306b3ec0
--- /dev/null
+++ b/xen/drivers/scsi/scsi_merge.c
@@ -0,0 +1,1181 @@
+/*
+ * scsi_merge.c Copyright (C) 1999 Eric Youngdale
+ *
+ * SCSI queueing library.
+ * Initial versions: Eric Youngdale (eric@andante.org).
+ * Based upon conversations with large numbers
+ * of people at Linux Expo.
+ * Support for dynamic DMA mapping: Jakub Jelinek (jakub@redhat.com).
+ */
+
+/*
+ * This file contains queue management functions that are used by SCSI.
+ * Typically this is used for several purposes. First, we need to ensure
+ * that commands do not grow so large that they cannot be handled all at
+ * once by a host adapter. The various flavors of merge functions included
+ * here serve this purpose.
+ *
+ * Note that it would be quite trivial to allow the low-level driver the
+ * flexibility to define its own queue handling functions. For the time
+ * being, the hooks are not present. Right now we are just using the
+ * data in the host template as an indicator of how we should be handling
+ * queues, and we select routines that are optimized for that purpose.
+ *
+ * Some hosts do not impose any restrictions on the size of a request.
+ * In such cases none of the merge functions in this file are called,
+ * and we allow ll_rw_blk to merge requests in the default manner.
+ * This isn't guaranteed to be optimal, but it should be pretty darned
+ * good. If someone comes up with ideas of better ways of managing queues
+ * to improve on the default behavior, then certainly fit it into this
+ * scheme in whatever manner makes the most sense. Please note that
+ * since each device has its own queue, we have considerable flexibility
+ * in queue management.
+ */
+
+#define __NO_VERSION__
+#include <xeno/config.h>
+#include <xeno/module.h>
+
+#include <xeno/sched.h>
+#include <xeno/timer.h>
+/* #include <xeno/string.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/ioport.h> */
+/* #include <xeno/kernel.h> */
+/* #include <xeno/stat.h> */
+#include <xeno/blk.h>
+/* #include <xeno/interrupt.h> */
+/* #include <xeno/delay.h> */
+/* #include <xeno/smp_lock.h> */
+
+
+#define __KERNEL_SYSCALLS__
+
+/* #include <xeno/unistd.h> */
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+#include <asm/io.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+#include <scsi/scsi_ioctl.h>
+
+/*
+ * This means that bounce buffers cannot be allocated in chunks > PAGE_SIZE.
+ * Ultimately we should get away from using a dedicated DMA bounce buffer
+ * pool, and try to use kmalloc() instead. If we can
+ * eliminate this pool, then this restriction would no longer be needed.
+ */
+#define DMA_SEGMENT_SIZE_LIMITED
+
+#ifdef CONFIG_SCSI_DEBUG_QUEUES
+/*
+ * Enable a bunch of additional consistency checking. Turn this off
+ * if you are benchmarking.
+ */
+static int dump_stats(struct request *req,
+ int use_clustering,
+ int dma_host,
+ int segments)
+{
+ struct buffer_head *bh;
+
+ /*
+ * Dump the information that we have. We know we have an
+ * inconsistency.
+ */
+ printk("nr_segments is %x\n", req->nr_segments);
+ printk("counted segments is %x\n", segments);
+ printk("Flags %d %d\n", use_clustering, dma_host);
+ for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext)
+ {
+ printk("Segment 0x%p, blocks %d, addr 0x%lx\n",
+ bh,
+ bh->b_size >> 9,
+ virt_to_phys(bh->b_data - 1));
+ }
+ panic("Ththththaats all folks. Too dangerous to continue.\n");
+}
+
+
+/*
+ * Simple sanity check that we will use for the first go around
+ * in order to ensure that we are doing the counting correctly.
+ * This can be removed for optimization.
+ */
+#define SANITY_CHECK(req, _CLUSTER, _DMA) \
+ if( req->nr_segments != __count_segments(req, _CLUSTER, _DMA, NULL) ) \
+ { \
+ printk("Incorrect segment count at 0x%p", current_text_addr()); \
+ dump_stats(req, _CLUSTER, _DMA, __count_segments(req, _CLUSTER, _DMA, NULL)); \
+ }
+#else
+#define SANITY_CHECK(req, _CLUSTER, _DMA)
+#endif
+
+static void dma_exhausted(Scsi_Cmnd * SCpnt, int i)
+{
+ int jj;
+ struct scatterlist *sgpnt;
+ void **bbpnt;
+ int consumed = 0;
+
+ sgpnt = (struct scatterlist *) SCpnt->request_buffer;
+ bbpnt = SCpnt->bounce_buffers;
+
+ /*
+ * Now print out a bunch of stats. First, start with the request
+ * size.
+ */
+ printk("dma_free_sectors:%d\n", scsi_dma_free_sectors);
+ printk("use_sg:%d\ti:%d\n", SCpnt->use_sg, i);
+ printk("request_bufflen:%d\n", SCpnt->request_bufflen);
+ /*
+ * Now dump the scatter-gather table, up to the point of failure.
+ */
+ for(jj=0; jj < SCpnt->use_sg; jj++)
+ {
+ printk("[%d]\tlen:%d\taddr:%p\tbounce:%p\n",
+ jj,
+ sgpnt[jj].length,
+ sgpnt[jj].address,
+ (bbpnt ? bbpnt[jj] : NULL));
+ if (bbpnt && bbpnt[jj])
+ consumed += sgpnt[jj].length;
+ }
+ printk("Total %d sectors consumed\n", consumed);
+ panic("DMA pool exhausted");
+}
+
+#define CLUSTERABLE_DEVICE(SH,SD) (SH->use_clustering)
+
+/*
+ * This entire source file deals with the new queueing code.
+ */
+
+/*
+ * Function: __count_segments()
+ *
+ * Purpose: Count the number of scatter-gather segments for a request.
+ *
+ * Arguments: q - Queue for which we are merging request.
+ * req - request into which we wish to merge.
+ * use_clustering - 1 if this host wishes to use clustering
+ * dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ * expose all of the address lines, so that DMA cannot
+ * be done from an arbitrary address).
+ * remainder - used to track the residual size of the last
+ * segment. Comes in handy when we want to limit the
+ * size of bounce buffer segments to PAGE_SIZE.
+ *
+ * Returns: Count of the number of SG segments for the request.
+ *
+ * Lock status:
+ *
+ * Notes: Used by recount_segments(), by the merge functions when
+ * sizing bounce-buffer segments, and by the SANITY_CHECK code.
+ */
+__inline static int __count_segments(struct request *req,
+ int use_clustering,
+ int dma_host,
+ int * remainder)
+{
+ int ret = 1;
+ int reqsize = 0;
+ struct buffer_head *bh;
+ struct buffer_head *bhnext;
+
+ if( remainder != NULL ) {
+ reqsize = *remainder;
+ }
+
+ /*
+ * Add in the size increment for the first buffer.
+ */
+ bh = req->bh;
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+ if( reqsize + bh->b_size > PAGE_SIZE ) {
+ ret++;
+ reqsize = bh->b_size;
+ } else {
+ reqsize += bh->b_size;
+ }
+#else
+ reqsize += bh->b_size;
+#endif
+
+ for (bh = req->bh, bhnext = bh->b_reqnext;
+ bhnext != NULL;
+ bh = bhnext, bhnext = bh->b_reqnext) {
+ if (use_clustering) {
+ /*
+ * See if we can do this without creating another
+ * scatter-gather segment. In the event that this is a
+ * DMA capable host, make sure that a segment doesn't span
+ * the DMA threshold boundary.
+ */
+ if (dma_host &&
+ virt_to_phys(bhnext->b_data) - 1 == ISA_DMA_THRESHOLD) {
+ ret++;
+ reqsize = bhnext->b_size;
+ } else if (CONTIGUOUS_BUFFERS(bh, bhnext)) {
+ /*
+ * This one is OK. Let it go.
+ */
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+ /* Note scsi_malloc is only able to hand out
+ * chunks of memory in sizes of PAGE_SIZE or
+ * less. Thus we need to keep track of
+ * the size of the piece that we have
+ * seen so far, and if we have hit
+ * the limit of PAGE_SIZE, then we are
+ * kind of screwed and we need to start
+ * another segment.
+ */
+ if( dma_host
+ && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD
+ && reqsize + bhnext->b_size > PAGE_SIZE )
+ {
+ ret++;
+ reqsize = bhnext->b_size;
+ continue;
+ }
+#endif
+ reqsize += bhnext->b_size;
+ continue;
+ }
+ ret++;
+ reqsize = bhnext->b_size;
+ } else {
+ ret++;
+ reqsize = bhnext->b_size;
+ }
+ }
+ if( remainder != NULL ) {
+ *remainder = reqsize;
+ }
+ return ret;
+}
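+
+/*
+ * Worked example (illustrative): for a clustering host with no ISA
+ * DMA restrictions and a request of three 1K buffers where only the
+ * first two are physically contiguous,
+ *
+ *     __count_segments(req, 1, 0, NULL) == 2
+ *
+ * buffers 1 and 2 merge into one segment and buffer 3 starts another.
+ * Without clustering the same request would count 3 segments.
+ */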
+
+/*
+ * Function: recount_segments()
+ *
+ * Purpose: Recount the number of scatter-gather segments for this request.
+ *
+ * Arguments: req - request that needs recounting.
+ *
+ * Returns: Count of the number of SG segments for the request.
+ *
+ * Lock status: Irrelevant.
+ *
+ * Notes: This is only used when we have partially completed requests
+ * and the bit that is leftover is of an indeterminate size.
+ * This can come up if you get a MEDIUM_ERROR, for example,
+ * as we will have "completed" all of the sectors up to and
+ * including the bad sector, and the leftover bit is what
+ * we have to do now. This tends to be a rare occurrence, so
+ * we aren't busting our butts to instantiate separate versions
+ * of this function for the 4 different flag values. We
+ * probably should, however.
+ */
+void
+recount_segments(Scsi_Cmnd * SCpnt)
+{
+ struct request *req;
+ struct Scsi_Host *SHpnt;
+ Scsi_Device * SDpnt;
+
+ req = &SCpnt->request;
+ SHpnt = SCpnt->host;
+ SDpnt = SCpnt->device;
+
+ req->nr_segments = __count_segments(req,
+ CLUSTERABLE_DEVICE(SHpnt, SDpnt),
+ SHpnt->unchecked_isa_dma, NULL);
+}
+
+#define MERGEABLE_BUFFERS(X,Y) \
+(((((long)(X)->b_data+(X)->b_size)|((long)(Y)->b_data)) & \
+ (DMA_CHUNK_SIZE - 1)) == 0)
+
+#ifdef DMA_CHUNK_SIZE
+static inline int scsi_new_mergeable(request_queue_t * q,
+ struct request * req,
+ struct Scsi_Host *SHpnt,
+ int max_segments)
+{
+ /*
+ * pci_map_sg will be able to merge these two
+ * into a single hardware sg entry, check if
+ * we'll have enough memory for the sg list.
+ * scsi.c allocates for this purpose
+ * min(64,sg_tablesize) entries.
+ */
+ if (req->nr_segments >= max_segments ||
+ req->nr_segments >= SHpnt->sg_tablesize)
+ return 0;
+ req->nr_segments++;
+ return 1;
+}
+
+static inline int scsi_new_segment(request_queue_t * q,
+ struct request * req,
+ struct Scsi_Host *SHpnt,
+ int max_segments)
+{
+ /*
+ * pci_map_sg won't be able to map these two
+ * into a single hardware sg entry, so we have to
+ * check if things fit into sg_tablesize.
+ */
+ if (req->nr_hw_segments >= SHpnt->sg_tablesize ||
+ req->nr_segments >= SHpnt->sg_tablesize)
+ return 0;
+ req->nr_hw_segments++;
+ req->nr_segments++;
+ return 1;
+}
+#else
+static inline int scsi_new_segment(request_queue_t * q,
+ struct request * req,
+ struct Scsi_Host *SHpnt,
+ int max_segments)
+{
+ if (req->nr_segments < SHpnt->sg_tablesize &&
+ req->nr_segments < max_segments) {
+ /*
+ * This will form the start of a new segment. Bump the
+ * counter.
+ */
+ req->nr_segments++;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+#endif
+
+/*
+ * Function: __scsi_back_merge_fn() / __scsi_front_merge_fn()
+ *
+ * Purpose: Prototype for queue merge function.
+ *
+ * Arguments: q - Queue for which we are merging request.
+ * req - request into which we wish to merge.
+ * bh - Block which we may wish to merge into request
+ * use_clustering - 1 if this host wishes to use clustering
+ * dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ * expose all of the address lines, so that DMA cannot
+ * be done from an arbitrary address).
+ *
+ * Returns: 1 if it is OK to merge the block into the request. 0
+ * if it is not OK.
+ *
+ * Lock status: io_request_lock is assumed to be held here.
+ *
+ * Notes: Some drivers have limited scatter-gather table sizes, and
+ * thus they cannot queue an infinitely large command. This
+ * function is called from ll_rw_blk before it attempts to merge
+ * a new block into a request to make sure that the request will
+ * not become too large.
+ *
+ * This function is not designed to be directly called. Instead
+ * it should be referenced from other functions where the
+ * use_clustering and dma_host parameters should be integer
+ * constants. The compiler should thus be able to properly
+ * optimize the code, eliminating stuff that is irrelevant.
+ * It is more maintainable to do this way with a single function
+ * than to have 4 separate functions all doing roughly the
+ * same thing.
+ */
+__inline static int __scsi_back_merge_fn(request_queue_t * q,
+ struct request *req,
+ struct buffer_head *bh,
+ int max_segments,
+ int use_clustering,
+ int dma_host)
+{
+ unsigned int count;
+ unsigned int segment_size = 0;
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *SHpnt;
+
+ SDpnt = (Scsi_Device *) q->queuedata;
+ SHpnt = SDpnt->host;
+
+#ifdef DMA_CHUNK_SIZE
+ if (max_segments > 64)
+ max_segments = 64;
+#endif
+
+ if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors)
+ return 0;
+
+ if (use_clustering) {
+ /*
+ * See if we can do this without creating another
+ * scatter-gather segment. In the event that this is a
+ * DMA capable host, make sure that a segment doesn't span
+ * the DMA threshold boundary.
+ */
+ if (dma_host &&
+ virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) {
+ goto new_end_segment;
+ }
+ if (CONTIGUOUS_BUFFERS(req->bhtail, bh)) {
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+ if( dma_host
+ && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) {
+ segment_size = 0;
+ count = __count_segments(req, use_clustering, dma_host, &segment_size);
+ if( segment_size + bh->b_size > PAGE_SIZE ) {
+ goto new_end_segment;
+ }
+ }
+#endif
+ /*
+ * This one is OK. Let it go.
+ */
+ return 1;
+ }
+ }
+ new_end_segment:
+#ifdef DMA_CHUNK_SIZE
+ if (MERGEABLE_BUFFERS(req->bhtail, bh))
+ return scsi_new_mergeable(q, req, SHpnt, max_segments);
+#endif
+ return scsi_new_segment(q, req, SHpnt, max_segments);
+}
+
+__inline static int __scsi_front_merge_fn(request_queue_t * q,
+ struct request *req,
+ struct buffer_head *bh,
+ int max_segments,
+ int use_clustering,
+ int dma_host)
+{
+ unsigned int count;
+ unsigned int segment_size = 0;
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *SHpnt;
+
+ SDpnt = (Scsi_Device *) q->queuedata;
+ SHpnt = SDpnt->host;
+
+#ifdef DMA_CHUNK_SIZE
+ if (max_segments > 64)
+ max_segments = 64;
+#endif
+
+ if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors)
+ return 0;
+
+ if (use_clustering) {
+ /*
+ * See if we can do this without creating another
+ * scatter-gather segment. In the event that this is a
+ * DMA capable host, make sure that a segment doesn't span
+ * the DMA threshold boundary.
+ */
+ if (dma_host &&
+ virt_to_phys(bh->b_data) - 1 == ISA_DMA_THRESHOLD) {
+ goto new_start_segment;
+ }
+ if (CONTIGUOUS_BUFFERS(bh, req->bh)) {
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+ if( dma_host
+ && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) {
+ segment_size = bh->b_size;
+ count = __count_segments(req, use_clustering, dma_host, &segment_size);
+ if( count != req->nr_segments ) {
+ goto new_start_segment;
+ }
+ }
+#endif
+ /*
+ * This one is OK. Let it go.
+ */
+ return 1;
+ }
+ }
+ new_start_segment:
+#ifdef DMA_CHUNK_SIZE
+ if (MERGEABLE_BUFFERS(bh, req->bh))
+ return scsi_new_mergeable(q, req, SHpnt, max_segments);
+#endif
+ return scsi_new_segment(q, req, SHpnt, max_segments);
+}
+
+/*
+ * Function:    scsi_back_merge_fn_*() / scsi_front_merge_fn_*()
+ *
+ * Purpose: queue merge function.
+ *
+ * Arguments: q - Queue for which we are merging request.
+ * req - request into which we wish to merge.
+ * bh - Block which we may wish to merge into request
+ *
+ * Returns: 1 if it is OK to merge the block into the request. 0
+ * if it is not OK.
+ *
+ * Lock status: io_request_lock is assumed to be held here.
+ *
+ * Notes: Optimized for different cases depending upon whether
+ * ISA DMA is in use and whether clustering should be used.
+ */
+#define MERGEFCT(_FUNCTION, _BACK_FRONT, _CLUSTER, _DMA) \
+static int _FUNCTION(request_queue_t * q, \
+ struct request * req, \
+ struct buffer_head * bh, \
+ int max_segments) \
+{ \
+ int ret; \
+ SANITY_CHECK(req, _CLUSTER, _DMA); \
+ ret = __scsi_ ## _BACK_FRONT ## _merge_fn(q, \
+ req, \
+ bh, \
+ max_segments, \
+ _CLUSTER, \
+ _DMA); \
+ return ret; \
+}
+
+/* Version with use_clustering 0 and dma_host 1 is not necessary,
+ * since the only use of dma_host above is protected by use_clustering.
+ */
+MERGEFCT(scsi_back_merge_fn_, back, 0, 0)
+MERGEFCT(scsi_back_merge_fn_c, back, 1, 0)
+MERGEFCT(scsi_back_merge_fn_dc, back, 1, 1)
+
+MERGEFCT(scsi_front_merge_fn_, front, 0, 0)
+MERGEFCT(scsi_front_merge_fn_c, front, 1, 0)
+MERGEFCT(scsi_front_merge_fn_dc, front, 1, 1)
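+
+/*
+ * Illustrative expansion (not part of the driver): with the MERGEFCT
+ * macro above, MERGEFCT(scsi_back_merge_fn_c, back, 1, 0) generates
+ *
+ *   static int scsi_back_merge_fn_c(request_queue_t * q,
+ *                                   struct request * req,
+ *                                   struct buffer_head * bh,
+ *                                   int max_segments)
+ *   {
+ *       int ret;
+ *       SANITY_CHECK(req, 1, 0);
+ *       ret = __scsi_back_merge_fn(q, req, bh, max_segments, 1, 0);
+ *       return ret;
+ *   }
+ *
+ * Because use_clustering and dma_host arrive as the constants 1 and 0,
+ * the compiler can drop the dead branches of __scsi_back_merge_fn().
+ */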
+
+/*
+ * Function: __scsi_merge_requests_fn()
+ *
+ * Purpose: Prototype for queue merge function.
+ *
+ * Arguments: q - Queue for which we are merging request.
+ * req - request into which we wish to merge.
+ * next - 2nd request that we might want to combine with req
+ * use_clustering - 1 if this host wishes to use clustering
+ * dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ * expose all of the address lines, so that DMA cannot
+ * be done from an arbitrary address).
+ *
+ * Returns: 1 if it is OK to merge the two requests. 0
+ * if it is not OK.
+ *
+ * Lock status: io_request_lock is assumed to be held here.
+ *
+ * Notes: Some drivers have limited scatter-gather table sizes, and
+ * thus they cannot queue an infinitely large command. This
+ * function is called from ll_rw_blk before it attempts to merge
+ * a new block into a request to make sure that the request will
+ * not become too large.
+ *
+ * This function is not designed to be directly called. Instead
+ * it should be referenced from other functions where the
+ * use_clustering and dma_host parameters should be integer
+ * constants. The compiler should thus be able to properly
+ * optimize the code, eliminating stuff that is irrelevant.
+ * It is more maintainable to do this way with a single function
+ * than to have 4 separate functions all doing roughly the
+ * same thing.
+ */
+__inline static int __scsi_merge_requests_fn(request_queue_t * q,
+ struct request *req,
+ struct request *next,
+ int max_segments,
+ int use_clustering,
+ int dma_host)
+{
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *SHpnt;
+
+ /*
+ * First check if either of the requests is a re-queued
+ * request. We can't merge them if they are.
+ */
+ if (req->special || next->special)
+ return 0;
+
+ SDpnt = (Scsi_Device *) q->queuedata;
+ SHpnt = SDpnt->host;
+
+#ifdef DMA_CHUNK_SIZE
+ if (max_segments > 64)
+ max_segments = 64;
+
+ /* If it would not fit into prepared memory space for sg chain,
+ * then don't allow the merge.
+ */
+ if (req->nr_segments + next->nr_segments - 1 > max_segments ||
+ req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) {
+ return 0;
+ }
+ if (req->nr_hw_segments + next->nr_hw_segments - 1 > SHpnt->sg_tablesize) {
+ return 0;
+ }
+#else
+ /*
+ * If the two requests together are too large (even assuming that
+ * the boundary segments can be merged into one), then don't
+ * allow the merge.
+ */
+ if (req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) {
+ return 0;
+ }
+#endif
+
+ if ((req->nr_sectors + next->nr_sectors) > SHpnt->max_sectors)
+ return 0;
+
+ /*
+ * The main question is whether the two segments at the boundaries
+ * would be considered one or two.
+ */
+ if (use_clustering) {
+ /*
+ * See if we can do this without creating another
+ * scatter-gather segment. In the event that this is a
+ * DMA capable host, make sure that a segment doesn't span
+ * the DMA threshold boundary.
+ */
+ if (dma_host &&
+ virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) {
+ goto dont_combine;
+ }
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+ /*
+ * We currently can only allocate scatter-gather bounce
+ * buffers in chunks of PAGE_SIZE or less.
+ */
+ if (dma_host
+ && CONTIGUOUS_BUFFERS(req->bhtail, next->bh)
+ && virt_to_phys(req->bhtail->b_data) - 1 >= ISA_DMA_THRESHOLD )
+ {
+ int segment_size = 0;
+ int count = 0;
+
+ count = __count_segments(req, use_clustering, dma_host, &segment_size);
+ count += __count_segments(next, use_clustering, dma_host, &segment_size);
+ if( count != req->nr_segments + next->nr_segments ) {
+ goto dont_combine;
+ }
+ }
+#endif
+ if (CONTIGUOUS_BUFFERS(req->bhtail, next->bh)) {
+ /*
+ * This one is OK. Let it go.
+ */
+ req->nr_segments += next->nr_segments - 1;
+#ifdef DMA_CHUNK_SIZE
+ req->nr_hw_segments += next->nr_hw_segments - 1;
+#endif
+ return 1;
+ }
+ }
+ dont_combine:
+#ifdef DMA_CHUNK_SIZE
+ if (req->nr_segments + next->nr_segments > max_segments ||
+ req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) {
+ return 0;
+ }
+ /* If dynamic DMA mapping can merge last segment in req with
+ * first segment in next, then the check for hw segments was
+ * done above already, so we can always merge.
+ */
+ if (MERGEABLE_BUFFERS (req->bhtail, next->bh)) {
+ req->nr_hw_segments += next->nr_hw_segments - 1;
+ } else if (req->nr_hw_segments + next->nr_hw_segments > SHpnt->sg_tablesize) {
+ return 0;
+ } else {
+ req->nr_hw_segments += next->nr_hw_segments;
+ }
+ req->nr_segments += next->nr_segments;
+ return 1;
+#else
+ /*
+ * We know that the two requests at the boundary should not be combined.
+ * Make sure we can fit something that is the sum of the two.
+ * A slightly stricter test than we had above.
+ */
+ if (req->nr_segments + next->nr_segments > max_segments ||
+ req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) {
+ return 0;
+ } else {
+ /*
+ * This will form the start of a new segment. Bump the
+ * counter.
+ */
+ req->nr_segments += next->nr_segments;
+ return 1;
+ }
+#endif
+}
+
+/*
+ * Function: scsi_merge_requests_fn_()
+ *
+ * Purpose: queue merge function.
+ *
+ * Arguments: q - Queue for which we are merging request.
+ * req - request into which we wish to merge.
+ * next - 2nd request that we might want to combine with req
+ *
+ * Returns: 1 if it is OK to merge the two requests. 0
+ * if it is not OK.
+ *
+ * Lock status: io_request_lock is assumed to be held here.
+ *
+ * Notes: Optimized for different cases depending upon whether
+ * ISA DMA is in use and whether clustering should be used.
+ */
+#define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA) \
+static int _FUNCTION(request_queue_t * q, \
+ struct request * req, \
+ struct request * next, \
+ int max_segments) \
+{ \
+ int ret; \
+ SANITY_CHECK(req, _CLUSTER, _DMA); \
+ ret = __scsi_merge_requests_fn(q, req, next, max_segments, _CLUSTER, _DMA); \
+ return ret; \
+}
+
+/* Version with use_clustering 0 and dma_host 1 is not necessary,
+ * since the only use of dma_host above is protected by use_clustering.
+ */
+MERGEREQFCT(scsi_merge_requests_fn_, 0, 0)
+MERGEREQFCT(scsi_merge_requests_fn_c, 1, 0)
+MERGEREQFCT(scsi_merge_requests_fn_dc, 1, 1)
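+
+/*
+ * Illustrative note (not part of the driver): each MERGEREQFCT
+ * invocation above expands just like MERGEFCT does, e.g.
+ * scsi_merge_requests_fn_dc() reduces to a SANITY_CHECK(req, 1, 1)
+ * followed by __scsi_merge_requests_fn(q, req, next, max_segments, 1, 1),
+ * so the clustering and ISA DMA branches are again resolved at
+ * compile time.
+ */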
+/*
+ * Function: __init_io()
+ *
+ * Purpose: Prototype for io initialize function.
+ *
+ * Arguments: SCpnt - Command descriptor we wish to initialize
+ * sg_count_valid - 1 if the sg count in the req is valid.
+ * use_clustering - 1 if this host wishes to use clustering
+ * dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ * expose all of the address lines, so that DMA cannot
+ * be done from an arbitrary address).
+ *
+ * Returns: 1 on success.
+ *
+ * Lock status:
+ *
+ * Notes: Only the SCpnt argument should be a non-constant variable.
+ * This function is designed in such a way that it will be
+ * invoked from a series of small stubs, each of which would
+ * be optimized for specific circumstances.
+ *
+ * The advantage of this is that hosts that don't do DMA
+ * get versions of the function that essentially don't have
+ * any of the DMA code. Same goes for clustering - in the
+ * case of hosts with no need for clustering, there is no point
+ * in a whole bunch of overhead.
+ *
+ * Finally, in the event that a host has set sg_tablesize to SG_ALL,
+ * implying that there is no limit to the length of a scatter
+ * gather list, the sg count in the request won't be valid
+ * (mainly because we don't need the queue management functions
+ * which keep the tally up to date).
+ */
+__inline static int __init_io(Scsi_Cmnd * SCpnt,
+ int sg_count_valid,
+ int use_clustering,
+ int dma_host)
+{
+ struct buffer_head * bh;
+ struct buffer_head * bhprev;
+ char * buff;
+ int count;
+ int i;
+ struct request * req;
+ int sectors;
+ struct scatterlist * sgpnt;
+ int this_count;
+ void ** bbpnt;
+
+ /*
+ * FIXME(eric) - don't inline this - it doesn't depend on the
+ * integer flags. Come to think of it, I don't think this is even
+ * needed any more. Need to play with it and see if we hit the
+ * panic. If not, then don't bother.
+ */
+ if (!SCpnt->request.bh) {
+ /*
+ * Case of page request (i.e. raw device), or unlinked buffer
+ * Typically used for swapping, but this isn't how we do
+ * swapping any more.
+ */
+ panic("I believe this is dead code. If we hit this, I was wrong");
+#if 0
+ SCpnt->request_bufflen = SCpnt->request.nr_sectors << 9;
+ SCpnt->request_buffer = SCpnt->request.buffer;
+ SCpnt->use_sg = 0;
+ /*
+ * FIXME(eric) - need to handle DMA here.
+ */
+#endif
+ return 1;
+ }
+ req = &SCpnt->request;
+ /*
+ * First we need to know how many scatter gather segments are needed.
+ */
+ if (!sg_count_valid) {
+ count = __count_segments(req, use_clustering, dma_host, NULL);
+ } else {
+ count = req->nr_segments;
+ }
+
+ /*
+ * If the dma pool is nearly empty, then queue a minimal request
+ * with a single segment. Typically this will satisfy a single
+ * buffer.
+ */
+ if (dma_host && scsi_dma_free_sectors <= 10) {
+ this_count = SCpnt->request.current_nr_sectors;
+ goto single_segment;
+ }
+ /*
+ * Don't bother with scatter-gather if there is only one segment.
+ */
+ if (count == 1) {
+ this_count = SCpnt->request.nr_sectors;
+ goto single_segment;
+ }
+ SCpnt->use_sg = count;
+
+ /*
+ * Allocate the actual scatter-gather table itself.
+ */
+ SCpnt->sglist_len = (SCpnt->use_sg * sizeof(struct scatterlist));
+
+ /* If we could potentially require ISA bounce buffers, allocate
+ * space for this array here.
+ */
+ if (dma_host)
+ SCpnt->sglist_len += (SCpnt->use_sg * sizeof(void *));
+
+ /* scsi_malloc can only allocate in chunks of 512 bytes so
+ * round it up.
+ */
+ SCpnt->sglist_len = (SCpnt->sglist_len + 511) & ~511;
+
+ sgpnt = (struct scatterlist *) scsi_malloc(SCpnt->sglist_len);
+
+ /*
+ * Now fill the scatter-gather table.
+ */
+ if (!sgpnt) {
+ /*
+ * If we cannot allocate the scatter-gather table, then
+ * simply write the first buffer all by itself.
+ */
+ printk("Warning - running *really* short on DMA buffers\n");
+ this_count = SCpnt->request.current_nr_sectors;
+ goto single_segment;
+ }
+ /*
+ * Next, walk the list, and fill in the addresses and sizes of
+ * each segment.
+ */
+ memset(sgpnt, 0, SCpnt->sglist_len);
+ SCpnt->request_buffer = (char *) sgpnt;
+ SCpnt->request_bufflen = 0;
+ bhprev = NULL;
+
+ if (dma_host)
+ bbpnt = (void **) ((char *)sgpnt +
+ (SCpnt->use_sg * sizeof(struct scatterlist)));
+ else
+ bbpnt = NULL;
+
+ SCpnt->bounce_buffers = bbpnt;
+
+ for (count = 0, bh = SCpnt->request.bh;
+ bh; bh = bh->b_reqnext) {
+ if (use_clustering && bhprev != NULL) {
+ if (dma_host &&
+ virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) {
+ /* Nothing - fall through */
+ } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) {
+ /*
+ * This one is OK. Let it go. Note that we
+ * do not have the ability to allocate
+ * bounce buffer segments > PAGE_SIZE, so
+ * for now we limit the thing.
+ */
+ if( dma_host ) {
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+ if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD
+ || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) {
+ sgpnt[count - 1].length += bh->b_size;
+ bhprev = bh;
+ continue;
+ }
+#else
+ sgpnt[count - 1].length += bh->b_size;
+ bhprev = bh;
+ continue;
+#endif
+ } else {
+ sgpnt[count - 1].length += bh->b_size;
+ SCpnt->request_bufflen += bh->b_size;
+ bhprev = bh;
+ continue;
+ }
+ }
+ }
+ count++;
+ sgpnt[count - 1].address = bh->b_data;
+ sgpnt[count - 1].page = NULL;
+ sgpnt[count - 1].length += bh->b_size;
+ if (!dma_host) {
+ SCpnt->request_bufflen += bh->b_size;
+ }
+ bhprev = bh;
+ }
+
+ /*
+ * Verify that the count is correct.
+ */
+ if (count != SCpnt->use_sg) {
+ printk("Incorrect number of segments after building list\n");
+#ifdef CONFIG_SCSI_DEBUG_QUEUES
+ dump_stats(req, use_clustering, dma_host, count);
+#endif
+ }
+ if (!dma_host) {
+ return 1;
+ }
+ /*
+ * Now allocate bounce buffers, if needed.
+ */
+ SCpnt->request_bufflen = 0;
+ for (i = 0; i < count; i++) {
+ sectors = (sgpnt[i].length >> 9);
+ SCpnt->request_bufflen += sgpnt[i].length;
+ if (virt_to_phys(sgpnt[i].address) + sgpnt[i].length - 1 >
+ ISA_DMA_THRESHOLD) {
+ if( scsi_dma_free_sectors - sectors <= 10 ) {
+ /*
+ * If this would nearly drain the DMA
+ * pool empty, then let's stop here.
+ * Don't make this request any larger.
+ * This is kind of a safety valve that
+ * we use - we could get screwed later
+ * on if we run out completely.
+ */
+ SCpnt->request_bufflen -= sgpnt[i].length;
+ SCpnt->use_sg = i;
+ if (i == 0) {
+ goto big_trouble;
+ }
+ break;
+ }
+
+ bbpnt[i] = sgpnt[i].address;
+ sgpnt[i].address =
+ (char *) scsi_malloc(sgpnt[i].length);
+ /*
+ * If we cannot allocate memory for this DMA bounce
+ * buffer, then queue just what we have done so far.
+ */
+ if (sgpnt[i].address == NULL) {
+ printk("Warning - running low on DMA memory\n");
+ SCpnt->request_bufflen -= sgpnt[i].length;
+ SCpnt->use_sg = i;
+ if (i == 0) {
+ goto big_trouble;
+ }
+ break;
+ }
+ if (SCpnt->request.cmd == WRITE) {
+ memcpy(sgpnt[i].address, bbpnt[i],
+ sgpnt[i].length);
+ }
+ }
+ }
+ return 1;
+
+ big_trouble:
+ /*
+ * We come here in the event that we get one humongous
+ * request, where we need a bounce buffer, and the buffer is
+ * more than we can allocate in a single call to
+ * scsi_malloc(). In addition, we only come here when it is
+ * the 0th element of the scatter-gather table that gets us
+ * into this trouble. We fall back to non-scatter-gather
+ * and ask for a single segment. We make
+ * a half-hearted attempt to pick a reasonably large request
+ * size mainly so that we don't thrash the thing with
+ * iddy-biddy requests.
+ */
+
+ /*
+ * The original number of sectors in the 0th element of the
+ * scatter-gather table.
+ */
+ sectors = sgpnt[0].length >> 9;
+
+ /*
+ * Free up the original scatter-gather table. Note that since
+ * it was the 0th element that got us here, we don't have to
+ * go in and free up memory from the other slots.
+ */
+ SCpnt->request_bufflen = 0;
+ SCpnt->use_sg = 0;
+ scsi_free(SCpnt->request_buffer, SCpnt->sglist_len);
+
+ /*
+ * Make an attempt to pick up as much as we reasonably can.
+ * Just keep adding sectors until the pool starts running kind of
+ * low. The limit of 30 is somewhat arbitrary - the point is that
+ * it would kind of suck if we dropped down and limited ourselves to
+ * single-block requests if we had hundreds of free sectors.
+ */
+ if( scsi_dma_free_sectors > 30 ) {
+ for (this_count = 0, bh = SCpnt->request.bh;
+ bh; bh = bh->b_reqnext) {
+ if( scsi_dma_free_sectors - this_count < 30
+ || this_count == sectors )
+ {
+ break;
+ }
+ this_count += bh->b_size >> 9;
+ }
+
+ } else {
+ /*
+ * Yow! Take the absolute minimum here.
+ */
+ this_count = SCpnt->request.current_nr_sectors;
+ }
+
+ /*
+ * Now drop through into the single-segment case.
+ */
+
+ single_segment:
+ /*
+ * Come here if for any reason we choose to do this as a single
+ * segment. Possibly the entire request, or possibly a small
+ * chunk of the entire request.
+ */
+ bh = SCpnt->request.bh;
+ buff = SCpnt->request.buffer;
+
+ if (dma_host) {
+ /*
+ * Allocate a DMA bounce buffer. If the allocation fails, fall
+ * back and allocate a really small one - enough to satisfy
+ * the first buffer.
+ */
+ if (virt_to_phys(SCpnt->request.bh->b_data)
+ + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) {
+ buff = (char *) scsi_malloc(this_count << 9);
+ if (!buff) {
+ printk("Warning - running low on DMA memory\n");
+ this_count = SCpnt->request.current_nr_sectors;
+ buff = (char *) scsi_malloc(this_count << 9);
+ if (!buff) {
+ dma_exhausted(SCpnt, 0);
+ }
+ }
+ if (SCpnt->request.cmd == WRITE)
+ memcpy(buff, (char *) SCpnt->request.buffer, this_count << 9);
+ }
+ }
+ SCpnt->request_bufflen = this_count << 9;
+ SCpnt->request_buffer = buff;
+ SCpnt->use_sg = 0;
+ return 1;
+}
+
+#define INITIO(_FUNCTION, _VALID, _CLUSTER, _DMA) \
+static int _FUNCTION(Scsi_Cmnd * SCpnt) \
+{ \
+ return __init_io(SCpnt, _VALID, _CLUSTER, _DMA); \
+}
+
+/*
+ * ll_rw_blk.c now keeps track of the number of segments in
+ * a request. Thus we don't have to do it any more here.
+ * We always force "_VALID" to 1. Eventually clean this up
+ * and get rid of the extra argument.
+ */
+INITIO(scsi_init_io_v, 1, 0, 0)
+INITIO(scsi_init_io_vd, 1, 0, 1)
+INITIO(scsi_init_io_vc, 1, 1, 0)
+INITIO(scsi_init_io_vdc, 1, 1, 1)
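+
+/*
+ * Illustrative expansion (not part of the driver): each INITIO
+ * invocation above generates a trivial stub, e.g.
+ *
+ *   static int scsi_init_io_vdc(Scsi_Cmnd * SCpnt)
+ *   {
+ *       return __init_io(SCpnt, 1, 1, 1);
+ *   }
+ *
+ * initialize_merge_fn() below installs one of these four stubs per
+ * device, so the constant-folded variant is selected once at setup
+ * time rather than on every command.
+ */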
+
+/*
+ * Function: initialize_merge_fn()
+ *
+ * Purpose: Initialize merge function for a host
+ *
+ * Arguments: SDpnt - Device descriptor.
+ *
+ * Returns: Nothing.
+ *
+ * Lock status:
+ *
+ * Notes:
+ */
+void initialize_merge_fn(Scsi_Device * SDpnt)
+{
+ request_queue_t *q;
+ struct Scsi_Host *SHpnt;
+ SHpnt = SDpnt->host;
+
+ q = &SDpnt->request_queue;
+
+ /*
+ * If the host has already selected a merge manager, then don't
+ * pick a new one.
+ */
+#if 0
+ if (q->back_merge_fn && q->front_merge_fn)
+ return;
+#endif
+ /*
+ * If this host has an unlimited tablesize, then don't bother with a
+ * merge manager. The whole point of the operation is to make sure
+ * that requests don't grow too large, and this host isn't picky.
+ *
+ * Note that ll_rw_blk.c is effectively maintaining a segment
+ * count which is only valid if clustering is used, and it obviously
+ * doesn't handle the DMA case. In the end, it
+ * is simply easier to do it ourselves with our own functions
+ * rather than rely upon the default behavior of ll_rw_blk.
+ */
+ if (!CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma == 0) {
+ q->back_merge_fn = scsi_back_merge_fn_;
+ q->front_merge_fn = scsi_front_merge_fn_;
+ q->merge_requests_fn = scsi_merge_requests_fn_;
+ SDpnt->scsi_init_io_fn = scsi_init_io_v;
+ } else if (!CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma != 0) {
+ q->back_merge_fn = scsi_back_merge_fn_;
+ q->front_merge_fn = scsi_front_merge_fn_;
+ q->merge_requests_fn = scsi_merge_requests_fn_;
+ SDpnt->scsi_init_io_fn = scsi_init_io_vd;
+ } else if (CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma == 0) {
+ q->back_merge_fn = scsi_back_merge_fn_c;
+ q->front_merge_fn = scsi_front_merge_fn_c;
+ q->merge_requests_fn = scsi_merge_requests_fn_c;
+ SDpnt->scsi_init_io_fn = scsi_init_io_vc;
+ } else if (CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma != 0) {
+ q->back_merge_fn = scsi_back_merge_fn_dc;
+ q->front_merge_fn = scsi_front_merge_fn_dc;
+ q->merge_requests_fn = scsi_merge_requests_fn_dc;
+ SDpnt->scsi_init_io_fn = scsi_init_io_vdc;
+ }
+}
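+
+/*
+ * Summary of the dispatch above (illustrative comment only):
+ *
+ *   clusterable  unchecked_isa_dma  merge fn suffix  init_io fn
+ *   -----------  -----------------  ---------------  ----------------
+ *   no           no                 _                scsi_init_io_v
+ *   no           yes                _                scsi_init_io_vd
+ *   yes          no                 _c               scsi_init_io_vc
+ *   yes          yes                _dc              scsi_init_io_vdc
+ */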
diff --git a/xen/drivers/scsi/scsi_module.c.inc b/xen/drivers/scsi/scsi_module.c.inc
new file mode 100644
index 0000000000..24099e0f56
--- /dev/null
+++ b/xen/drivers/scsi/scsi_module.c.inc
@@ -0,0 +1,71 @@
+/*
+ * scsi_module.c Copyright (1994, 1995) Eric Youngdale.
+ *
+ * Support for loading low-level scsi drivers using the linux kernel loadable
+ * module interface.
+ *
+ * To use, the host adapter should first define and initialize the variable
+ * driver_template (datatype Scsi_Host_Template), and then include this file.
+ * This should also be wrapped in a #ifdef MODULE/#endif.
+ *
+ * The low-level driver must also define a release function which will
+ * free any irq assignments, release any dma channels, release any I/O
+ * address space that might be reserved, and otherwise clean up after itself.
+ * The idea is that the same driver should be able to be reloaded without
+ * any difficulty. This makes debugging new drivers easier, as you should
+ * be able to load the driver, test it, unload, modify and reload.
+ *
+ * One *very* important caveat. If the driver may need to do DMA on the
+ * ISA bus, you must have unchecked_isa_dma set in the device template,
+ * even if this might be changed during the detect routine. This is
+ * because the shpnt structure will be allocated in a special way so that
+ * it will be below the appropriate DMA limit - thus if your driver uses
+ * the hostdata field of shpnt, and the board must be able to access this
+ * via DMA, the shpnt structure must be in a DMA accessible region of
+ * memory. This comment would be relevant for something like the buslogic
+ * driver where there are many boards, only some of which do DMA onto the
+ * ISA bus. There is no convenient way of specifying whether the host
+ * needs to be in an ISA DMA accessible region of memory when you call
+ * scsi_register.
+ */
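+
+/*
+ * Minimal usage sketch (hypothetical, for illustration only): a
+ * low-level driver built as a module would do something like
+ *
+ *   static Scsi_Host_Template driver_template = MY_TEMPLATE_INITIALIZER;
+ *   #include "scsi_module.c.inc"
+ *
+ * where MY_TEMPLATE_INITIALIZER stands for whatever Scsi_Host_Template
+ * initializer that driver defines; the module_init()/module_exit()
+ * stubs below then register and unregister the template.
+ */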
+
+#include <xeno/module.h>
+#include <linux/init.h>
+
+static int __init init_this_scsi_driver(void)
+{
+ driver_template.module = THIS_MODULE;
+ scsi_register_module(MODULE_SCSI_HA, &driver_template);
+ if (driver_template.present)
+ return 0;
+
+ scsi_unregister_module(MODULE_SCSI_HA, &driver_template);
+ return -ENODEV;
+}
+
+static void __exit exit_this_scsi_driver(void)
+{
+ scsi_unregister_module(MODULE_SCSI_HA, &driver_template);
+}
+
+module_init(init_this_scsi_driver);
+module_exit(exit_this_scsi_driver);
+
+/*
+ * Overrides for Emacs so that we almost follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/scsi_obsolete.h b/xen/drivers/scsi/scsi_obsolete.h
new file mode 100644
index 0000000000..abeacb996e
--- /dev/null
+++ b/xen/drivers/scsi/scsi_obsolete.h
@@ -0,0 +1,106 @@
+/*
+ * scsi_obsolete.h Copyright (C) 1997 Eric Youngdale
+ *
+ */
+
+#ifndef _SCSI_OBSOLETE_H
+#define _SCSI_OBSOLETE_H
+
+/*
+ * These are the return codes for the abort and reset functions. The mid-level
+ * code uses these to decide what to do next. Each of the low level abort
+ * and reset functions must correctly indicate what it has done.
+ * The descriptions are written from the point of view of the mid-level code,
+ * so that the return code is telling the mid-level drivers exactly what
+ * the low level driver has already done, and what remains to be done.
+ */
+
+/* We did not do anything.
+ * Wait some more for this command to complete, and if this does not work,
+ * try something more serious. */
+#define SCSI_ABORT_SNOOZE 0
+
+/* This means that we were able to abort the command. We have already
+ * called the mid-level done function, and do not expect an interrupt that
+ * will lead to another call to the mid-level done function for this command */
+#define SCSI_ABORT_SUCCESS 1
+
+/* We called for an abort of this command, and we should get an interrupt
+ * when this succeeds. Thus we should not restore the timer for this
+ * command in the mid-level abort function. */
+#define SCSI_ABORT_PENDING 2
+
+/* Unable to abort - command is currently on the bus. Grin and bear it. */
+#define SCSI_ABORT_BUSY 3
+
+/* The command is not active in the low level code. Command probably
+ * finished. */
+#define SCSI_ABORT_NOT_RUNNING 4
+
+/* Something went wrong. The low level driver will indicate the correct
+ * error condition when it calls scsi_done, so the mid-level abort function
+ * can simply wait until this comes through */
+#define SCSI_ABORT_ERROR 5
+
+/* We do not know how to reset the bus, or we do not want to. Bummer.
+ * Anyway, just wait a little more for the command in question, and hope that
+ * it eventually finishes. If it never finishes, the SCSI device could
+ * hang, so use this with caution. */
+#define SCSI_RESET_SNOOZE 0
+
+/* We do not know how to reset the bus, or we do not want to. Bummer.
+ * We have given up on this ever completing. The mid-level code will
+ * request sense information to decide how to proceed from here. */
+#define SCSI_RESET_PUNT 1
+
+/* This means that we were able to reset the bus. We have restarted all of
+ * the commands that should be restarted, and we should be able to continue
+ * on normally from here. We do not expect any interrupts that will return
+ * DID_RESET to any of the other commands in the host_queue, and the mid-level
+ * code does not need to do anything special to keep the commands alive.
+ * If a hard reset was performed then all outstanding commands on the
+ * bus have been restarted. */
+#define SCSI_RESET_SUCCESS 2
+
+/* We called for a reset of this bus, and we should get an interrupt
+ * when this succeeds. Each command should get its own status
+ * passed up to scsi_done, but this has not happened yet.
+ * If a hard reset was performed, then we expect an interrupt
+ * for *each* of the outstanding commands that will have the
+ * effect of restarting the commands.
+ */
+#define SCSI_RESET_PENDING 3
+
+/* We did a reset, but do not expect an interrupt to signal DID_RESET.
+ * This tells the upper level code to request the sense info, and this
+ * should keep the command alive. */
+#define SCSI_RESET_WAKEUP 4
+
+/* The command is not active in the low level code. Command probably
+ finished. */
+#define SCSI_RESET_NOT_RUNNING 5
+
+/* Something went wrong, and we do not know how to fix it. */
+#define SCSI_RESET_ERROR 6
+
+#define SCSI_RESET_SYNCHRONOUS 0x01
+#define SCSI_RESET_ASYNCHRONOUS 0x02
+#define SCSI_RESET_SUGGEST_BUS_RESET 0x04
+#define SCSI_RESET_SUGGEST_HOST_RESET 0x08
+/*
+ * This is a bitmask that is ored with one of the above codes.
+ * It tells the mid-level code that we did a hard reset.
+ */
+#define SCSI_RESET_BUS_RESET 0x100
+/*
+ * This is a bitmask that is ored with one of the above codes.
+ * It tells the mid-level code that we did a host adapter reset.
+ */
+#define SCSI_RESET_HOST_RESET 0x200
+/*
+ * Used to mask off bits and to obtain the basic action that was
+ * performed.
+ */
+#define SCSI_RESET_ACTION 0xff
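+
+/*
+ * Illustrative example (not part of this header): a low-level reset
+ * handler that performed a hard bus reset and restarted all commands
+ * might return
+ *
+ *   return SCSI_RESET_SUCCESS | SCSI_RESET_BUS_RESET;
+ *
+ * and the mid-level code would then recover the basic action with
+ *
+ *   switch (rtn & SCSI_RESET_ACTION) { case SCSI_RESET_SUCCESS: ... }
+ */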
+
+#endif /* SCSI_OBSOLETE_H */
diff --git a/xen/drivers/scsi/scsi_proc.c b/xen/drivers/scsi/scsi_proc.c
new file mode 100644
index 0000000000..41a5f7cc49
--- /dev/null
+++ b/xen/drivers/scsi/scsi_proc.c
@@ -0,0 +1,329 @@
+/*
+ * linux/drivers/scsi/scsi_proc.c
+ *
+ * The functions in this file provide an interface between
+ * the PROC file system and the SCSI device drivers
+ * It is mainly used for debugging, statistics and to pass
+ * information directly to the lowlevel driver.
+ *
+ * (c) 1995 Michael Neuffer neuffer@goofy.zdv.uni-mainz.de
+ * Version: 0.99.8 last change: 95/09/13
+ *
+ * generic command parser provided by:
+ * Andreas Heilwagen <crashcar@informatik.uni-koblenz.de>
+ *
+ * generic_proc_info() support of xxxx_info() by:
+ * Michael A. Griffith <grif@acm.org>
+ */
+
+#include <xeno/config.h> /* for CONFIG_PROC_FS */
+#define __NO_VERSION__
+#include <xeno/module.h>
+
+/* #include <xeno/string.h> */
+/* #include <xeno/mm.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/proc_fs.h> */
+/* #include <xeno/errno.h> */
+/* #include <xeno/stat.h> */
+#include <xeno/blk.h>
+
+#include <asm/uaccess.h>
+
+#include "scsi.h"
+#include "hosts.h"
+
+#ifndef TRUE
+#define TRUE 1
+#define FALSE 0
+#endif
+
+#ifdef CONFIG_PROC_FS
+
+/* generic_proc_info
+ * Used if the driver currently has no support of its own for /proc/scsi
+ */
+int generic_proc_info(char *buffer, char **start, off_t offset, int length,
+ const char *(*info) (struct Scsi_Host *),
+ struct Scsi_Host *sh)
+{
+ int len, pos, begin;
+
+ begin = 0;
+ if (info && sh) {
+ pos = len = sprintf(buffer, "%s\n", info(sh));
+ } else {
+ pos = len = sprintf(buffer,
+ "The driver does not yet support the proc-fs\n");
+ }
+ if (pos < offset) {
+ len = 0;
+ begin = pos;
+ }
+ *start = buffer + (offset - begin); /* Start of wanted data */
+ len -= (offset - begin);
+ if (len > length)
+ len = length;
+
+ return (len);
+}
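+
+/*
+ * Worked example of the offset arithmetic above (illustrative only):
+ * for a read at offset 10 with length 100 of a 30-byte info string,
+ * pos ends up 30 so the (pos < offset) branch is skipped, *start
+ * points at buffer + 10, and len becomes 20, so the caller receives
+ * the remaining 20 bytes of the string.
+ */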
+
+/* dispatch_scsi_info is the central dispatcher
+ * It is the interface between the proc-fs and the SCSI subsystem code
+ */
+static int proc_scsi_read(char *buffer, char **start, off_t offset,
+ int length, int *eof, void *data)
+{
+ struct Scsi_Host *hpnt = data;
+ int n;
+
+ if (hpnt->hostt->proc_info == NULL)
+ n = generic_proc_info(buffer, start, offset, length,
+ hpnt->hostt->info, hpnt);
+ else
+ n = (hpnt->hostt->proc_info(buffer, start, offset,
+ length, hpnt->host_no, 0));
+ *eof = (n<length);
+ return n;
+}
+
+#define PROC_BLOCK_SIZE (3*1024) /* 4K page size, but our output routines
+ * use some slack for overruns
+ */
+
+static int proc_scsi_write(struct file * file, const char * buf,
+ unsigned long count, void *data)
+{
+ struct Scsi_Host *hpnt = data;
+ ssize_t ret = 0;
+ char * page;
+ char *start;
+
+    if (hpnt->hostt->proc_info == NULL)
+        return -ENOSYS;
+
+ if (count > PROC_BLOCK_SIZE)
+ return -EOVERFLOW;
+
+ if (!(page = (char *) __get_free_page(GFP_KERNEL)))
+ return -ENOMEM;
+ if(copy_from_user(page, buf, count))
+ {
+ free_page((ulong) page);
+ return -EFAULT;
+ }
+
+ ret = hpnt->hostt->proc_info(page, &start, 0, count,
+ hpnt->host_no, 1);
+
+ free_page((ulong) page);
+ return(ret);
+}
+
+void build_proc_dir_entries(Scsi_Host_Template * tpnt)
+{
+ struct Scsi_Host *hpnt;
+ char name[10]; /* see scsi_unregister_host() */
+
+ tpnt->proc_dir = proc_mkdir(tpnt->proc_name, proc_scsi);
+ if (!tpnt->proc_dir) {
+ printk(KERN_ERR "Unable to proc_mkdir in scsi.c/build_proc_dir_entries");
+ return;
+ }
+ tpnt->proc_dir->owner = tpnt->module;
+
+ hpnt = scsi_hostlist;
+ while (hpnt) {
+ if (tpnt == hpnt->hostt) {
+ struct proc_dir_entry *p;
+ sprintf(name,"%d",hpnt->host_no);
+ p = create_proc_read_entry(name,
+ S_IFREG | S_IRUGO | S_IWUSR,
+ tpnt->proc_dir,
+ proc_scsi_read,
+ (void *)hpnt);
+ if (!p)
+ panic("Not enough memory to register SCSI HBA in /proc/scsi !\n");
+ p->write_proc=proc_scsi_write;
+ p->owner = tpnt->module;
+ }
+ hpnt = hpnt->next;
+ }
+}
+
+/*
+ * parseHandle *parseInit(char *buf, char *cmdList, int cmdNum);
+ * gets a pointer to a null terminated data buffer
+ * and a list of commands with blanks as delimiter
+ * in between.
+ * The commands have to be alphanumerically sorted.
+ * cmdNum has to contain the number of commands.
+ * On success, a pointer to a handle structure
+ * is returned, NULL on failure
+ *
+ * int parseOpt(parseHandle *handle, char **param);
+ * processes the next parameter. On success, the
+ * index of the appropriate command in the cmdList
+ * is returned, starting with zero.
+ * param points to the null terminated parameter string.
+ * On failure, -1 is returned.
+ *
+ * The data buffer buf may only contain pairs of commands and
+ * options, separated by blanks:
+ * <Command> <Parameter> [<Command> <Parameter>]*
+ */
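+
+/*
+ * Hypothetical usage sketch (names invented for illustration):
+ *
+ *   char cmds[] = "alpha beta";   (writable, alphabetically sorted)
+ *   char *param;
+ *   int idx;
+ *   parseHandle *h = parseInit(buf, cmds, 2);
+ *
+ *   while (h && (idx = parseOpt(h, &param)) >= 0)
+ *       printk("cmd %d param %s\n", idx, param);
+ *
+ * Note that parseInit() writes NULs into cmds, and parseOpt() frees
+ * the handle and returns -1 once the buffer is exhausted, so the
+ * handle must not be used again afterwards.
+ */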
+
+typedef struct {
+ char *buf, /* command buffer */
+ *cmdList, /* command list */
+ *bufPos, /* actual position */
+ **cmdPos, /* cmdList index */
+ cmdNum; /* cmd number */
+} parseHandle;
+
+inline int parseFree(parseHandle * handle)
+{ /* free memory */
+ kfree(handle->cmdPos);
+ kfree(handle);
+
+ return -1;
+}
+
+parseHandle *parseInit(char *buf, char *cmdList, int cmdNum)
+{
+ char *ptr; /* temp pointer */
+ parseHandle *handle; /* new handle */
+
+ if (!buf || !cmdList) /* bad input ? */
+ return NULL;
+ handle = (parseHandle *) kmalloc(sizeof(parseHandle), GFP_KERNEL);
+ if (!handle)
+ return NULL; /* out of memory */
+    handle->cmdPos = (char **) kmalloc(sizeof(char *) * cmdNum, GFP_KERNEL);
+ if (!handle->cmdPos) {
+ kfree(handle);
+ return NULL; /* out of memory */
+ }
+ handle->buf = handle->bufPos = buf; /* init handle */
+ handle->cmdList = cmdList;
+ handle->cmdNum = cmdNum;
+
+ handle->cmdPos[cmdNum = 0] = cmdList;
+ for (ptr = cmdList; *ptr; ptr++) { /* scan command string */
+ if (*ptr == ' ') { /* and insert zeroes */
+ *ptr++ = 0;
+ handle->cmdPos[++cmdNum] = ptr++;
+ }
+ }
+ return handle;
+}
+
+int parseOpt(parseHandle * handle, char **param)
+{
+ int cmdIndex = 0, cmdLen = 0;
+ char *startPos;
+
+ if (!handle) /* invalid handle */
+ return (parseFree(handle));
+ /* skip spaces */
+ for (; *(handle->bufPos) && *(handle->bufPos) == ' '; handle->bufPos++);
+ if (!*(handle->bufPos))
+ return (parseFree(handle)); /* end of data */
+
+ startPos = handle->bufPos; /* store cmd start */
+ for (; handle->cmdPos[cmdIndex][cmdLen] && *(handle->bufPos); handle->bufPos++) { /* no string end? */
+ for (;;) {
+ if (*(handle->bufPos) == handle->cmdPos[cmdIndex][cmdLen])
+ break; /* char matches ? */
+ else if (memcmp(startPos, (char *) (handle->cmdPos[++cmdIndex]), cmdLen))
+ return (parseFree(handle)); /* unknown command */
+
+ if (cmdIndex >= handle->cmdNum)
+ return (parseFree(handle)); /* unknown command */
+ }
+
+ cmdLen++; /* next char */
+ }
+
+ /* Get param. First skip all blanks, then insert zero after param */
+
+ for (; *(handle->bufPos) && *(handle->bufPos) == ' '; handle->bufPos++);
+ *param = handle->bufPos;
+
+ for (; *(handle->bufPos) && *(handle->bufPos) != ' '; handle->bufPos++);
+ *(handle->bufPos++) = 0;
+
+ return (cmdIndex);
+}
+
+void proc_print_scsidevice(Scsi_Device * scd, char *buffer, int *size, int len)
+{
+
+ int x, y = *size;
+ extern const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE];
+
+ y = sprintf(buffer + len,
+ "Host: scsi%d Channel: %02d Id: %02d Lun: %02d\n Vendor: ",
+ scd->host->host_no, scd->channel, scd->id, scd->lun);
+ for (x = 0; x < 8; x++) {
+ if (scd->vendor[x] >= 0x20)
+ y += sprintf(buffer + len + y, "%c", scd->vendor[x]);
+ else
+ y += sprintf(buffer + len + y, " ");
+ }
+ y += sprintf(buffer + len + y, " Model: ");
+ for (x = 0; x < 16; x++) {
+ if (scd->model[x] >= 0x20)
+ y += sprintf(buffer + len + y, "%c", scd->model[x]);
+ else
+ y += sprintf(buffer + len + y, " ");
+ }
+ y += sprintf(buffer + len + y, " Rev: ");
+ for (x = 0; x < 4; x++) {
+ if (scd->rev[x] >= 0x20)
+ y += sprintf(buffer + len + y, "%c", scd->rev[x]);
+ else
+ y += sprintf(buffer + len + y, " ");
+ }
+ y += sprintf(buffer + len + y, "\n");
+
+ y += sprintf(buffer + len + y, " Type: %s ",
+ scd->type < MAX_SCSI_DEVICE_CODE ?
+ scsi_device_types[(int) scd->type] : "Unknown ");
+ y += sprintf(buffer + len + y, " ANSI"
+ " SCSI revision: %02x", (scd->scsi_level - 1) ? scd->scsi_level - 1 : 1);
+ if (scd->scsi_level == 2)
+ y += sprintf(buffer + len + y, " CCS\n");
+ else
+ y += sprintf(buffer + len + y, "\n");
+
+ *size = y;
+ return;
+}
+
+#else /* if !CONFIG_PROC_FS */
+
+void proc_print_scsidevice(Scsi_Device * scd, char *buffer, int *size, int len)
+{
+}
+
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Overrides for Emacs so that we get a uniform tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/drivers/scsi/scsi_queue.c b/xen/drivers/scsi/scsi_queue.c
new file mode 100644
index 0000000000..ce790c9d11
--- /dev/null
+++ b/xen/drivers/scsi/scsi_queue.c
@@ -0,0 +1,151 @@
+/*
+ * scsi_queue.c Copyright (C) 1997 Eric Youngdale
+ *
+ * generic mid-level SCSI queueing.
+ *
+ * The point of this is that we need to track when hosts are unable to
+ * accept a command because they are busy. In addition, we track devices
+ * that cannot accept a command because of a QUEUE_FULL condition. In both
+ * of these cases, we enter the command in the queue. At some later point,
+ * we attempt to remove commands from the queue and retry them.
+ */
+
+#define __NO_VERSION__
+#include <xeno/module.h>
+
+#include <xeno/sched.h>
+#include <xeno/timer.h>
+/* #include <xeno/string.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/ioport.h> */
+/* #include <xeno/kernel.h> */
+/* #include <xeno/stat.h> */
+#include <xeno/blk.h>
+/* #include <xeno/interrupt.h> */
+/* #include <xeno/delay.h> */
+/* #include <xeno/smp_lock.h> */
+
+#define __KERNEL_SYSCALLS__
+
+/*#include <xeno/unistd.h>*/
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+/*
+ * TODO:
+ * 1) Prevent multiple traversals of list to look for commands to
+ * queue.
+ * 2) Protect against multiple insertions of list at the same time.
+ * DONE:
+ * 1) Set state of scsi command to a new state value for ml queue.
+ * 2) Insert into queue when host rejects command.
+ * 3) Make sure status code is properly passed from low-level queue func
+ * so that internal_cmnd properly returns the right value.
+ * 4) Insert into queue when QUEUE_FULL.
+ * 5) Cull queue in bottom half handler.
+ * 6) Check usage count prior to queue insertion. Requeue if usage
+ * count is 0.
+ * 7) Don't send down any more commands if the host/device is busy.
+ */
+
+static const char RCSid[] = "$Header: /mnt/ide/home/eric/CVSROOT/linux/drivers/scsi/scsi_queue.c,v 1.1 1997/10/21 11:16:38 eric Exp $";
+
+
+/*
+ * Function: scsi_mlqueue_insert()
+ *
+ * Purpose: Insert a command in the midlevel queue.
+ *
+ * Arguments: cmd - command that we are adding to queue.
+ * reason - why we are inserting command to queue.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns: Nothing.
+ *
+ * Notes: We do this for one of two cases. Either the host is busy
+ * and it cannot accept any more commands for the time being,
+ * or the device returned QUEUE_FULL and can accept no more
+ * commands.
+ *              This could be called either from an interrupt context or a
+ * normal process context.
+ */
+int scsi_mlqueue_insert(Scsi_Cmnd * cmd, int reason)
+{
+ struct Scsi_Host *host;
+ unsigned long flags;
+
+ SCSI_LOG_MLQUEUE(1, printk("Inserting command %p into mlqueue\n", cmd));
+
+ /*
+ * We are inserting the command into the ml queue. First, we
+ * cancel the timer, so it doesn't time out.
+ */
+ scsi_delete_timer(cmd);
+
+ host = cmd->host;
+
+ /*
+ * Next, set the appropriate busy bit for the device/host.
+ */
+ if (reason == SCSI_MLQUEUE_HOST_BUSY) {
+ /*
+ * Protect against race conditions. If the host isn't busy,
+ * assume that something actually completed, and that we should
+ * be able to queue a command now. Note that there is an implicit
+ * assumption that every host can always queue at least one command.
+ * If a host is inactive and cannot queue any commands, I don't see
+         * how things could possibly work anyway.
+ */
+ if (host->host_busy == 0) {
+ if (scsi_retry_command(cmd) == 0) {
+ return 0;
+ }
+ }
+ host->host_blocked = TRUE;
+ } else {
+ /*
+ * Protect against race conditions. If the device isn't busy,
+ * assume that something actually completed, and that we should
+ * be able to queue a command now. Note that there is an implicit
+         * assumption that every device can always queue at least one command.
+         * If a device is inactive and cannot queue any commands, I don't see
+         * how things could possibly work anyway.
+ */
+ if (cmd->device->device_busy == 0) {
+ if (scsi_retry_command(cmd) == 0) {
+ return 0;
+ }
+ }
+ cmd->device->device_blocked = TRUE;
+ }
+
+ /*
+ * Register the fact that we own the thing for now.
+ */
+ cmd->state = SCSI_STATE_MLQUEUE;
+ cmd->owner = SCSI_OWNER_MIDLEVEL;
+ cmd->bh_next = NULL;
+
+ /*
+ * Decrement the counters, since these commands are no longer
+ * active on the host/device.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+ cmd->host->host_busy--;
+ cmd->device->device_busy--;
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ /*
+     * Insert this command at the head of the queue for its device.
+ * It will go before all other commands that are already in the queue.
+ */
+ scsi_insert_special_cmd(cmd, 1);
+ return 0;
+}
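+
+/*
+ * Illustrative call pattern (an assumption about typical use, not code
+ * in this file): when a host rejects a command, the mid-level would
+ * requeue it with
+ *
+ *   scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_HOST_BUSY);
+ *
+ * and use the device-busy reason code instead when the device itself
+ * reported QUEUE_FULL.
+ */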
diff --git a/xen/drivers/scsi/scsi_scan.c b/xen/drivers/scsi/scsi_scan.c
new file mode 100644
index 0000000000..04f4715992
--- /dev/null
+++ b/xen/drivers/scsi/scsi_scan.c
@@ -0,0 +1,906 @@
+/*
+ * scsi_scan.c Copyright (C) 2000 Eric Youngdale
+ *
+ * Bus scan logic.
+ *
+ * This used to live in scsi.c, but that file was just a laundry basket
+ * full of misc stuff. This got separated out in order to make things
+ * clearer.
+ */
+
+#define __NO_VERSION__
+#include <xeno/config.h>
+#include <xeno/module.h>
+#include <xeno/init.h>
+
+#include <xeno/blk.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+#ifdef CONFIG_KMOD
+#include <linux/kmod.h>
+#endif
+
+/*
+ * Flags for irregular SCSI devices that need special treatment
+ */
+#define BLIST_NOLUN 0x001 /* Don't scan for LUNs */
+#define BLIST_FORCELUN 0x002 /* Known to have LUNs, force scanning */
+#define BLIST_BORKEN 0x004 /* Flag for broken handshaking */
+#define BLIST_KEY 0x008 /* Needs to be unlocked by special command */
+#define BLIST_SINGLELUN 0x010 /* LUNs should better not be used in parallel */
+#define BLIST_NOTQ 0x020 /* Buggy Tagged Command Queuing */
+#define BLIST_SPARSELUN 0x040 /* Non-consecutive LUN numbering */
+#define BLIST_MAX5LUN 0x080 /* Avoid LUNS >= 5 */
+#define BLIST_ISDISK 0x100 /* Treat as (removable) disk */
+#define BLIST_ISROM 0x200 /* Treat as (removable) CD-ROM */
+#define BLIST_LARGELUN 0x400 /* LUNs larger than 7 despite reporting as SCSI 2 */
+
+static void print_inquiry(unsigned char *data);
+static int scan_scsis_single(unsigned int channel, unsigned int dev,
+ unsigned int lun, int lun0_scsi_level,
+ unsigned int *max_scsi_dev, unsigned int *sparse_lun,
+ Scsi_Device ** SDpnt, struct Scsi_Host *shpnt,
+ char *scsi_result);
+static int find_lun0_scsi_level(unsigned int channel, unsigned int dev,
+ struct Scsi_Host *shpnt);
+
+struct dev_info {
+ const char *vendor;
+ const char *model;
+ const char *revision; /* Latest revision known to be bad. Not used yet */
+ unsigned flags;
+};
+
+/*
+ * This is what was previously known as the blacklist. The concept
+ * has been expanded so that we can specify other types of things we
+ * need to be aware of.
+ */
+static struct dev_info device_list[] =
+{
+/* The following devices are known not to tolerate a lun != 0 scan for
+ * one reason or another. Some will respond to all luns, others will
+ * lock up.
+ */
+ {"Aashima", "IMAGERY 2400SP", "1.03", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"CHINON", "CD-ROM CDS-431", "H42", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"CHINON", "CD-ROM CDS-535", "Q14", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"DENON", "DRD-25X", "V", BLIST_NOLUN}, /* Locks up if probed for lun != 0 */
+ {"HITACHI", "DK312C", "CM81", BLIST_NOLUN}, /* Responds to all lun - dtg */
+ {"HITACHI", "DK314C", "CR21", BLIST_NOLUN}, /* responds to all lun */
+ {"IMS", "CDD521/10", "2.06", BLIST_NOLUN}, /* Locks-up when LUN>0 polled. */
+ {"MAXTOR", "XT-3280", "PR02", BLIST_NOLUN}, /* Locks-up when LUN>0 polled. */
+ {"MAXTOR", "XT-4380S", "B3C", BLIST_NOLUN}, /* Locks-up when LUN>0 polled. */
+ {"MAXTOR", "MXT-1240S", "I1.2", BLIST_NOLUN}, /* Locks up when LUN>0 polled */
+ {"MAXTOR", "XT-4170S", "B5A", BLIST_NOLUN}, /* Locks-up sometimes when LUN>0 polled. */
+ {"MAXTOR", "XT-8760S", "B7B", BLIST_NOLUN}, /* guess what? */
+ {"MEDIAVIS", "RENO CD-ROMX2A", "2.03", BLIST_NOLUN}, /*Responds to all lun */
+ {"NEC", "CD-ROM DRIVE:841", "1.0", BLIST_NOLUN}, /* Locks-up when LUN>0 polled. */
+ {"PHILIPS", "PCA80SC", "V4-2", BLIST_NOLUN}, /* Responds to all lun */
+ {"RODIME", "RO3000S", "2.33", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"SANYO", "CRD-250S", "1.20", BLIST_NOLUN}, /* causes failed REQUEST SENSE on lun 1
+ * for aha152x controller, which causes
+ * SCSI code to reset bus.*/
+ {"SEAGATE", "ST157N", "\004|j", BLIST_NOLUN}, /* causes failed REQUEST SENSE on lun 1
+ * for aha152x controller, which causes
+ * SCSI code to reset bus.*/
+ {"SEAGATE", "ST296", "921", BLIST_NOLUN}, /* Responds to all lun */
+ {"SEAGATE", "ST1581", "6538", BLIST_NOLUN}, /* Responds to all lun */
+ {"SONY", "CD-ROM CDU-541", "4.3d", BLIST_NOLUN},
+ {"SONY", "CD-ROM CDU-55S", "1.0i", BLIST_NOLUN},
+ {"SONY", "CD-ROM CDU-561", "1.7x", BLIST_NOLUN},
+ {"SONY", "CD-ROM CDU-8012", "*", BLIST_NOLUN},
+ {"TANDBERG", "TDC 3600", "U07", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"TEAC", "CD-R55S", "1.0H", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"TEAC", "CD-ROM", "1.06", BLIST_NOLUN}, /* causes failed REQUEST SENSE on lun 1
+ * for seagate controller, which causes
+ * SCSI code to reset bus.*/
+ {"TEAC", "MT-2ST/45S2-27", "RV M", BLIST_NOLUN}, /* Responds to all lun */
+ {"TEXEL", "CD-ROM", "1.06", BLIST_NOLUN}, /* causes failed REQUEST SENSE on lun 1
+ * for seagate controller, which causes
+ * SCSI code to reset bus.*/
+ {"QUANTUM", "LPS525S", "3110", BLIST_NOLUN}, /* Locks sometimes if polled for lun != 0 */
+ {"QUANTUM", "PD1225S", "3110", BLIST_NOLUN}, /* Locks sometimes if polled for lun != 0 */
+ {"QUANTUM", "FIREBALL ST4.3S", "0F0C", BLIST_NOLUN}, /* Locks up when polled for lun != 0 */
+ {"MEDIAVIS", "CDR-H93MV", "1.31", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"SANKYO", "CP525", "6.64", BLIST_NOLUN}, /* causes failed REQ SENSE, extra reset */
+ {"HP", "C1750A", "3226", BLIST_NOLUN}, /* scanjet iic */
+ {"HP", "C1790A", "", BLIST_NOLUN}, /* scanjet iip */
+ {"HP", "C2500A", "", BLIST_NOLUN}, /* scanjet iicx */
+ {"HP", "A6188A", "*", BLIST_SPARSELUN}, /* HP Va7100 Array */
+ {"HP", "A6189A", "*", BLIST_SPARSELUN}, /* HP Va7400 Array */
+ {"HP", "A6189B", "*", BLIST_SPARSELUN}, /* HP Va7410 Array */
+ {"HP", "OPEN-", "*", BLIST_SPARSELUN}, /* HP XP Arrays */
+ {"YAMAHA", "CDR100", "1.00", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"YAMAHA", "CDR102", "1.00", BLIST_NOLUN}, /* Locks up if polled for lun != 0
+ * extra reset */
+ {"YAMAHA", "CRW8424S", "1.0", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"YAMAHA", "CRW6416S", "1.0c", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"MITSUMI", "CD-R CR-2201CS", "6119", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */
+ {"RELISYS", "Scorpio", "*", BLIST_NOLUN}, /* responds to all LUN */
+ {"RELISYS", "VM3530+", "*", BLIST_NOLUN}, /* responds to all LUN */
+ {"ACROSS", "", "*", BLIST_NOLUN}, /* responds to all LUN */
+ {"MICROTEK", "ScanMaker II", "5.61", BLIST_NOLUN}, /* responds to all LUN */
+
+/*
+ * Other types of devices that have special flags.
+ */
+ {"SONY", "CD-ROM CDU-8001", "*", BLIST_BORKEN},
+ {"TEXEL", "CD-ROM", "1.06", BLIST_BORKEN},
+ {"IOMEGA", "Io20S *F", "*", BLIST_KEY},
+ {"INSITE", "Floptical F*8I", "*", BLIST_KEY},
+ {"INSITE", "I325VM", "*", BLIST_KEY},
+ {"LASOUND","CDX7405","3.10", BLIST_MAX5LUN | BLIST_SINGLELUN},
+ {"MICROP", "4110", "*", BLIST_NOTQ}, /* Buggy Tagged Queuing */
+ {"NRC", "MBR-7", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"NRC", "MBR-7.4", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"REGAL", "CDC-4X", "*", BLIST_MAX5LUN | BLIST_SINGLELUN},
+ {"NAKAMICH", "MJ-4.8S", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"NAKAMICH", "MJ-5.16S", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"PIONEER", "CD-ROM DRM-600", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"PIONEER", "CD-ROM DRM-602X", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"PIONEER", "CD-ROM DRM-604X", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"EMULEX", "MD21/S2 ESDI", "*", BLIST_SINGLELUN},
+ {"CANON", "IPUBJD", "*", BLIST_SPARSELUN},
+ {"nCipher", "Fastness Crypto", "*", BLIST_FORCELUN},
+ {"DEC","HSG80","*", BLIST_FORCELUN},
+ {"COMPAQ","LOGICAL VOLUME","*", BLIST_FORCELUN},
+ {"COMPAQ","CR3500","*", BLIST_FORCELUN},
+ {"NEC", "PD-1 ODX654P", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"MATSHITA", "PD-1", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"iomega", "jaz 1GB", "J.86", BLIST_NOTQ | BLIST_NOLUN},
+ {"TOSHIBA","CDROM","*", BLIST_ISROM},
+ {"TOSHIBA","CD-ROM","*", BLIST_ISROM},
+ {"MegaRAID", "LD", "*", BLIST_FORCELUN},
+ {"DGC", "RAID", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 650F (tgt @ LUN 0)
+ {"DGC", "DISK", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 650F (no tgt @ LUN 0)
+ {"DELL", "PV660F", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
+ {"DELL", "PV660F PSEUDO", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
+ {"DELL", "PSEUDO DEVICE .", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 530F
+ {"DELL", "PV530F", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 530F
+ {"EMC", "SYMMETRIX", "*", BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_FORCELUN},
+ {"HP", "A6189A", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // HP VA7400, by Alar Aun
+ {"CMD", "CRA-7280", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // CMD RAID Controller
+ {"CNSI", "G7324", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Chaparral G7324 RAID
+ {"CNSi", "G8324", "*", BLIST_SPARSELUN}, // Chaparral G8324 RAID
+ {"Zzyzx", "RocketStor 500S", "*", BLIST_SPARSELUN},
+ {"Zzyzx", "RocketStor 2000", "*", BLIST_SPARSELUN},
+ {"SONY", "TSL", "*", BLIST_FORCELUN}, // DDS3 & DDS4 autoloaders
+ {"DELL", "PERCRAID", "*", BLIST_FORCELUN},
+ {"HP", "NetRAID-4M", "*", BLIST_FORCELUN},
+ {"ADAPTEC", "AACRAID", "*", BLIST_FORCELUN},
+ {"ADAPTEC", "Adaptec 5400S", "*", BLIST_FORCELUN},
+ {"COMPAQ", "MSA1000", "*", BLIST_FORCELUN},
+ {"HP", "C1557A", "*", BLIST_FORCELUN},
+ {"IBM", "AuSaV1S2", "*", BLIST_FORCELUN},
+ {"FSC", "CentricStor", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
+ {"DDN", "SAN DataDirector", "*", BLIST_SPARSELUN},
+ {"HITACHI", "DF400", "*", BLIST_SPARSELUN},
+ {"HITACHI", "DF500", "*", BLIST_SPARSELUN},
+ {"HITACHI", "DF600", "*", BLIST_SPARSELUN},
+
+ /*
+ * Must be at end of list...
+ */
+ {NULL, NULL, NULL}
+};
+
+#define MAX_SCSI_LUNS 0xFFFFFFFF
+
+#ifdef CONFIG_SCSI_MULTI_LUN
+static unsigned int max_scsi_luns = MAX_SCSI_LUNS;
+#else
+static unsigned int max_scsi_luns = 1;
+#endif
+
+#ifdef MODULE
+
+MODULE_PARM(max_scsi_luns, "i");
+MODULE_PARM_DESC(max_scsi_luns, "last scsi LUN (should be between 1 and 2^32-1)");
+
+#else
+
+static int __init scsi_luns_setup(char *str)
+{
+#if 0
+ unsigned int tmp;
+
+ if (get_option(&str, &tmp) == 1) {
+ max_scsi_luns = tmp;
+ return 1;
+ } else {
+ printk("scsi_luns_setup : usage max_scsi_luns=n "
+ "(n should be between 1 and 2^32-1)\n");
+ return 0;
+ }
+#else
+ return 0;
+#endif
+}
+
+__setup("max_scsi_luns=", scsi_luns_setup);
+
+#endif
+
+static void print_inquiry(unsigned char *data)
+{
+ int i;
+
+ printk(" Vendor: ");
+ for (i = 8; i < 16; i++) {
+ if (data[i] >= 0x20 && i < data[4] + 5)
+ printk("%c", data[i]);
+ else
+ printk(" ");
+ }
+
+ printk(" Model: ");
+ for (i = 16; i < 32; i++) {
+ if (data[i] >= 0x20 && i < data[4] + 5)
+ printk("%c", data[i]);
+ else
+ printk(" ");
+ }
+
+ printk(" Rev: ");
+ for (i = 32; i < 36; i++) {
+ if (data[i] >= 0x20 && i < data[4] + 5)
+ printk("%c", data[i]);
+ else
+ printk(" ");
+ }
+
+ printk("\n");
+
+ i = data[0] & 0x1f;
+
+ printk(" Type: %s ",
+ i < MAX_SCSI_DEVICE_CODE ? scsi_device_types[i] : "Unknown ");
+ printk(" ANSI SCSI revision: %02x", data[2] & 0x07);
+ if ((data[2] & 0x07) == 1 && (data[3] & 0x0f) == 1)
+ printk(" CCS\n");
+ else
+ printk("\n");
+}
+
+static int get_device_flags(unsigned char *response_data)
+{
+ int i = 0;
+ unsigned char *pnt;
+ for (i = 0; 1; i++) {
+ if (device_list[i].vendor == NULL)
+ return 0;
+ pnt = &response_data[8];
+ while (*pnt && *pnt == ' ')
+ pnt++;
+ if (memcmp(device_list[i].vendor, pnt,
+ strlen(device_list[i].vendor)))
+ continue;
+ pnt = &response_data[16];
+ while (*pnt && *pnt == ' ')
+ pnt++;
+ if (memcmp(device_list[i].model, pnt,
+ strlen(device_list[i].model)))
+ continue;
+ return device_list[i].flags;
+ }
+ return 0;
+}
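+
+/*
+ * Illustrative note: the returned flags are tested bit by bit in the
+ * scan logic, e.g. scan_scsis() below treats a lun-0 device that
+ * reports BLIST_LARGELUN as SCSI 3 so that luns above 7 are still
+ * probed.
+ */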
+
+/*
+ * Detecting SCSI devices:
+ * We scan all present host adapters' buses, from ID 0 to ID (max_id).
+ * We use the INQUIRY command, determine device type, and pass the ID /
+ * lun address of all sequential devices to the tape driver, all random
+ * devices to the disk driver.
+ */
+void scan_scsis(struct Scsi_Host *shpnt,
+ uint hardcoded,
+ uint hchannel,
+ uint hid,
+ uint hlun)
+{
+ uint channel;
+ unsigned int dev;
+ unsigned int lun;
+ unsigned int max_dev_lun;
+ unsigned char *scsi_result;
+ unsigned char scsi_result0[256];
+ Scsi_Device *SDpnt;
+ Scsi_Device *SDtail;
+ unsigned int sparse_lun;
+ int lun0_sl;
+
+ scsi_result = NULL;
+
+ SDpnt = (Scsi_Device *) kmalloc(sizeof(Scsi_Device),
+ GFP_ATOMIC);
+ if (SDpnt) {
+ memset(SDpnt, 0, sizeof(Scsi_Device));
+ /*
+ * Register the queue for the device. All I/O requests will
+ * come in through here. We also need to register a pointer to
+ * ourselves, since the queue handler won't know what device
+ * the queue actually represents. We could look it up, but it
+ * is pointless work.
+ */
+ scsi_initialize_queue(SDpnt, shpnt);
+ SDpnt->request_queue.queuedata = (void *) SDpnt;
+ /* Make sure we have something that is valid for DMA purposes */
+ scsi_result = ((!shpnt->unchecked_isa_dma)
+ ? &scsi_result0[0] : kmalloc(512, GFP_DMA));
+ }
+
+ if (scsi_result == NULL) {
+ printk("Unable to obtain scsi_result buffer\n");
+ goto leave;
+ }
+ /*
+ * We must chain ourself in the host_queue, so commands can time out
+ */
+ SDpnt->queue_depth = 1;
+ SDpnt->host = shpnt;
+ SDpnt->online = TRUE;
+
+ initialize_merge_fn(SDpnt);
+
+#if 0
+ /*
+ * Initialize the object that we will use to wait for command blocks.
+ */
+ init_waitqueue_head(&SDpnt->scpnt_wait);
+#endif
+
+ /*
+ * Next, hook the device to the host in question.
+ */
+ SDpnt->prev = NULL;
+ SDpnt->next = NULL;
+ if (shpnt->host_queue != NULL) {
+ SDtail = shpnt->host_queue;
+ while (SDtail->next != NULL)
+ SDtail = SDtail->next;
+
+ SDtail->next = SDpnt;
+ SDpnt->prev = SDtail;
+ } else {
+ shpnt->host_queue = SDpnt;
+ }
+
+ /*
+ * We need to increment the counter for this one device so we can track
+ * when things are quiet.
+ */
+ if (hardcoded == 1) {
+ Scsi_Device *oldSDpnt = SDpnt;
+ struct Scsi_Device_Template *sdtpnt;
+ channel = hchannel;
+ if (channel > shpnt->max_channel)
+ goto leave;
+ dev = hid;
+ if (dev >= shpnt->max_id)
+ goto leave;
+ lun = hlun;
+ if (lun >= shpnt->max_lun)
+ goto leave;
+ if ((0 == lun) || (lun > 7))
+ lun0_sl = SCSI_3; /* actually don't care for 0 == lun */
+ else
+ lun0_sl = find_lun0_scsi_level(channel, dev, shpnt);
+ scan_scsis_single(channel, dev, lun, lun0_sl, &max_dev_lun,
+ &sparse_lun, &SDpnt, shpnt, scsi_result);
+ if (SDpnt != oldSDpnt) {
+
+            /* it could happen that the block device hasn't been initialized yet */
+ /* queue_depth() moved from scsi_proc_info() so that
+ it is called before scsi_build_commandblocks() */
+ if (shpnt->select_queue_depths != NULL)
+ (shpnt->select_queue_depths)(shpnt,
+ shpnt->host_queue);
+
+ for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next)
+ if (sdtpnt->init && sdtpnt->dev_noticed)
+ (*sdtpnt->init) ();
+
+ for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) {
+ if (sdtpnt->attach) {
+ (*sdtpnt->attach) (oldSDpnt);
+ if (oldSDpnt->attached) {
+ scsi_build_commandblocks(oldSDpnt);
+ if (0 == oldSDpnt->has_cmdblocks) {
+ printk("scan_scsis: DANGER, no command blocks\n");
+ /* What to do now ?? */
+ }
+ }
+ }
+ }
+ scsi_resize_dma_pool();
+
+ for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) {
+ if (sdtpnt->finish && sdtpnt->nr_dev) {
+ (*sdtpnt->finish) ();
+ }
+ }
+ }
+ } else {
+ /* Actual LUN. PC ordering is 0->n; IBM/spec ordering is n->0 */
+ int order_dev;
+
+ for (channel = 0; channel <= shpnt->max_channel; channel++) {
+ for (dev = 0; dev < shpnt->max_id; ++dev) {
+ if (shpnt->reverse_ordering)
+ /* Shift to scanning 15,14,13... or 7,6,5,4... */
+ order_dev = shpnt->max_id - dev - 1;
+ else
+ order_dev = dev;
+
+ if (shpnt->this_id != order_dev) {
+
+ /*
+ * We need the for loop so our continue, etc. work fine. We put the
+ * limit in a variable so that we can override it during the scan if
+ * we detect a device *KNOWN* to have multiple logical units.
+ */
+ max_dev_lun = (max_scsi_luns < shpnt->max_lun ?
+ max_scsi_luns : shpnt->max_lun);
+ sparse_lun = 0;
+ for (lun = 0, lun0_sl = SCSI_2; lun < max_dev_lun; ++lun) {
+ /* don't probe further for luns > 7 for targets <= SCSI_2 */
+ if ((lun0_sl < SCSI_3) && (lun > 7))
+ break;
+
+ if (!scan_scsis_single(channel, order_dev, lun, lun0_sl,
+ &max_dev_lun, &sparse_lun, &SDpnt, shpnt,
+ scsi_result)
+ && !sparse_lun)
+ break; /* break means don't probe further for luns!=0 */
+ if (SDpnt && (0 == lun)) {
+ int bflags = get_device_flags (scsi_result);
+ if (bflags & BLIST_LARGELUN)
+ lun0_sl = SCSI_3; /* treat as SCSI 3 */
+ else
+ lun0_sl = SDpnt->scsi_level;
+ }
+ } /* for lun ends */
+ } /* if this_id != id ends */
+ } /* for dev ends */
+ } /* for channel ends */
+ } /* if/else hardcoded */
+
+ leave:
+
+ { /* Unchain SDpnt from host_queue */
+ Scsi_Device *prev, *next;
+ Scsi_Device *dqptr;
+
+ for (dqptr = shpnt->host_queue; dqptr != SDpnt; dqptr = dqptr->next)
+ continue;
+ if (dqptr) {
+ prev = dqptr->prev;
+ next = dqptr->next;
+ if (prev)
+ prev->next = next;
+ else
+ shpnt->host_queue = next;
+ if (next)
+ next->prev = prev;
+ }
+ }
+
+ /* Last device block does not exist. Free memory. */
+ if (SDpnt != NULL) {
+ blk_cleanup_queue(&SDpnt->request_queue);
+ kfree((char *) SDpnt);
+ }
+
+ /* If we allocated a buffer so we could do DMA, free it now */
+ if (scsi_result != &scsi_result0[0] && scsi_result != NULL) {
+ kfree(scsi_result);
+ }
+
+ {
+ Scsi_Device *sdev;
+ Scsi_Cmnd *scmd;
+
+ SCSI_LOG_SCAN_BUS(4, printk("Host status for host %p:\n", shpnt));
+ for (sdev = shpnt->host_queue; sdev; sdev = sdev->next) {
+ SCSI_LOG_SCAN_BUS(4, printk("Device %d %p: ", sdev->id, sdev));
+ for (scmd = sdev->device_queue; scmd; scmd = scmd->next) {
+ SCSI_LOG_SCAN_BUS(4, printk("%p ", scmd));
+ }
+ SCSI_LOG_SCAN_BUS(4, printk("\n"));
+ }
+ }
+}
+
+/*
+ * The worker for scan_scsis.
+ * Returning 0 means don't probe further for lun != 0; 1 means OK, go on.
+ * Global variables used: scsi_devices (linked list)
+ */
+static int scan_scsis_single(unsigned int channel, unsigned int dev,
+ unsigned int lun, int lun0_scsi_level,
+ unsigned int *max_dev_lun, unsigned int *sparse_lun,
+ Scsi_Device ** SDpnt2, struct Scsi_Host *shpnt,
+ char *scsi_result)
+{
+ char devname[64];
+ unsigned char scsi_cmd[MAX_COMMAND_SIZE];
+ struct Scsi_Device_Template *sdtpnt;
+ Scsi_Device *SDtail, *SDpnt = *SDpnt2;
+ Scsi_Request * SRpnt;
+ int bflags, type = -1;
+#ifdef DEVFS_MUST_DIE
+ extern devfs_handle_t scsi_devfs_handle;
+#endif
+ int scsi_level;
+
+ SDpnt->host = shpnt;
+ SDpnt->id = dev;
+ SDpnt->lun = lun;
+ SDpnt->channel = channel;
+ SDpnt->online = TRUE;
+
+ scsi_build_commandblocks(SDpnt);
+
+ /* Some low level driver could use device->type (DB) */
+ SDpnt->type = -1;
+
+ /*
+ * Assume that the device will have handshaking problems, and then fix
+ * this field later if it turns out it doesn't
+ */
+ SDpnt->borken = 1;
+ SDpnt->was_reset = 0;
+ SDpnt->expecting_cc_ua = 0;
+ SDpnt->starved = 0;
+
+ if (NULL == (SRpnt = scsi_allocate_request(SDpnt))) {
+ printk("scan_scsis_single: no memory\n");
+ return 0;
+ }
+
+ /*
+ * We used to do a TEST_UNIT_READY before the INQUIRY but that was
+ * not really necessary. Spec recommends using INQUIRY to scan for
+ * devices (and TEST_UNIT_READY to poll for media change). - Paul G.
+ */
+
+ SCSI_LOG_SCAN_BUS(3, printk("scsi: performing INQUIRY\n"));
+ /*
+ * Build an INQUIRY command block.
+ */
+ scsi_cmd[0] = INQUIRY;
+ if ((lun > 0) && (lun0_scsi_level <= SCSI_2))
+ scsi_cmd[1] = (lun << 5) & 0xe0;
+ else
+ scsi_cmd[1] = 0; /* SCSI_3 and higher, don't touch */
+ scsi_cmd[2] = 0;
+ scsi_cmd[3] = 0;
+ scsi_cmd[4] = 255;
+ scsi_cmd[5] = 0;
+ SRpnt->sr_cmd_len = 0;
+ SRpnt->sr_data_direction = SCSI_DATA_READ;
+
+ scsi_wait_req (SRpnt, (void *) scsi_cmd,
+ (void *) scsi_result,
+ 256, SCSI_TIMEOUT+4*HZ, 3);
+
+ SCSI_LOG_SCAN_BUS(3, printk("scsi: INQUIRY %s with code 0x%x\n",
+ SRpnt->sr_result ? "failed" : "successful", SRpnt->sr_result));
+
+ /*
+ * Now that we don't do TEST_UNIT_READY anymore, we must be prepared
+ * for media change conditions here, so we cannot require a zero result.
+ */
+ if (SRpnt->sr_result) {
+ if ((driver_byte(SRpnt->sr_result) & DRIVER_SENSE) != 0 &&
+ (SRpnt->sr_sense_buffer[2] & 0xf) == UNIT_ATTENTION &&
+ SRpnt->sr_sense_buffer[12] == 0x28 &&
+ SRpnt->sr_sense_buffer[13] == 0) {
+ /* not-ready to ready transition - good */
+ } else {
+ /* assume no peripheral if any other sort of error */
+ scsi_release_request(SRpnt);
+ return 0;
+ }
+ }
+
+ /*
+ * Check for SPARSELUN before checking the peripheral qualifier,
+ * so sparse lun devices are completely scanned.
+ */
+
+ /*
+ * Get any flags for this device.
+ */
+ bflags = get_device_flags (scsi_result);
+
+ if (bflags & BLIST_SPARSELUN) {
+ *sparse_lun = 1;
+ }
+ /*
+ * Check the peripheral qualifier field - this tells us whether LUNS
+ * are supported here or not.
+ */
+ if ((scsi_result[0] >> 5) == 3) {
+ scsi_release_request(SRpnt);
+ return 0; /* peripheral qualifier 3: no device at this LUN */
+ }
+ /* The Toshiba ROM was "gender-changed" here as an inline hack.
+ This is now much more generic.
+ This is a mess: What we really want is to leave the scsi_result
+ alone, and just change the SDpnt structure. And the SDpnt is what
+ we want print_inquiry to print. -- REW
+ */
+ if (bflags & BLIST_ISDISK) {
+ scsi_result[0] = TYPE_DISK;
+ scsi_result[1] |= 0x80; /* removable */
+ }
+
+ if (bflags & BLIST_ISROM) {
+ scsi_result[0] = TYPE_ROM;
+ scsi_result[1] |= 0x80; /* removable */
+ }
+
+ memcpy(SDpnt->vendor, scsi_result + 8, 8);
+ memcpy(SDpnt->model, scsi_result + 16, 16);
+ memcpy(SDpnt->rev, scsi_result + 32, 4);
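+
+ /*
+ * Editor's note, for reference: the SCSI-2 INQUIRY data consulted
+ * above and below is laid out as
+ * byte 0 peripheral qualifier (7-5) / device type (4-0)
+ * byte 1 bit 7 = RMB (removable medium)
+ * byte 2 bits 2-0 = ANSI version (scsi_level)
+ * byte 3 bits 3-0 = response data format
+ * byte 7 bit 1 = CmdQue (tagged queuing)
+ * bytes 8-15 vendor identification
+ * bytes 16-31 product identification
+ * bytes 32-35 product revision level
+ */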
+
+ SDpnt->removable = (0x80 & scsi_result[1]) >> 7;
+ /* Use the peripheral qualifier field to determine online/offline */
+ if (((scsi_result[0] >> 5) & 7) == 1) SDpnt->online = FALSE;
+ else SDpnt->online = TRUE;
+ SDpnt->lockable = SDpnt->removable;
+ SDpnt->changed = 0;
+ SDpnt->access_count = 0;
+ SDpnt->busy = 0;
+ SDpnt->has_cmdblocks = 0;
+ /*
+ * Currently, all sequential devices are assumed to be tapes and all
+ * random-access devices disks; ROM / WORM devices get the appropriate
+ * read-only flags and are treated as RO.
+ */
+ switch (type = (scsi_result[0] & 0x1f)) {
+ case TYPE_TAPE:
+ case TYPE_DISK:
+ case TYPE_PRINTER:
+ case TYPE_MOD:
+ case TYPE_PROCESSOR:
+ case TYPE_SCANNER:
+ case TYPE_MEDIUM_CHANGER:
+ case TYPE_ENCLOSURE:
+ case TYPE_COMM:
+ SDpnt->writeable = 1;
+ break;
+ case TYPE_WORM:
+ case TYPE_ROM:
+ SDpnt->writeable = 0;
+ break;
+ default:
+ printk("scsi: unknown type %d\n", type);
+ }
+
+ SDpnt->device_blocked = FALSE;
+ SDpnt->device_busy = 0;
+ SDpnt->single_lun = 0;
+ SDpnt->soft_reset =
+ (scsi_result[7] & 1) && ((scsi_result[3] & 7) == 2);
+ SDpnt->random = (type == TYPE_TAPE) ? 0 : 1;
+ SDpnt->type = (type & 0x1f);
+
+ print_inquiry(scsi_result);
+
+ sprintf (devname, "host%d/bus%d/target%d/lun%d",
+ SDpnt->host->host_no, SDpnt->channel, SDpnt->id, SDpnt->lun);
+#ifdef DEVFS_MUST_DIE
+ if (SDpnt->de) printk ("DEBUG: dir: \"%s\" already exists\n", devname);
+ else SDpnt->de = devfs_mk_dir (scsi_devfs_handle, devname, NULL);
+#endif
+
+ for (sdtpnt = scsi_devicelist; sdtpnt;
+ sdtpnt = sdtpnt->next)
+ if (sdtpnt->detect)
+ SDpnt->attached +=
+ (*sdtpnt->detect) (SDpnt);
+
+ SDpnt->scsi_level = scsi_result[2] & 0x07;
+ if (SDpnt->scsi_level >= 2 ||
+ (SDpnt->scsi_level == 1 &&
+ (scsi_result[3] & 0x0f) == 1))
+ SDpnt->scsi_level++;
+ scsi_level = SDpnt->scsi_level;
+
+ /*
+ * Accommodate drivers that want to sleep when they should be in a polling
+ * loop.
+ */
+ SDpnt->disconnect = 0;
+
+
+ /*
+ * Set the tagged_queue flag for SCSI-II devices that purport to support
+ * tagged queuing in the INQUIRY data.
+ */
+ SDpnt->tagged_queue = 0;
+ if ((SDpnt->scsi_level >= SCSI_2) &&
+ (scsi_result[7] & 2) &&
+ !(bflags & BLIST_NOTQ)) {
+ SDpnt->tagged_supported = 1;
+ SDpnt->current_tag = 0;
+ }
+ /*
+ * Some revisions of the Texel CD ROM drives have handshaking problems when
+ * used with the Seagate controllers. Before we know what type of device
+ * we're talking to, we assume it's borken and then change it here if it
+ * turns out that it isn't a TEXEL drive.
+ */
+ if ((bflags & BLIST_BORKEN) == 0)
+ SDpnt->borken = 0;
+
+ /*
+ * If we want to only allow I/O to one of the luns attached to this device
+ * at a time, then we set this flag.
+ */
+ if (bflags & BLIST_SINGLELUN)
+ SDpnt->single_lun = 1;
+
+ /*
+ * Some devices need this "key" to be unlocked before we can use them
+ */
+ if ((bflags & BLIST_KEY) != 0) {
+ printk("Unlocked floptical drive.\n");
+ SDpnt->lockable = 0;
+ scsi_cmd[0] = MODE_SENSE;
+ if (shpnt->max_lun <= 8)
+ scsi_cmd[1] = (lun << 5) & 0xe0;
+ else scsi_cmd[1] = 0; /* any other idea? */
+ scsi_cmd[2] = 0x2e;
+ scsi_cmd[3] = 0;
+ scsi_cmd[4] = 0x2a;
+ scsi_cmd[5] = 0;
+ SRpnt->sr_cmd_len = 0;
+ SRpnt->sr_data_direction = SCSI_DATA_READ;
+ scsi_wait_req (SRpnt, (void *) scsi_cmd,
+ (void *) scsi_result, 0x2a,
+ SCSI_TIMEOUT, 3);
+ }
+
+ scsi_release_request(SRpnt);
+ SRpnt = NULL;
+
+ scsi_release_commandblocks(SDpnt);
+
+ /*
+ * This device was already hooked up to the host in question,
+ * so at this point we just let go of it and it should be fine. We do need to
+ * allocate a new one and attach it to the host so that we can further scan the bus.
+ */
+ SDpnt = (Scsi_Device *) kmalloc(sizeof(Scsi_Device), GFP_ATOMIC);
+ if (!SDpnt) {
+ printk("scsi: scan_scsis_single: Cannot malloc\n");
+ return 0;
+ }
+ memset(SDpnt, 0, sizeof(Scsi_Device));
+
+ *SDpnt2 = SDpnt;
+ SDpnt->queue_depth = 1;
+ SDpnt->host = shpnt;
+ SDpnt->online = TRUE;
+ SDpnt->scsi_level = scsi_level;
+
+ /*
+ * Register the queue for the device. All I/O requests will come
+ * in through here. We also need to register a pointer to
+ * ourselves, since the queue handler won't know what device
+ * the queue actually represents. We could look it up, but it
+ * is pointless work.
+ */
+ scsi_initialize_queue(SDpnt, shpnt);
+ SDpnt->host = shpnt;
+ initialize_merge_fn(SDpnt);
+
+ /*
+ * Mark this device as online, or otherwise we won't be able to do much with it.
+ */
+ SDpnt->online = TRUE;
+
+#if 0
+ /*
+ * Initialize the object that we will use to wait for command blocks.
+ */
+ init_waitqueue_head(&SDpnt->scpnt_wait);
+#endif
+
+ /*
+ * Since we just found one device, there had damn well better be one in the list
+ * already.
+ */
+ if (shpnt->host_queue == NULL)
+ panic("scan_scsis_single: Host queue == NULL\n");
+
+ SDtail = shpnt->host_queue;
+ while (SDtail->next) {
+ SDtail = SDtail->next;
+ }
+
+ /* Add this device to the linked list at the end */
+ SDtail->next = SDpnt;
+ SDpnt->prev = SDtail;
+ SDpnt->next = NULL;
+
+ /*
+ * Some scsi devices cannot be polled for lun != 0 due to firmware bugs
+ */
+ if (bflags & BLIST_NOLUN)
+ return 0; /* break; */
+
+ /*
+ * If this device is known to support sparse multiple units, override the
+ * other settings, and scan all of them.
+ */
+ if (bflags & BLIST_SPARSELUN) {
+ *max_dev_lun = shpnt->max_lun;
+ *sparse_lun = 1;
+ return 1;
+ }
+ /*
+ * If this device is known to support multiple units, override the other
+ * settings, and scan all of them.
+ */
+ if (bflags & BLIST_FORCELUN) {
+ /*
+ * Scanning MAX_SCSI_LUNS units would be a bad idea.
+ * Any better idea?
+ * I think we need REPORT LUNS in future to avoid scanning
+ * of unused LUNs. But, that is another item.
+ */
+ if (*max_dev_lun < shpnt->max_lun)
+ *max_dev_lun = shpnt->max_lun;
+ else if ((max_scsi_luns >> 1) >= *max_dev_lun)
+ *max_dev_lun += shpnt->max_lun;
+ else *max_dev_lun = max_scsi_luns;
+ return 1;
+ }
+ /*
+ * REGAL CDC-4X: avoid hang after LUN 4
+ */
+ if (bflags & BLIST_MAX5LUN) {
+ *max_dev_lun = 5;
+ return 1;
+ }
+
+ /*
+ * We assume the device can't handle lun != 0 if:
+ * - it reports SCSI-0 (ANSI SCSI Revision 0), as old drives like the
+ * MAXTOR XT-3280 do, or
+ * - it reports SCSI-1 (ANSI SCSI Revision 1) and Response Data Format 0.
+ */
+ if (((scsi_result[2] & 0x07) == 0)
+ ||
+ ((scsi_result[2] & 0x07) == 1 &&
+ (scsi_result[3] & 0x0f) == 0))
+ return 0;
+ return 1;
+}
+
+/*
+ * A helper for scan_scsis.
+ * Returns the scsi_level of lun0 on this host, channel and dev (if already
+ * known), otherwise returns SCSI_2.
+ */
+static int find_lun0_scsi_level(unsigned int channel, unsigned int dev,
+ struct Scsi_Host *shpnt)
+{
+ int res = SCSI_2;
+ Scsi_Device *SDpnt;
+
+ for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next)
+ {
+ if ((0 == SDpnt->lun) && (dev == SDpnt->id) &&
+ (channel == SDpnt->channel))
+ return (int)SDpnt->scsi_level;
+ }
+ /* haven't found lun0; we should send an INQUIRY, but take the easy route */
+ return res;
+}
diff --git a/xen/drivers/scsi/scsi_syms.c b/xen/drivers/scsi/scsi_syms.c
new file mode 100644
index 0000000000..82b23e84ce
--- /dev/null
+++ b/xen/drivers/scsi/scsi_syms.c
@@ -0,0 +1,105 @@
+/*
+ * We should not even be trying to compile this if we are not doing
+ * a module.
+ */
+#define __NO_VERSION__
+#include <xeno/config.h>
+#include <xeno/module.h>
+
+#include <xeno/sched.h>
+#include <xeno/timer.h>
+/* #include <xeno/string.h> */
+/* #include <xeno/slab.h> */
+/* #include <xeno/ioport.h> */
+/* #include <xeno/kernel.h> */
+#include <xeno/blk.h>
+/* #include <xeno/fs.h> */
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+
+#include "scsi.h"
+#include <scsi/scsi_ioctl.h>
+#include "hosts.h"
+#include "constants.h"
+
+#include "sd.h"
+#include <scsi/scsicam.h>
+
+/*
+ * This source file contains the symbol table used by scsi loadable
+ * modules.
+ */
+EXPORT_SYMBOL(scsi_register_module);
+EXPORT_SYMBOL(scsi_unregister_module);
+EXPORT_SYMBOL(scsi_free);
+EXPORT_SYMBOL(scsi_malloc);
+EXPORT_SYMBOL(scsi_register);
+EXPORT_SYMBOL(scsi_unregister);
+EXPORT_SYMBOL(scsicam_bios_param);
+EXPORT_SYMBOL(scsi_partsize);
+EXPORT_SYMBOL(scsi_allocate_device);
+EXPORT_SYMBOL(scsi_do_cmd);
+EXPORT_SYMBOL(scsi_command_size);
+EXPORT_SYMBOL(scsi_ioctl);
+EXPORT_SYMBOL(print_command);
+EXPORT_SYMBOL(print_sense);
+EXPORT_SYMBOL(print_req_sense);
+EXPORT_SYMBOL(print_msg);
+EXPORT_SYMBOL(print_status);
+EXPORT_SYMBOL(scsi_dma_free_sectors);
+EXPORT_SYMBOL(kernel_scsi_ioctl);
+EXPORT_SYMBOL(scsi_need_isa_buffer);
+EXPORT_SYMBOL(scsi_release_command);
+EXPORT_SYMBOL(print_Scsi_Cmnd);
+EXPORT_SYMBOL(scsi_block_when_processing_errors);
+EXPORT_SYMBOL(scsi_mark_host_reset);
+EXPORT_SYMBOL(scsi_ioctl_send_command);
+#if defined(CONFIG_SCSI_LOGGING) /* { */
+EXPORT_SYMBOL(scsi_logging_level);
+#endif
+
+EXPORT_SYMBOL(scsi_allocate_request);
+EXPORT_SYMBOL(scsi_release_request);
+EXPORT_SYMBOL(scsi_wait_req);
+EXPORT_SYMBOL(scsi_do_req);
+
+EXPORT_SYMBOL(scsi_report_bus_reset);
+EXPORT_SYMBOL(scsi_block_requests);
+EXPORT_SYMBOL(scsi_unblock_requests);
+
+EXPORT_SYMBOL(scsi_get_host_dev);
+EXPORT_SYMBOL(scsi_free_host_dev);
+
+EXPORT_SYMBOL(scsi_sleep);
+
+EXPORT_SYMBOL(proc_print_scsidevice);
+EXPORT_SYMBOL(proc_scsi);
+
+EXPORT_SYMBOL(scsi_io_completion);
+EXPORT_SYMBOL(scsi_end_request);
+
+EXPORT_SYMBOL(scsi_register_blocked_host);
+EXPORT_SYMBOL(scsi_deregister_blocked_host);
+
+/*
+ * This symbol is for the highlevel drivers (e.g. sg) only.
+ */
+EXPORT_SYMBOL(scsi_reset_provider);
+
+/*
+ * These are here only while I debug the rest of the scsi stuff.
+ */
+EXPORT_SYMBOL(scsi_hostlist);
+EXPORT_SYMBOL(scsi_hosts);
+EXPORT_SYMBOL(scsi_devicelist);
+EXPORT_SYMBOL(scsi_device_types);
+
+/*
+ * Externalize timers so that HBAs can safely start/restart commands.
+ */
+extern void scsi_add_timer(Scsi_Cmnd *, int, void ((*) (Scsi_Cmnd *)));
+extern int scsi_delete_timer(Scsi_Cmnd *);
+EXPORT_SYMBOL(scsi_add_timer);
+EXPORT_SYMBOL(scsi_delete_timer);
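+
+/*
+ * Editor's sketch (not part of this patch): a loadable low-level driver
+ * would typically reach these symbols as below. All "example_*" names
+ * are hypothetical; only scsi_register_module/scsi_unregister_module
+ * and MODULE_SCSI_HA come from the real interface.
+ */
+#if 0
+static Scsi_Host_Template example_template = {
+ name: "example", /* hypothetical adapter driver */
+ detect: example_detect, /* hypothetical detect routine */
+ release: example_release, /* hypothetical release routine */
+};
+
+static int __init example_init(void)
+{
+ example_template.module = THIS_MODULE;
+ return scsi_register_module(MODULE_SCSI_HA, &example_template);
+}
+
+static void __exit example_exit(void)
+{
+ scsi_unregister_module(MODULE_SCSI_HA, &example_template);
+}
+#endif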
diff --git a/xen/drivers/scsi/scsicam.c b/xen/drivers/scsi/scsicam.c
new file mode 100644
index 0000000000..ae13d27cdc
--- /dev/null
+++ b/xen/drivers/scsi/scsicam.c
@@ -0,0 +1,236 @@
+/*
+ * scsicam.c - SCSI CAM support functions, used for HDIO_GETGEO, etc.
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@Colorado.EDU
+ * +1 (303) 786-7975
+ *
+ * For more information, please consult the SCSI-CAM draft.
+ */
+
+#define __NO_VERSION__
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+
+
+/*#include <linux/fs.h>*/
+/*#include <linux/genhd.h>*/
+#include <xeno/blk.h>
+/*#include <linux/kernel.h>*/
+#include <asm/unaligned.h>
+#include "scsi.h"
+#include "hosts.h"
+#include "sd.h"
+#include <scsi/scsicam.h>
+
+static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds,
+ unsigned int *secs);
+
+
+/*
+ * Function : int scsicam_bios_param (Disk *disk, kdev_t dev, int *ip)
+ *
+ * Purpose : to determine the BIOS mapping used for a drive in a
+ * SCSI-CAM system, storing the results in ip as required
+ * by the HDIO_GETGEO ioctl().
+ *
+ * Returns : -1 on failure, 0 on success.
+ *
+ */
+
+int scsicam_bios_param(Disk * disk, /* SCSI disk */
+ kdev_t dev, /* Device major, minor */
+ int *ip /* Heads, sectors, cylinders in that order */ )
+{
+ struct buffer_head *bh;
+ int ret_code;
+ int size = disk->capacity;
+ unsigned long temp_cyl;
+
+#if 0
+ if (!(bh = bread(MKDEV(MAJOR(dev), MINOR(dev)&~0xf), 0, block_size(dev))))
+ return -1;
+#else
+ bh = NULL;
+ printk("scsicam_bios_param: bread not avail!\n");
+ BUG();
+#endif
+
+ /* try to infer mapping from partition table */
+ ret_code = scsi_partsize(bh, (unsigned long) size, (unsigned int *) ip + 2,
+ (unsigned int *) ip + 0, (unsigned int *) ip + 1);
+#if 0
+ brelse(bh);
+#endif
+
+ if (ret_code == -1) {
+ /* pick some standard mapping with at most 1024 cylinders,
+ and at most 62 sectors per track - this works up to
+ 7905 MB */
+ ret_code = setsize((unsigned long) size, (unsigned int *) ip + 2,
+ (unsigned int *) ip + 0, (unsigned int *) ip + 1);
+ }
+ /* if something went wrong, then apparently we have to return
+ a geometry with more than 1024 cylinders */
+ if (ret_code || ip[0] > 255 || ip[1] > 63) {
+ ip[0] = 64;
+ ip[1] = 32;
+ temp_cyl = size / (ip[0] * ip[1]);
+ if (temp_cyl > 65534) {
+ ip[0] = 255;
+ ip[1] = 63;
+ }
+ ip[2] = size / (ip[0] * ip[1]);
+ }
+ return 0;
+}
+
+/*
+ * Function : int scsi_partsize(struct buffer_head *bh, unsigned long
+ * capacity,unsigned int *cyls, unsigned int *hds, unsigned int *secs);
+ *
+ * Purpose : to determine the BIOS mapping used to create the partition
+ * table, storing the results in *cyls, *hds, and *secs
+ *
+ * Returns : -1 on failure, 0 on success.
+ *
+ */
+
+int scsi_partsize(struct buffer_head *bh, unsigned long capacity,
+ unsigned int *cyls, unsigned int *hds, unsigned int *secs)
+{
+ struct partition *p, *largest = NULL;
+ int i, largest_cyl;
+ int cyl, ext_cyl, end_head, end_cyl, end_sector;
+ unsigned int logical_end, physical_end, ext_physical_end;
+
+
+ if (*(unsigned short *) (bh->b_data + 510) == 0xAA55) {
+ for (largest_cyl = -1, p = (struct partition *)
+ (0x1BE + bh->b_data), i = 0; i < 4; ++i, ++p) {
+ if (!p->sys_ind)
+ continue;
+#ifdef DEBUG
+ printk("scsicam_bios_param : partition %d has system \n",
+ i);
+#endif
+ cyl = p->cyl + ((p->sector & 0xc0) << 2);
+ if (cyl > largest_cyl) {
+ largest_cyl = cyl;
+ largest = p;
+ }
+ }
+ }
+ if (largest) {
+ end_cyl = largest->end_cyl + ((largest->end_sector & 0xc0) << 2);
+ end_head = largest->end_head;
+ end_sector = largest->end_sector & 0x3f;
+
+ if (end_head + 1 == 0 || end_sector == 0)
+ return -1;
+
+#ifdef DEBUG
+ printk("scsicam_bios_param : end at h = %d, c = %d, s = %d\n",
+ end_head, end_cyl, end_sector);
+#endif
+
+ physical_end = end_cyl * (end_head + 1) * end_sector +
+ end_head * end_sector + end_sector;
+
+ /* This is the actual _sector_ number at the end */
+ logical_end = get_unaligned(&largest->start_sect)
+ + get_unaligned(&largest->nr_sects);
+
+ /* This is for >1023 cylinders */
+ ext_cyl = (logical_end - (end_head * end_sector + end_sector))
+ / (end_head + 1) / end_sector;
+ ext_physical_end = ext_cyl * (end_head + 1) * end_sector +
+ end_head * end_sector + end_sector;
+
+#ifdef DEBUG
+ printk("scsicam_bios_param : logical_end=%d physical_end=%d ext_physical_end=%d ext_cyl=%d\n"
+ ,logical_end, physical_end, ext_physical_end, ext_cyl);
+#endif
+
+ if ((logical_end == physical_end) ||
+ (end_cyl == 1023 && ext_physical_end == logical_end)) {
+ *secs = end_sector;
+ *hds = end_head + 1;
+ *cyls = capacity / ((end_head + 1) * end_sector);
+ return 0;
+ }
+#ifdef DEBUG
+ printk("scsicam_bios_param : logical (%u) != physical (%u)\n",
+ logical_end, physical_end);
+#endif
+ }
+ return -1;
+}
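+
+/*
+ * Editor's worked example (assumed numbers): if the largest partition
+ * ends at cylinder 521, head 63, sector 32, then
+ * physical_end = 521*64*32 + 63*32 + 32 = 1069056;
+ * if start_sect + nr_sects of that partition is also 1069056, the
+ * BIOS mapping is inferred as secs = 32, hds = 64,
+ * cyls = capacity / (64 * 32).
+ */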
+
+/*
+ * Function : static int setsize(unsigned long capacity,unsigned int *cyls,
+ * unsigned int *hds, unsigned int *secs);
+ *
+ * Purpose : to determine a near-optimal int 0x13 mapping for a
+ * SCSI disk in terms of lost space of size capacity, storing
+ * the results in *cyls, *hds, and *secs.
+ *
+ * Returns : -1 on failure, 0 on success.
+ *
+ * Extracted from
+ *
+ * WORKING X3T9.2
+ * DRAFT 792D
+ *
+ *
+ * Revision 6
+ * 10-MAR-94
+ * Information technology -
+ * SCSI-2 Common access method
+ * transport and SCSI interface module
+ *
+ * ANNEX A :
+ *
+ * setsize() converts a read capacity value to int 13h
+ * head-cylinder-sector requirements. It minimizes the value for
+ * number of heads and maximizes the number of cylinders. This
+ * will support rather large disks before the number of heads
+ * will not fit in 4 bits (or 6 bits). This algorithm also
+ * minimizes the number of sectors that will be unused at the end
+ * of the disk while allowing for very large disks to be
+ * accommodated. This algorithm does not use physical geometry.
+ */
+
+static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds,
+ unsigned int *secs)
+{
+ unsigned int rv = 0;
+ unsigned long heads, sectors, cylinders, temp;
+
+ cylinders = 1024L; /* Set number of cylinders to max */
+ sectors = 62L; /* Maximize sectors per track */
+
+ temp = cylinders * sectors; /* Compute divisor for heads */
+ heads = capacity / temp; /* Compute value for number of heads */
+ if (capacity % temp) { /* If there is a remainder, */
+ heads++; /* increment number of heads */
+ temp = cylinders * heads; /* Compute divisor for sectors */
+ sectors = capacity / temp; /* Compute sectors per track */
+ if (capacity % temp) { /* If there is a remainder, */
+ sectors++; /* increment number of sectors */
+ temp = heads * sectors; /* Compute divisor for cylinders */
+ cylinders = capacity / temp; /* Compute number of cylinders */
+ }
+ }
+ if (cylinders == 0)
+ rv = (unsigned) -1; /* Give error if 0 cylinders */
+
+ *cyls = (unsigned int) cylinders; /* Stuff return values */
+ *secs = (unsigned int) sectors;
+ *hds = (unsigned int) heads;
+ return (rv);
+}
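+
+/*
+ * Editor's worked example (assumed capacity): for 2097152 512-byte
+ * sectors (1 GB), setsize() computes
+ * heads = 2097152 / (1024*62) = 33, remainder -> heads = 34;
+ * sectors = 2097152 / (1024*34) = 60, remainder -> sectors = 61;
+ * cylinders = 2097152 / (34*61) = 1011,
+ * i.e. C/H/S = 1011/34/61, leaving 2097152 - 1011*34*61 = 338
+ * sectors unused at the end of the disk.
+ */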
diff --git a/xen/drivers/scsi/sd.c b/xen/drivers/scsi/sd.c
new file mode 100644
index 0000000000..dbb69d2447
--- /dev/null
+++ b/xen/drivers/scsi/sd.c
@@ -0,0 +1,1512 @@
+/*
+ * sd.c Copyright (C) 1992 Drew Eckhardt
+ * Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale
+ *
+ * Linux scsi disk driver
+ * Initial versions: Drew Eckhardt
+ * Subsequent revisions: Eric Youngdale
+ *
+ * <drew@colorado.edu>
+ *
+ * Modified by Eric Youngdale ericy@andante.org to
+ * add scatter-gather, multiple outstanding request, and other
+ * enhancements.
+ *
+ * Modified by Eric Youngdale eric@andante.org to support loadable
+ * low-level scsi drivers.
+ *
+ * Modified by Jirka Hanika geo@ff.cuni.cz to support more
+ * scsi disks using eight major numbers.
+ *
+ * Modified by Richard Gooch rgooch@atnf.csiro.au to support devfs.
+ *
+ * Modified by Torben Mathiasen tmm@image.dk
+ * Resource allocation fixes in sd_init and cleanups.
+ *
+ * Modified by Alex Davis <letmein@erols.com>
+ * Fix problem where partition info not being read in sd_open.
+ *
+ * Modified by Alex Davis <letmein@erols.com>
+ * Fix problem where removable media could be ejected after sd_open.
+ */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+
+/* #include <xeno/fs.h> */
+/* #include <xeno/kernel.h> */
+#include <xeno/sched.h>
+/* #include <xeno/mm.h> */
+/* #include <xeno/string.h> */
+#include <xeno/hdreg.h>
+/* #include <xeno/errno.h> */
+/* #include <xeno/interrupt.h> */
+#include <xeno/init.h>
+
+/* #include <xeno/smp.h> */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+#define MAJOR_NR SCSI_DISK0_MAJOR
+#include <xeno/blk.h>
+#include <xeno/blkpg.h>
+#include "scsi.h"
+#include "hosts.h"
+#include "sd.h"
+#include <scsi/scsi_ioctl.h>
+#include "constants.h"
+#include <scsi/scsicam.h> /* must follow "hosts.h" */
+
+#include <xeno/genhd.h>
+
+/*
+ * static const char RCSid[] = "$Header:";
+ */
+
+/* system major --> sd_gendisks index */
+#define SD_MAJOR_IDX(i) (MAJOR(i) & SD_MAJOR_MASK)
+/* sd_gendisks index --> system major */
+#define SD_MAJOR(i) (!(i) ? SCSI_DISK0_MAJOR : SCSI_DISK1_MAJOR-1+(i))
+
+#define SD_PARTITION(dev) ((SD_MAJOR_IDX(dev) << 8) | (MINOR(dev) & 255))
+
+#define SCSI_DISKS_PER_MAJOR 16
+#define SD_MAJOR_NUMBER(i) SD_MAJOR((i) >> 8)
+#define SD_MINOR_NUMBER(i) ((i) & 255)
+#define MKDEV_SD_PARTITION(i) MKDEV(SD_MAJOR_NUMBER(i), (i) & 255)
+#define MKDEV_SD(index) MKDEV_SD_PARTITION((index) << 4)
+#define N_USED_SCSI_DISKS (sd_template.dev_max + SCSI_DISKS_PER_MAJOR - 1)
+#define N_USED_SD_MAJORS (N_USED_SCSI_DISKS / SCSI_DISKS_PER_MAJOR)
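+
+/*
+ * Editor's worked example: with the standard Linux majors
+ * (SCSI_DISK0_MAJOR = 8, SCSI_DISK1_MAJOR..SCSI_DISK7_MAJOR = 65..71,
+ * assumed here), disk 0 ("sda") is dev (8,0) and its third partition
+ * is (8,3); disk 16 is the first disk of the second major, so
+ * MKDEV_SD(16) == MKDEV(SD_MAJOR(1), 0) == (65,0).
+ */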
+
+#define MAX_RETRIES 5
+
+/*
+ * Timeouts (in jiffies) for disks (30 s) and Magneto-opticals (75 s, slower).
+ */
+
+#define SD_TIMEOUT (30 * HZ)
+#define SD_MOD_TIMEOUT (75 * HZ)
+
+static Scsi_Disk *rscsi_disks;
+static struct gendisk *sd_gendisks;
+static int *sd_sizes;
+static int *sd_blocksizes;
+static int *sd_hardsizes; /* Hardware sector size */
+static int *sd_max_sectors;
+
+static int check_scsidisk_media_change(kdev_t);
+static int fop_revalidate_scsidisk(kdev_t);
+
+static int sd_init_onedisk(int);
+
+
+static int sd_init(void);
+static void sd_finish(void);
+static int sd_attach(Scsi_Device *);
+static int sd_detect(Scsi_Device *);
+static void sd_detach(Scsi_Device *);
+static int sd_init_command(Scsi_Cmnd *);
+
+static struct Scsi_Device_Template sd_template = {
+ name:"disk",
+ tag:"sd",
+ scsi_type:TYPE_DISK,
+ major:SCSI_DISK0_MAJOR,
+ /*
+ * Secondary range of majors that this driver handles.
+ */
+ min_major:SCSI_DISK1_MAJOR,
+ max_major:SCSI_DISK7_MAJOR,
+ blk:1,
+ detect:sd_detect,
+ init:sd_init,
+ finish:sd_finish,
+ attach:sd_attach,
+ detach:sd_detach,
+ init_command:sd_init_command,
+};
+
+
+static void rw_intr(Scsi_Cmnd * SCpnt);
+
+#if defined(CONFIG_PPC)
+/*
+ * Moved from arch/ppc/pmac_setup.c. This is where it really belongs.
+ */
+kdev_t __init
+sd_find_target(void *host, int tgt)
+{
+ Scsi_Disk *dp;
+ int i;
+ for (dp = rscsi_disks, i = 0; i < sd_template.dev_max; ++i, ++dp)
+ if (dp->device != NULL && dp->device->host == host
+ && dp->device->id == tgt)
+ return MKDEV_SD(i);
+ return 0;
+}
+#endif
+
+static int sd_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
+{
+ kdev_t dev = inode->i_rdev;
+ struct Scsi_Host * host;
+ Scsi_Device * SDev;
+ int diskinfo[4];
+
+ SDev = rscsi_disks[DEVICE_NR(dev)].device;
+ if (!SDev)
+ return -ENODEV;
+
+ /*
+ * If we are in the middle of error recovery, don't let anyone
+ * else try to use this device. Also, if error recovery fails, it
+ * may try to take the device offline, in which case all further
+ * access to the device is prohibited.
+ */
+
+ if( !scsi_block_when_processing_errors(SDev) )
+ {
+ return -ENODEV;
+ }
+
+ switch (cmd)
+ {
+ case HDIO_GETGEO: /* Return BIOS disk parameters */
+ {
+ struct hd_geometry *loc = (struct hd_geometry *) arg;
+ if(!loc)
+ return -EINVAL;
+
+ host = rscsi_disks[DEVICE_NR(dev)].device->host;
+
+ /* default to most commonly used values */
+
+ diskinfo[0] = 0x40;
+ diskinfo[1] = 0x20;
+ diskinfo[2] =
+ rscsi_disks[DEVICE_NR(dev)].capacity >> 11;
+
+ /* override with calculated, extended default,
+ or driver values */
+
+ if(host->hostt->bios_param != NULL)
+ host->hostt->bios_param(
+ &rscsi_disks[DEVICE_NR(dev)], dev,
+ &diskinfo[0]);
+ else scsicam_bios_param(&rscsi_disks[DEVICE_NR(dev)],
+ dev, &diskinfo[0]);
+
+ if (put_user(diskinfo[0], &loc->heads) ||
+ put_user(diskinfo[1], &loc->sectors) ||
+ put_user(diskinfo[2], &loc->cylinders) ||
+ put_user(sd_gendisks[SD_MAJOR_IDX(
+ inode->i_rdev)].part[MINOR(
+ inode->i_rdev)].start_sect, &loc->start))
+ return -EFAULT;
+ return 0;
+ }
+ case HDIO_GETGEO_BIG:
+ {
+ struct hd_big_geometry *loc =
+ (struct hd_big_geometry *) arg;
+
+ if(!loc)
+ return -EINVAL;
+
+ host = rscsi_disks[DEVICE_NR(dev)].device->host;
+
+ /* default to most commonly used values */
+
+ diskinfo[0] = 0x40;
+ diskinfo[1] = 0x20;
+ diskinfo[2] =
+ rscsi_disks[DEVICE_NR(dev)].capacity >> 11;
+
+ /* override with calculated, extended default,
+ or driver values */
+
+ if(host->hostt->bios_param != NULL)
+ host->hostt->bios_param(
+ &rscsi_disks[DEVICE_NR(dev)], dev,
+ &diskinfo[0]);
+ else scsicam_bios_param(&rscsi_disks[DEVICE_NR(dev)],
+ dev, &diskinfo[0]);
+
+ if (put_user(diskinfo[0], &loc->heads) ||
+ put_user(diskinfo[1], &loc->sectors) ||
+ put_user(diskinfo[2],
+ (unsigned int *) &loc->cylinders) ||
+ put_user(sd_gendisks[SD_MAJOR_IDX(
+ inode->i_rdev)].part[MINOR(
+ inode->i_rdev)].start_sect, &loc->start))
+ return -EFAULT;
+ return 0;
+ }
+#if 0
+ case BLKGETSIZE:
+ case BLKGETSIZE64:
+ case BLKROSET:
+ case BLKROGET:
+ case BLKRASET:
+ case BLKRAGET:
+ case BLKFLSBUF:
+ case BLKSSZGET:
+ case BLKPG:
+ case BLKELVGET:
+ case BLKELVSET:
+ case BLKBSZGET:
+ case BLKBSZSET:
+ return blk_ioctl(inode->i_rdev, cmd, arg);
+
+ case BLKRRPART: /* Re-read partition tables */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ return revalidate_scsidisk(dev, 1);
+#endif
+
+ default:
+ return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device,
+ cmd, (void *) arg);
+ }
+}
+
+static void sd_devname(unsigned int disknum, char *buffer)
+{
+ if (disknum < 26)
+ sprintf(buffer, "sd%c", 'a' + disknum);
+ else {
+ unsigned int min1;
+ unsigned int min2;
+ /*
+ * For larger numbers of disks, we need to go to a new
+ * naming scheme.
+ */
+ min1 = disknum / 26;
+ min2 = disknum % 26;
+ sprintf(buffer, "sd%c%c", 'a' + min1 - 1, 'a' + min2);
+ }
+}
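+
+/*
+ * Editor's worked example: sd_devname(0, buf) yields "sda",
+ * sd_devname(25, buf) "sdz", sd_devname(26, buf) "sdaa"
+ * (min1 = 1, min2 = 0), and sd_devname(27, buf) "sdab".
+ */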
+
+static request_queue_t *sd_find_queue(kdev_t dev)
+{
+ Scsi_Disk *dpnt;
+ int target;
+ target = DEVICE_NR(dev);
+
+ dpnt = &rscsi_disks[target];
+ if (!dpnt->device)
+ return NULL; /* No such device */
+ return &dpnt->device->request_queue;
+}
+
+static int sd_init_command(Scsi_Cmnd * SCpnt)
+{
+ int dev, block, this_count;
+ struct hd_struct *ppnt;
+ Scsi_Disk *dpnt;
+#if CONFIG_SCSI_LOGGING
+ char nbuff[6];
+#endif
+
+ ppnt = &sd_gendisks[SD_MAJOR_IDX(SCpnt->request.rq_dev)].part[MINOR(SCpnt->request.rq_dev)];
+ dev = DEVICE_NR(SCpnt->request.rq_dev);
+
+ block = SCpnt->request.sector;
+ this_count = SCpnt->request_bufflen >> 9;
+
+ SCSI_LOG_HLQUEUE(1, printk("Doing sd request, dev = 0x%x, block = %d\n",
+ SCpnt->request.rq_dev, block));
+
+ dpnt = &rscsi_disks[dev];
+ if (dev >= sd_template.dev_max ||
+ !dpnt->device ||
+ !dpnt->device->online ||
+ block + SCpnt->request.nr_sectors > ppnt->nr_sects) {
+ SCSI_LOG_HLQUEUE(2, printk("Finishing %ld sectors\n",
+ SCpnt->request.nr_sectors));
+ SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt));
+ return 0;
+ }
+ block += ppnt->start_sect;
+ if (dpnt->device->changed) {
+ /*
+ * quietly refuse to do anything to a changed disc until the changed
+ * bit has been reset
+ */
+ /* printk("SCSI disk has been changed. Prohibiting further I/O.\n"); */
+ return 0;
+ }
+ SCSI_LOG_HLQUEUE(2, sd_devname(dev, nbuff));
+ SCSI_LOG_HLQUEUE(2, printk("%s : real dev = /dev/%d, block = %d\n",
+ nbuff, dev, block));
+
+ /*
+ * If we have a 1K hardware sectorsize, prevent access to single
+ * 512 byte sectors. In theory we could handle this - in fact
+ * the scsi cdrom driver must be able to handle this because
+ * we typically use 1K blocksizes, and cdroms typically have
+ * 2K hardware sectorsizes. Of course, things are simpler
+ * with the cdrom, since it is read-only. For performance
+ * reasons, the filesystems should be able to handle this
+ * and not force the scsi disk driver to use bounce buffers
+ * for this.
+ */
+ if (dpnt->device->sector_size == 1024) {
+ if ((block & 1) || (SCpnt->request.nr_sectors & 1)) {
+ printk("sd.c:Bad block number requested");
+ return 0;
+ } else {
+ block = block >> 1;
+ this_count = this_count >> 1;
+ }
+ }
+ if (dpnt->device->sector_size == 2048) {
+ if ((block & 3) || (SCpnt->request.nr_sectors & 3)) {
+ printk("sd.c:Bad block number requested");
+ return 0;
+ } else {
+ block = block >> 2;
+ this_count = this_count >> 2;
+ }
+ }
+ if (dpnt->device->sector_size == 4096) {
+ if ((block & 7) || (SCpnt->request.nr_sectors & 7)) {
+ printk("sd.c:Bad block number requested");
+ return 0;
+ } else {
+ block = block >> 3;
+ this_count = this_count >> 3;
+ }
+ }
+ switch (SCpnt->request.cmd) {
+ case WRITE:
+ if (!dpnt->device->writeable) {
+ return 0;
+ }
+ SCpnt->cmnd[0] = WRITE_6;
+ SCpnt->sc_data_direction = SCSI_DATA_WRITE;
+ break;
+ case READ:
+ SCpnt->cmnd[0] = READ_6;
+ SCpnt->sc_data_direction = SCSI_DATA_READ;
+ break;
+ default:
+ panic("Unknown sd command %d\n", SCpnt->request.cmd);
+ }
+
+ SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%ld 512 byte blocks.\n", nbuff,
+ (SCpnt->request.cmd == WRITE) ? "writing" :
+ "reading", this_count,
+ SCpnt->request.nr_sectors));
+
+ SCpnt->cmnd[1] = (SCpnt->device->scsi_level <= SCSI_2) ?
+ ((SCpnt->lun << 5) & 0xe0) : 0;
+
+ if (((this_count > 0xff) || (block > 0x1fffff)) || SCpnt->device->ten) {
+ if (this_count > 0xffff)
+ this_count = 0xffff;
+
+ SCpnt->cmnd[0] += READ_10 - READ_6;
+ SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff;
+ SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff;
+ SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff;
+ SCpnt->cmnd[5] = (unsigned char) block & 0xff;
+ SCpnt->cmnd[6] = SCpnt->cmnd[9] = 0;
+ SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff;
+ SCpnt->cmnd[8] = (unsigned char) this_count & 0xff;
+ } else {
+ if (this_count > 0xff)
+ this_count = 0xff;
+
+ SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f);
+ SCpnt->cmnd[2] = (unsigned char) ((block >> 8) & 0xff);
+ SCpnt->cmnd[3] = (unsigned char) block & 0xff;
+ SCpnt->cmnd[4] = (unsigned char) this_count;
+ SCpnt->cmnd[5] = 0;
+ }
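+
+ /*
+ * Editor's note, for reference: the 6-byte CDB built above is
+ * opcode, LBA[20:16] (low 5 bits of byte 1; on SCSI <= 2 the high
+ * 3 bits carry the LUN), LBA[15:8], LBA[7:0], transfer length
+ * (one byte, hence the 0xff clamp), control. The 10-byte form is
+ * opcode, flags/LUN, LBA[31:24]..LBA[7:0] in bytes 2-5, reserved,
+ * length[15:8], length[7:0], control (hence the 0xffff clamp).
+ */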
+
+ /*
+ * We shouldn't disconnect in the middle of a sector, so with a dumb
+ * host adapter, it's safe to assume that we can at least transfer
+ * this many bytes between each connect / disconnect.
+ */
+ SCpnt->transfersize = dpnt->device->sector_size;
+ SCpnt->underflow = this_count << 9;
+
+ SCpnt->allowed = MAX_RETRIES;
+ SCpnt->timeout_per_command = (SCpnt->device->type == TYPE_DISK ?
+ SD_TIMEOUT : SD_MOD_TIMEOUT);
+
+ /*
+ * This is the completion routine we use. It is matched in
+ * capability to this function.
+ */
+ SCpnt->done = rw_intr;
+
+ /*
+ * This indicates that the command is ready from our end to be
+ * queued.
+ */
+ return 1;
+}
+
+static int sd_open(struct inode *inode, struct file *filp)
+{
+ int target, retval = -ENXIO;
+ Scsi_Device * SDev;
+ target = DEVICE_NR(inode->i_rdev);
+
+ SCSI_LOG_HLQUEUE(1, printk("target=%d, max=%d\n", target, sd_template.dev_max));
+
+ if (target >= sd_template.dev_max || !rscsi_disks[target].device)
+ return -ENXIO; /* No such device */
+
+ /*
+ * If the device is in error recovery, wait until it is done.
+ * If the device is offline, then disallow any access to it.
+ */
+ if (!scsi_block_when_processing_errors(rscsi_disks[target].device)) {
+ return -ENXIO;
+ }
+ /*
+ * Make sure that only one process can do a check_disk_change at a time.
+ * This is also used to lock out further access when the partition table
+ * is being re-read.
+ */
+
+ while (rscsi_disks[target].device->busy) {
+ barrier();
+ cpu_relax();
+ }
+ /*
+ * The following code can sleep.
+ * Module unloading must be prevented
+ */
+ SDev = rscsi_disks[target].device;
+ if (SDev->host->hostt->module)
+ __MOD_INC_USE_COUNT(SDev->host->hostt->module);
+ if (sd_template.module)
+ __MOD_INC_USE_COUNT(sd_template.module);
+ SDev->access_count++;
+
+#if 0
+ if (rscsi_disks[target].device->removable) {
+ SDev->allow_revalidate = 1;
+ check_disk_change(inode->i_rdev);
+ SDev->allow_revalidate = 0;
+
+
+ /*
+ * If the drive is empty, just let the open fail.
+ */
+ if ((!rscsi_disks[target].ready) && !(filp->f_flags & O_NDELAY)) {
+ retval = -ENOMEDIUM;
+ goto error_out;
+ }
+
+ /*
+ * Similarly, if the device has the write protect tab set,
+ * have the open fail if the user expects to be able to write
+ * to the thing.
+ */
+ if ((rscsi_disks[target].write_prot) && (filp->f_mode & 2)) {
+ retval = -EROFS;
+ goto error_out;
+ }
+ }
+#endif
+
+ /*
+ * It is possible that the disk changing stuff resulted in the device
+ * being taken offline. If this is the case, report this to the user,
+ * and don't pretend that
+ * the open actually succeeded.
+ */
+ if (!SDev->online) {
+ goto error_out;
+ }
+ /*
+ * See if we are requesting a non-existent partition. Do this
+ * after checking for disk change.
+ */
+ if (sd_sizes[SD_PARTITION(inode->i_rdev)] == 0) {
+ goto error_out;
+ }
+
+ if (SDev->removable)
+ if (SDev->access_count==1)
+ if (scsi_block_when_processing_errors(SDev))
+ scsi_ioctl(SDev, SCSI_IOCTL_DOORLOCK, NULL);
+
+
+ return 0;
+
+ error_out:
+ SDev->access_count--;
+ if (SDev->host->hostt->module)
+ __MOD_DEC_USE_COUNT(SDev->host->hostt->module);
+ if (sd_template.module)
+ __MOD_DEC_USE_COUNT(sd_template.module);
+ return retval;
+}
+
+static int sd_release(struct inode *inode, struct file *file)
+{
+ int target;
+ Scsi_Device * SDev;
+
+ target = DEVICE_NR(inode->i_rdev);
+ SDev = rscsi_disks[target].device;
+ if (!SDev)
+ return -ENODEV;
+
+ SDev->access_count--;
+
+ if (SDev->removable) {
+ if (!SDev->access_count)
+ if (scsi_block_when_processing_errors(SDev))
+ scsi_ioctl(SDev, SCSI_IOCTL_DOORUNLOCK, NULL);
+ }
+ if (SDev->host->hostt->module)
+ __MOD_DEC_USE_COUNT(SDev->host->hostt->module);
+ if (sd_template.module)
+ __MOD_DEC_USE_COUNT(sd_template.module);
+ return 0;
+}
+
+static struct block_device_operations sd_fops =
+{
+/* owner: THIS_MODULE, */
+ open: sd_open,
+ release: sd_release,
+ ioctl: sd_ioctl,
+ check_media_change: check_scsidisk_media_change,
+ revalidate: fop_revalidate_scsidisk
+};
+
+/*
+ * If we need more than one SCSI disk major (i.e. more than
+ * 16 SCSI disks), we'll have to kmalloc() more gendisks later.
+ */
+
+static struct gendisk sd_gendisk =
+{
+ major: SCSI_DISK0_MAJOR,
+ major_name: "sd",
+ minor_shift: 4,
+ max_p: 1 << 4,
+ fops: &sd_fops,
+};
+
+#define SD_GENDISK(i) sd_gendisks[(i) / SCSI_DISKS_PER_MAJOR]
+
+/*
+ * rw_intr is the interrupt routine for the device driver.
+ * It will be notified on the end of a SCSI read / write, and
+ * will take one of several actions based on success or failure.
+ */
+
+static void rw_intr(Scsi_Cmnd * SCpnt)
+{
+ int result = SCpnt->result;
+#if CONFIG_SCSI_LOGGING
+ char nbuff[6];
+#endif
+ int this_count = SCpnt->bufflen >> 9;
+ int good_sectors = (result == 0 ? this_count : 0);
+ int block_sectors = 1;
+ long error_sector;
+
+ SCSI_LOG_HLCOMPLETE(1, sd_devname(DEVICE_NR(SCpnt->request.rq_dev),
+ nbuff));
+
+ SCSI_LOG_HLCOMPLETE(1, printk("%s : rw_intr(%d, %x [%x %x])\n", nbuff,
+ SCpnt->host->host_no,
+ result,
+ SCpnt->sense_buffer[0],
+ SCpnt->sense_buffer[2]));
+
+ /*
+ Handle MEDIUM ERRORs that indicate partial success. Since this is a
+ relatively rare error condition, no care is taken to avoid
+ otherwise-avoidable work such as extra memcpy's.
+ */
+
+ if (driver_byte(result) != 0 && /* An error occurred */
+ SCpnt->sense_buffer[0] == 0xF0) { /* Sense data is valid */
+ switch (SCpnt->sense_buffer[2]) {
+ case MEDIUM_ERROR:
+ error_sector = (SCpnt->sense_buffer[3] << 24) |
+ (SCpnt->sense_buffer[4] << 16) |
+ (SCpnt->sense_buffer[5] << 8) |
+ SCpnt->sense_buffer[6];
+ if (SCpnt->request.bh != NULL)
+ block_sectors = SCpnt->request.bh->b_size >> 9;
+ switch (SCpnt->device->sector_size) {
+ case 1024:
+ error_sector <<= 1;
+ if (block_sectors < 2)
+ block_sectors = 2;
+ break;
+ case 2048:
+ error_sector <<= 2;
+ if (block_sectors < 4)
+ block_sectors = 4;
+ break;
+ case 4096:
+ error_sector <<=3;
+ if (block_sectors < 8)
+ block_sectors = 8;
+ break;
+ case 256:
+ error_sector >>= 1;
+ break;
+ default:
+ break;
+ }
+ error_sector -= sd_gendisks[SD_MAJOR_IDX(
+ SCpnt->request.rq_dev)].part[MINOR(
+ SCpnt->request.rq_dev)].start_sect;
+ error_sector &= ~(block_sectors - 1);
+ good_sectors = error_sector - SCpnt->request.sector;
+ if (good_sectors < 0 || good_sectors >= this_count)
+ good_sectors = 0;
+ break;
+
+ case RECOVERED_ERROR:
+ /*
+ * An error occurred, but it recovered. Inform the
+ * user, but make sure that it's not treated as a
+ * hard error.
+ */
+ print_sense("sd", SCpnt);
+ result = 0;
+ SCpnt->sense_buffer[0] = 0x0;
+ good_sectors = this_count;
+ break;
+
+ case ILLEGAL_REQUEST:
+ if (SCpnt->device->ten == 1) {
+ if (SCpnt->cmnd[0] == READ_10 ||
+ SCpnt->cmnd[0] == WRITE_10)
+ SCpnt->device->ten = 0;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ /*
+ * This calls the generic completion function, now that we know
+ * how many actual sectors finished, and how many sectors we need
+ * to say have failed.
+ */
+ scsi_io_completion(SCpnt, good_sectors, block_sectors);
+}
+/*
+ * sd_init_command() above is effectively the request handler for the sd
+ * driver: its function in life is to take block device requests and
+ * translate them into SCSI commands (the old requeue_sd_request() is gone).
+ */
+
+
+static int check_scsidisk_media_change(kdev_t full_dev)
+{
+ int retval;
+ int target;
+ int flag = 0;
+ Scsi_Device * SDev;
+
+ target = DEVICE_NR(full_dev);
+ SDev = rscsi_disks[target].device;
+
+ if (target >= sd_template.dev_max || !SDev) {
+ printk("SCSI disk request error: invalid device.\n");
+ return 0;
+ }
+ if (!SDev->removable)
+ return 0;
+
+ /*
+ * If the device is offline, don't send any commands - just pretend as
+ * if the command failed. If the device ever comes back online, we
+ * can deal with it then. It is only because of unrecoverable errors
+ * that we would ever take a device offline in the first place.
+ */
+ if (SDev->online == FALSE) {
+ rscsi_disks[target].ready = 0;
+ SDev->changed = 1;
+ return 1; /* This will force a flush, if called from
+ * check_disk_change */
+ }
+
+ /* Using Start/Stop enables differentiation between a drive with
+ * no cartridge loaded - NOT READY, a drive with a changed cartridge -
+ * UNIT ATTENTION, and one with the same cartridge - GOOD STATUS.
+ * This also handles drives that auto spin down, e.g. an Iomega Jaz
+ * 1GB, as this will spin up the drive.
+ */
+ retval = -ENODEV;
+ if (scsi_block_when_processing_errors(SDev))
+ retval = scsi_ioctl(SDev, SCSI_IOCTL_START_UNIT, NULL);
+
+ if (retval) { /* Unable to test, unit probably not ready.
+ * This usually means there is no disc in the
+ * drive. Mark as changed, and we will figure
+ * it out later once the drive is available
+ * again. */
+
+ rscsi_disks[target].ready = 0;
+ SDev->changed = 1;
+ return 1; /* This will force a flush, if called from
+ * check_disk_change */
+ }
+ /*
+ * For removable scsi disks (FLOPTICAL) we have to recognise the
+ * presence of a disk in the drive. This is kept in the Scsi_Disk
+ * struct and tested at open! Daniel Roche ( dan@lectra.fr )
+ */
+
+ rscsi_disks[target].ready = 1; /* FLOPTICAL */
+
+ retval = SDev->changed;
+ if (!flag)
+ SDev->changed = 0;
+ return retval;
+}
+
+static int sd_init_onedisk(int i)
+{
+ unsigned char cmd[10];
+ char nbuff[6];
+ unsigned char *buffer;
+ unsigned long spintime_value = 0;
+ int the_result, retries, spintime;
+ int sector_size;
+ Scsi_Request *SRpnt;
+
+ /*
+ * Get the name of the disk, in case we need to log it somewhere.
+ */
+ sd_devname(i, nbuff);
+
+ /*
+ * If the device is offline, don't try to read the capacity or any
+ * of the other niceties.
+ */
+ if (rscsi_disks[i].device->online == FALSE)
+ return i;
+
+ /*
+ * We need to retry the READ_CAPACITY because a UNIT_ATTENTION is
+ * considered a fatal error, and many devices report such an error
+ * just after a scsi bus reset.
+ */
+
+ SRpnt = scsi_allocate_request(rscsi_disks[i].device);
+ if (!SRpnt) {
+ printk(KERN_WARNING
+ "(sd_init_onedisk:) Request allocation failure.\n");
+ return i;
+ }
+
+ buffer = (unsigned char *) scsi_malloc(512);
+ if (!buffer) {
+ printk(KERN_WARNING "(sd_init_onedisk:) Memory allocation failure.\n");
+ scsi_release_request(SRpnt);
+ return i;
+ }
+
+ spintime = 0;
+
+ /* Spin up drives, as required. This is needed both at boot time
+ and on module load. */
+ do {
+ retries = 0;
+
+ while (retries < 3) {
+ cmd[0] = TEST_UNIT_READY;
+ cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ?
+ ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0;
+ memset((void *) &cmd[2], 0, 8);
+ SRpnt->sr_cmd_len = 0;
+ SRpnt->sr_sense_buffer[0] = 0;
+ SRpnt->sr_sense_buffer[2] = 0;
+ SRpnt->sr_data_direction = SCSI_DATA_NONE;
+
+ scsi_wait_req (SRpnt, (void *) cmd, (void *) buffer,
+ 0/*512*/, SD_TIMEOUT, MAX_RETRIES);
+
+ the_result = SRpnt->sr_result;
+ retries++;
+ if (the_result == 0
+ || SRpnt->sr_sense_buffer[2] != UNIT_ATTENTION)
+ break;
+ }
+
+ /*
+ * If the drive has indicated to us that it doesn't have
+ * any media in it, don't bother with any of the rest of
+ * this crap.
+ */
+ if( the_result != 0
+ && ((driver_byte(the_result) & DRIVER_SENSE) != 0)
+ && SRpnt->sr_sense_buffer[2] == UNIT_ATTENTION
+ && SRpnt->sr_sense_buffer[12] == 0x3A ) {
+ rscsi_disks[i].capacity = 0x1fffff;
+ sector_size = 512;
+ rscsi_disks[i].device->changed = 1;
+ rscsi_disks[i].ready = 0;
+ break;
+ }
+
+ /* Look for non-removable devices that return NOT_READY.
+ * Issue command to spin up drive for these cases. */
+ if (the_result && !rscsi_disks[i].device->removable &&
+ SRpnt->sr_sense_buffer[2] == NOT_READY) {
+ unsigned long time1;
+ if (!spintime) {
+ printk("%s: Spinning up disk...", nbuff);
+ cmd[0] = START_STOP;
+ cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ?
+ ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0;
+ cmd[1] |= 1; /* Return immediately */
+ memset((void *) &cmd[2], 0, 8);
+ cmd[4] = 1; /* Start spin cycle */
+ SRpnt->sr_cmd_len = 0;
+ SRpnt->sr_sense_buffer[0] = 0;
+ SRpnt->sr_sense_buffer[2] = 0;
+
+ SRpnt->sr_data_direction = SCSI_DATA_READ;
+ scsi_wait_req(SRpnt, (void *) cmd, (void *) buffer,
+ 0/*512*/, SD_TIMEOUT, MAX_RETRIES);
+ spintime_value = jiffies;
+ }
+ spintime = 1;
+ time1 = HZ;
+ /* Wait 1 second for next try */
+ do {
+ current->state = TASK_UNINTERRUPTIBLE;
+ time1 = schedule_timeout(time1);
+ } while(time1);
+ printk(".");
+ }
+ } while (the_result && spintime &&
+ time_after(spintime_value + 100 * HZ, jiffies));
+ if (spintime) {
+ if (the_result)
+ printk("not responding...\n");
+ else
+ printk("ready\n");
+ }
+ retries = 3;
+ do {
+ cmd[0] = READ_CAPACITY;
+ cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ?
+ ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0;
+ memset((void *) &cmd[2], 0, 8);
+ memset((void *) buffer, 0, 8);
+ SRpnt->sr_cmd_len = 0;
+ SRpnt->sr_sense_buffer[0] = 0;
+ SRpnt->sr_sense_buffer[2] = 0;
+
+ SRpnt->sr_data_direction = SCSI_DATA_READ;
+ scsi_wait_req(SRpnt, (void *) cmd, (void *) buffer,
+ 8, SD_TIMEOUT, MAX_RETRIES);
+
+ the_result = SRpnt->sr_result;
+ retries--;
+
+ } while (the_result && retries);
+
+ /*
+ * The SCSI standard says:
+ * "READ CAPACITY is necessary for self configuring software"
+ * While not mandatory, support of READ CAPACITY is strongly
+ * encouraged.
+ * We used to die if we couldn't successfully do a READ CAPACITY.
+ * But, now we go on about our way. The side effects of this are
+ *
+ * 1. We can't know block size with certainty. I have said
+ * "512 bytes is it" as this is most common.
+ *
+ * 2. Recovery from when someone attempts to read past the
+ * end of the raw device will be slower.
+ */
+
+ if (the_result) {
+ printk("%s : READ CAPACITY failed.\n"
+ "%s : status = %x, message = %02x, host = %d, driver = %02x \n",
+ nbuff, nbuff,
+ status_byte(the_result),
+ msg_byte(the_result),
+ host_byte(the_result),
+ driver_byte(the_result)
+ );
+ if (driver_byte(the_result) & DRIVER_SENSE)
+ print_req_sense("sd", SRpnt);
+ else
+ printk("%s : sense not available. \n", nbuff);
+
+ printk("%s : block size assumed to be 512 bytes, disk size 1GB. \n",
+ nbuff);
+ rscsi_disks[i].capacity = 0x1fffff;
+ sector_size = 512;
+
+ /* Set dirty bit for removable devices if not ready -
+ * sometimes drives will not report this properly. */
+ if (rscsi_disks[i].device->removable &&
+ SRpnt->sr_sense_buffer[2] == NOT_READY)
+ rscsi_disks[i].device->changed = 1;
+
+ } else {
+ /*
+ * FLOPTICAL: if READ CAPACITY succeeds, the drive is assumed to be ready
+ */
+ rscsi_disks[i].ready = 1;
+
+ rscsi_disks[i].capacity = 1 + ((buffer[0] << 24) |
+ (buffer[1] << 16) |
+ (buffer[2] << 8) |
+ buffer[3]);
+
+ sector_size = (buffer[4] << 24) |
+ (buffer[5] << 16) | (buffer[6] << 8) | buffer[7];
+
+ if (sector_size == 0) {
+ sector_size = 512;
+ printk("%s : sector size 0 reported, assuming 512.\n",
+ nbuff);
+ }
+ if (sector_size != 512 &&
+ sector_size != 1024 &&
+ sector_size != 2048 &&
+ sector_size != 4096 &&
+ sector_size != 256) {
+ printk("%s : unsupported sector size %d.\n",
+ nbuff, sector_size);
+ /*
+ * The user might want to re-format the drive with
+ * a supported sectorsize. Once this happens, it
+ * would be relatively trivial to set the thing up.
+ * For this reason, we leave the thing in the table.
+ */
+ rscsi_disks[i].capacity = 0;
+ }
+ if (sector_size > 1024) {
+ int m;
+
+ /*
+ * We must fix the sd_blocksizes and sd_hardsizes
+ * to allow us to read the partition tables.
+ * The disk reading code does not allow for reading
+ * of partial sectors.
+ */
+ for (m = i << 4; m < ((i + 1) << 4); m++) {
+ sd_blocksizes[m] = sector_size;
+ }
+ }
+ {
+ /*
+ * The msdos fs needs to know the hardware sector size,
+ * so I have created this table. See ll_rw_blk.c
+ * Jacques Gelinas (Jacques@solucorp.qc.ca)
+ */
+ int m;
+ int hard_sector = sector_size;
+ int sz = rscsi_disks[i].capacity * (hard_sector/256);
+
+ /* There are 16 minors allocated for each major device */
+ for (m = i << 4; m < ((i + 1) << 4); m++) {
+ sd_hardsizes[m] = hard_sector;
+ }
+
+ printk("SCSI device %s: "
+ "%d %d-byte hdwr sectors (%d MB)\n",
+ nbuff, rscsi_disks[i].capacity,
+ hard_sector, (sz/2 - sz/1250 + 974)/1950);
+ }
+
+ /* Rescale capacity to 512-byte units */
+ if (sector_size == 4096)
+ rscsi_disks[i].capacity <<= 3;
+ if (sector_size == 2048)
+ rscsi_disks[i].capacity <<= 2;
+ if (sector_size == 1024)
+ rscsi_disks[i].capacity <<= 1;
+ if (sector_size == 256)
+ rscsi_disks[i].capacity >>= 1;
+ }
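+
+ /*
+ * Editor's worked example: a drive whose READ CAPACITY returns a
+ * last-block address of 1048575 with a 2048-byte sector size gets
+ * capacity = 1 + 1048575 = 1048576 blocks above, rescaled here to
+ * 1048576 << 2 = 4194304 512-byte units (2 GB).
+ */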
+
+
+ /*
+ * Unless otherwise specified, this is not write protected.
+ */
+ rscsi_disks[i].write_prot = 0;
+ if (rscsi_disks[i].device->removable && rscsi_disks[i].ready) {
+ /* FLOPTICAL */
+
+ /*
+ * For removable scsi disks (FLOPTICAL) we have to recognise
+ * the Write Protect Flag. This flag is kept in the Scsi_Disk
+ * struct and tested at open!
+ * Daniel Roche ( dan@lectra.fr )
+ *
+ * Changed to get all pages (0x3f) rather than page 1 to
+ * get around devices which do not have a page 1. Since
+ * we're only interested in the header anyway, this should
+ * be fine.
+ * -- Matthew Dharm (mdharm-scsi@one-eyed-alien.net)
+ */
+
+ memset((void *) &cmd[0], 0, 8);
+ cmd[0] = MODE_SENSE;
+ cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ?
+ ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0;
+ cmd[2] = 0x3f; /* Get all pages */
+ cmd[4] = 255; /* Ask for 255 bytes, even though we want just the first 8 */
+ SRpnt->sr_cmd_len = 0;
+ SRpnt->sr_sense_buffer[0] = 0;
+ SRpnt->sr_sense_buffer[2] = 0;
+
+ /* same code as READ CAPACITY!! */
+ SRpnt->sr_data_direction = SCSI_DATA_READ;
+ scsi_wait_req(SRpnt, (void *) cmd, (void *) buffer,
+ 512, SD_TIMEOUT, MAX_RETRIES);
+
+ the_result = SRpnt->sr_result;
+
+ if (the_result) {
+ printk("%s: test WP failed, assume Write Enabled\n", nbuff);
+ } else {
+ rscsi_disks[i].write_prot = ((buffer[2] & 0x80) != 0);
+ printk("%s: Write Protect is %s\n", nbuff,
+ rscsi_disks[i].write_prot ? "on" : "off");
+ }
+
+ } /* check for write protect */
+ SRpnt->sr_device->ten = 1;
+ SRpnt->sr_device->remap = 1;
+ SRpnt->sr_device->sector_size = sector_size;
+ /* Wake up a process waiting for device */
+ scsi_release_request(SRpnt);
+ SRpnt = NULL;
+
+ scsi_free(buffer, 512);
+ return i;
+}
+
+/*
+ * The sd_init() function looks at all SCSI drives present, determines
+ * their size, and reads partition table entries for them.
+ */
+
+static int sd_registered;
+
+static int sd_init()
+{
+ int i;
+
+ if (sd_template.dev_noticed == 0)
+ return 0;
+
+ if (!rscsi_disks)
+ sd_template.dev_max = sd_template.dev_noticed + SD_EXTRA_DEVS;
+
+ if (sd_template.dev_max > N_SD_MAJORS * SCSI_DISKS_PER_MAJOR)
+ sd_template.dev_max = N_SD_MAJORS * SCSI_DISKS_PER_MAJOR;
+
+ if (!sd_registered) {
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+#ifdef DEVFS_MUST_DIE
+ if (devfs_register_blkdev(SD_MAJOR(i), "sd", &sd_fops)) {
+ printk("Unable to get major %d for SCSI disk\n", SD_MAJOR(i));
+ sd_template.dev_noticed = 0;
+ return 1;
+ }
+#endif
+ }
+ sd_registered++;
+ }
+ /* We do not support attaching loadable devices yet. */
+ if (rscsi_disks)
+ return 0;
+
+ rscsi_disks = kmalloc(sd_template.dev_max * sizeof(Scsi_Disk), GFP_ATOMIC);
+ if (!rscsi_disks)
+ goto cleanup_devfs;
+ memset(rscsi_disks, 0, sd_template.dev_max * sizeof(Scsi_Disk));
+
+ /* for every (necessary) major: */
+ sd_sizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC);
+ if (!sd_sizes)
+ goto cleanup_disks;
+ memset(sd_sizes, 0, (sd_template.dev_max << 4) * sizeof(int));
+
+ sd_blocksizes = kmalloc((sd_template.dev_max << 4) * sizeof(int),
+ GFP_ATOMIC);
+ if (!sd_blocksizes)
+ goto cleanup_sizes;
+
+ sd_hardsizes = kmalloc((sd_template.dev_max << 4) * sizeof(int),
+ GFP_ATOMIC);
+ if (!sd_hardsizes)
+ goto cleanup_blocksizes;
+
+ sd_max_sectors = kmalloc((sd_template.dev_max << 4) * sizeof(int),
+ GFP_ATOMIC);
+ if (!sd_max_sectors)
+ goto cleanup_max_sectors;
+
+ for (i = 0; i < sd_template.dev_max << 4; i++) {
+ sd_blocksizes[i] = 1024;
+ sd_hardsizes[i] = 512;
+ /*
+ * Allow low-level device drivers to generate large (512 KB) scsi
+ * commands if they know what they're doing and ask for it
+ * explicitly via the SHpnt->max_sectors API.
+ */
+ sd_max_sectors[i] = MAX_SEGMENTS*8;
+ }
+
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+ blksize_size[SD_MAJOR(i)] = sd_blocksizes +
+ i * (SCSI_DISKS_PER_MAJOR << 4);
+ hardsect_size[SD_MAJOR(i)] = sd_hardsizes +
+ i * (SCSI_DISKS_PER_MAJOR << 4);
+ max_sectors[SD_MAJOR(i)] = sd_max_sectors +
+ i * (SCSI_DISKS_PER_MAJOR << 4);
+ }
+
+ sd_gendisks = kmalloc(N_USED_SD_MAJORS * sizeof(struct gendisk),
+ GFP_ATOMIC);
+ if (!sd_gendisks)
+ goto cleanup_sd_gendisks;
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+ sd_gendisks[i] = sd_gendisk; /* memcpy */
+#ifdef DEVFS_MUST_DIE
+ sd_gendisks[i].de_arr = kmalloc (SCSI_DISKS_PER_MAJOR *
+ sizeof *sd_gendisks[i].de_arr,
+ GFP_ATOMIC);
+ if (!sd_gendisks[i].de_arr)
+ goto cleanup_gendisks_de_arr;
+ memset (sd_gendisks[i].de_arr, 0,
+ SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr);
+#endif
+ sd_gendisks[i].flags = kmalloc (SCSI_DISKS_PER_MAJOR *
+ sizeof *sd_gendisks[i].flags,
+ GFP_ATOMIC);
+ if (!sd_gendisks[i].flags)
+ goto cleanup_gendisks_flags;
+ memset (sd_gendisks[i].flags, 0,
+ SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags);
+ sd_gendisks[i].major = SD_MAJOR(i);
+ sd_gendisks[i].major_name = "sd";
+ sd_gendisks[i].minor_shift = 4;
+ sd_gendisks[i].max_p = 1 << 4;
+ sd_gendisks[i].part = kmalloc((SCSI_DISKS_PER_MAJOR << 4) *
+ sizeof(struct hd_struct),
+ GFP_ATOMIC);
+ if (!sd_gendisks[i].part)
+ goto cleanup_gendisks_part;
+ memset(sd_gendisks[i].part, 0, (SCSI_DISKS_PER_MAJOR << 4) *
+ sizeof(struct hd_struct));
+ sd_gendisks[i].sizes = sd_sizes + (i * SCSI_DISKS_PER_MAJOR << 4);
+ sd_gendisks[i].nr_real = 0;
+ sd_gendisks[i].real_devices =
+ (void *) (rscsi_disks + i * SCSI_DISKS_PER_MAJOR);
+ }
+
+ return 0;
+
+ cleanup_gendisks_part:
+ kfree(sd_gendisks[i].flags);
+ cleanup_gendisks_flags:
+#ifdef DEVFS_MUST_DIE
+ kfree(sd_gendisks[i].de_arr);
+ cleanup_gendisks_de_arr:
+#endif
+ while (--i >= 0) {
+#ifdef DEVFS_MUST_DIE
+ kfree(sd_gendisks[i].de_arr);
+#endif
+ kfree(sd_gendisks[i].flags);
+ kfree(sd_gendisks[i].part);
+ }
+ kfree(sd_gendisks);
+ sd_gendisks = NULL;
+ cleanup_sd_gendisks:
+ kfree(sd_max_sectors);
+ cleanup_max_sectors:
+ kfree(sd_hardsizes);
+ cleanup_blocksizes:
+ kfree(sd_blocksizes);
+ cleanup_sizes:
+ kfree(sd_sizes);
+ cleanup_disks:
+ kfree(rscsi_disks);
+ rscsi_disks = NULL;
+ cleanup_devfs:
+#ifdef DEVFS_MUST_DIE
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+ devfs_unregister_blkdev(SD_MAJOR(i), "sd");
+ }
+#endif
+ sd_registered--;
+ sd_template.dev_noticed = 0;
+ return 1;
+}
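
The unwinding at the end of sd_init() is the standard staged-cleanup idiom: each allocation that fails jumps to a label that frees only what was allocated before it, and the labels fall through one another in reverse order of allocation. A minimal sketch of the same pattern, with hypothetical names that are not part of this driver:

    static int alloc_three(void)
    {
        void *a, *b, *c;

        a = kmalloc(128, GFP_ATOMIC);
        if (!a)
            goto fail_a;                 /* nothing allocated yet */
        b = kmalloc(128, GFP_ATOMIC);
        if (!b)
            goto fail_b;                 /* free 'a' only */
        c = kmalloc(128, GFP_ATOMIC);
        if (!c)
            goto fail_c;                 /* free 'b', then fall through */
        /* real code would store a, b, c somewhere before returning */
        return 0;

    fail_c:
        kfree(b);
    fail_b:
        kfree(a);
    fail_a:
        return 1;
    }
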
+
+
+static void sd_finish()
+{
+ int i;
+
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+ blk_dev[SD_MAJOR(i)].queue = sd_find_queue;
+ add_gendisk(&sd_gendisks[i]);
+ }
+
+ for (i = 0; i < sd_template.dev_max; ++i)
+ if (!rscsi_disks[i].capacity && rscsi_disks[i].device) {
+ sd_init_onedisk(i);
+ if (!rscsi_disks[i].has_part_table) {
+ sd_sizes[i << 4] = rscsi_disks[i].capacity;
+ register_disk(&SD_GENDISK(i), MKDEV_SD(i),
+ 1<<4, &sd_fops,
+ rscsi_disks[i].capacity);
+ rscsi_disks[i].has_part_table = 1;
+ }
+ }
+#if 0
+ /* If our host adapter is capable of scatter-gather, then we increase
+ * the read-ahead to 60 blocks (120 sectors). If not, we use
+ * a two block (4 sector) read ahead. We can only respect this with the
+ * granularity of every 16 disks (one device major).
+ */
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+ read_ahead[SD_MAJOR(i)] =
+ (rscsi_disks[i * SCSI_DISKS_PER_MAJOR].device
+ && rscsi_disks[i * SCSI_DISKS_PER_MAJOR].device->host->sg_tablesize)
+ ? 120 /* 120 sector read-ahead */
+ : 4; /* 4 sector read-ahead */
+ }
+#endif
+
+ return;
+}
+
+static int sd_detect(Scsi_Device * SDp)
+{
+ if (SDp->type != TYPE_DISK && SDp->type != TYPE_MOD)
+ return 0;
+ sd_template.dev_noticed++;
+ return 1;
+}
+
+static int sd_attach(Scsi_Device * SDp)
+{
+ unsigned int devnum;
+ Scsi_Disk *dpnt;
+ int i;
+ char nbuff[6];
+
+ if (SDp->type != TYPE_DISK && SDp->type != TYPE_MOD)
+ return 0;
+
+ if (sd_template.nr_dev >= sd_template.dev_max || rscsi_disks == NULL) {
+ SDp->attached--;
+ return 1;
+ }
+ for (dpnt = rscsi_disks, i = 0; i < sd_template.dev_max; i++, dpnt++)
+ if (!dpnt->device)
+ break;
+
+ if (i >= sd_template.dev_max) {
+ printk(KERN_WARNING "scsi_devices corrupt (sd),"
+ " nr_dev %d dev_max %d\n",
+ sd_template.nr_dev, sd_template.dev_max);
+ SDp->attached--;
+ return 1;
+ }
+
+ rscsi_disks[i].device = SDp;
+ rscsi_disks[i].has_part_table = 0;
+ sd_template.nr_dev++;
+ SD_GENDISK(i).nr_real++;
+ devnum = i % SCSI_DISKS_PER_MAJOR;
+#ifdef DEVFS_MUST_DIE
+ SD_GENDISK(i).de_arr[devnum] = SDp->de;
+#endif
+ if (SDp->removable)
+ SD_GENDISK(i).flags[devnum] |= GENHD_FL_REMOVABLE;
+ sd_devname(i, nbuff);
+ printk("Attached scsi %sdisk %s at scsi%d, channel %d, id %d, lun %d\n",
+ SDp->removable ? "removable " : "",
+ nbuff, SDp->host->host_no, SDp->channel, SDp->id, SDp->lun);
+ return 0;
+}
+
+#define DEVICE_BUSY rscsi_disks[target].device->busy
+#define ALLOW_REVALIDATE rscsi_disks[target].device->allow_revalidate
+#define USAGE rscsi_disks[target].device->access_count
+#define CAPACITY rscsi_disks[target].capacity
+#define MAYBE_REINIT sd_init_onedisk(target)
+
+/* This routine is called to flush all partitions and partition tables
+ * for a changed scsi disk, and then re-read the new partition table.
+ * If we are revalidating a disk because of a media change, then we
+ * enter with usage == 0. If we are using an ioctl, we automatically have
+ * usage == 1 (we need an open channel to use an ioctl :-), so this
+ * is our limit.
+ */
+int revalidate_scsidisk(kdev_t dev, int maxusage)
+{
+ struct gendisk *sdgd;
+ int target;
+ int max_p;
+ int start;
+ int i;
+
+ target = DEVICE_NR(dev);
+
+ if (DEVICE_BUSY || (ALLOW_REVALIDATE == 0 && USAGE > maxusage)) {
+ printk("Device busy for revalidation (usage=%d)\n", USAGE);
+ return -EBUSY;
+ }
+ DEVICE_BUSY = 1;
+
+ sdgd = &SD_GENDISK(target);
+ max_p = sd_gendisk.max_p;
+ start = target << sd_gendisk.minor_shift;
+
+ for (i = max_p - 1; i >= 0; i--) {
+ int index = start + i;
+ invalidate_device(MKDEV_SD_PARTITION(index), 1);
+ sdgd->part[SD_MINOR_NUMBER(index)].start_sect = 0;
+ sdgd->part[SD_MINOR_NUMBER(index)].nr_sects = 0;
+ /*
+ * Reset the blocksize for everything so that we can read
+ * the partition table. Technically we will determine the
+ * correct block size when we revalidate, but we do this just
+ * to make sure that everything remains consistent.
+ */
+ if (rscsi_disks[target].device->sector_size == 2048)
+ sd_blocksizes[index] = 2048;
+ else
+ sd_blocksizes[index] = 1024;
+ }
+
+#ifdef MAYBE_REINIT
+ MAYBE_REINIT;
+#endif
+
+ grok_partitions(&SD_GENDISK(target), target % SCSI_DISKS_PER_MAJOR,
+ 1<<4, CAPACITY);
+
+ DEVICE_BUSY = 0;
+ return 0;
+}
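
As the comment above explains, maxusage encodes who is calling: a media-change revalidation must find the device completely unused, while an ioctl caller necessarily holds one open itself. A hedged sketch of how the two call sites differ (illustrative only, not the driver's actual dispatch code):

    /* Illustrative only: the two legal values of 'maxusage'. */
    static int example_revalidate(kdev_t dev, int from_ioctl)
    {
        /* An ioctl path owns one open of its own, so usage == 1 is
         * acceptable; a media-change path must see no users at all. */
        return revalidate_scsidisk(dev, from_ioctl ? 1 : 0);
    }
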
+
+static int fop_revalidate_scsidisk(kdev_t dev)
+{
+ return revalidate_scsidisk(dev, 0);
+}
+
+static void sd_detach(Scsi_Device * SDp)
+{
+ Scsi_Disk *dpnt;
+ struct gendisk *sdgd;
+ int i, j;
+ int max_p;
+ int start;
+
+ if (rscsi_disks == NULL)
+ return;
+
+ for (dpnt = rscsi_disks, i = 0; i < sd_template.dev_max; i++, dpnt++)
+ if (dpnt->device == SDp) {
+
+ /* If we are disconnecting a disk driver, sync and invalidate
+ * everything */
+ sdgd = &SD_GENDISK(i);
+ max_p = sd_gendisk.max_p;
+ start = i << sd_gendisk.minor_shift;
+
+ for (j = max_p - 1; j >= 0; j--) {
+ int index = start + j;
+ invalidate_device(MKDEV_SD_PARTITION(index), 1);
+ sdgd->part[SD_MINOR_NUMBER(index)].start_sect = 0;
+ sdgd->part[SD_MINOR_NUMBER(index)].nr_sects = 0;
+ sd_sizes[index] = 0;
+ }
+#ifdef DEVFS_MUST_DIE
+ devfs_register_partitions (sdgd,
+ SD_MINOR_NUMBER (start), 1);
+#endif
+ /* unregister_disk() */
+ dpnt->has_part_table = 0;
+ dpnt->device = NULL;
+ dpnt->capacity = 0;
+ SDp->attached--;
+ sd_template.dev_noticed--;
+ sd_template.nr_dev--;
+ SD_GENDISK(i).nr_real--;
+ return;
+ }
+ return;
+}
+
+static int __init init_sd(void)
+{
+ sd_template.module = THIS_MODULE;
+ return scsi_register_module(MODULE_SCSI_DEV, &sd_template);
+}
+
+static void __exit exit_sd(void)
+{
+ int i;
+
+#if 0
+ scsi_unregister_module(MODULE_SCSI_DEV, &sd_template);
+#endif
+
+#ifdef DEVFS_MUST_DIE
+ for (i = 0; i < N_USED_SD_MAJORS; i++)
+ devfs_unregister_blkdev(SD_MAJOR(i), "sd");
+#endif
+
+ sd_registered--;
+ if (rscsi_disks != NULL) {
+ kfree(rscsi_disks);
+ kfree(sd_sizes);
+ kfree(sd_blocksizes);
+ kfree(sd_hardsizes);
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+#if 0 /* XXX aren't we forgetting to deallocate something? */
+ kfree(sd_gendisks[i].de_arr);
+ kfree(sd_gendisks[i].flags);
+#endif
+ kfree(sd_gendisks[i].part);
+ }
+ }
+ for (i = 0; i < N_USED_SD_MAJORS; i++) {
+ del_gendisk(&sd_gendisks[i]);
+ blk_size[SD_MAJOR(i)] = NULL; /* XXX blksize_size actually? */
+ hardsect_size[SD_MAJOR(i)] = NULL;
+#if 0
+ read_ahead[SD_MAJOR(i)] = 0;
+#endif
+ }
+ sd_template.dev_max = 0;
+ if (sd_gendisks != NULL) /* kfree tests for 0, but leave explicit */
+ kfree(sd_gendisks);
+}
+
+module_init(init_sd);
+module_exit(exit_sd);
+MODULE_LICENSE("GPL");
diff --git a/xen/drivers/scsi/sd.h b/xen/drivers/scsi/sd.h
new file mode 100644
index 0000000000..8e29445839
--- /dev/null
+++ b/xen/drivers/scsi/sd.h
@@ -0,0 +1,66 @@
+/*
+ * sd.h Copyright (C) 1992 Drew Eckhardt
+ * SCSI disk driver header file by
+ * Drew Eckhardt
+ *
+ * <drew@colorado.edu>
+ *
+ * Modified by Eric Youngdale eric@andante.org to
+ * add scatter-gather, multiple outstanding request, and other
+ * enhancements.
+ */
+#ifndef _SD_H
+#define _SD_H
+/*
+ $Header: /usr/src/linux/kernel/blk_drv/scsi/RCS/sd.h,v 1.1 1992/07/24 06:27:38 root Exp root $
+ */
+
+#ifndef _SCSI_H
+#include "scsi.h"
+#endif
+
+#ifndef _GENDISK_H
+#include <xeno/genhd.h>
+#endif
+
+typedef struct scsi_disk {
+ unsigned capacity; /* size in blocks */
+ Scsi_Device *device;
+ unsigned char ready; /* flag ready for FLOPTICAL */
+ unsigned char write_prot; /* flag write_protect for rmvable dev */
+ unsigned char sector_bit_size; /* sector_size == 2^sector_bit_size */
+ unsigned char sector_bit_shift; /* power of 2 sectors per FS block */
+ unsigned has_part_table:1; /* has partition table */
+} Scsi_Disk;
+
+extern int revalidate_scsidisk(kdev_t dev, int maxusage);
+
+/*
+ * Used by pmac to find the device associated with a target.
+ */
+extern kdev_t sd_find_target(void *host, int tgt);
+
+#define N_SD_MAJORS 8
+
+#define SD_MAJOR_MASK (N_SD_MAJORS - 1)
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen/include/asm-i386/apic.h b/xen/include/asm-i386/apic.h
new file mode 100644
index 0000000000..574cc23203
--- /dev/null
+++ b/xen/include/asm-i386/apic.h
@@ -0,0 +1,96 @@
+#ifndef __ASM_APIC_H
+#define __ASM_APIC_H
+
+//#include <linux/config.h>
+//#include <linux/pm.h>
+#include <asm/apicdef.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+#define APIC_DEBUG 0
+
+#if APIC_DEBUG
+#define Dprintk(x...) printk(x)
+#else
+#define Dprintk(x...)
+#endif
+
+/*
+ * Basic functions accessing APICs.
+ */
+
+static __inline void apic_write(unsigned long reg, unsigned long v)
+{
+ *((volatile unsigned long *)(APIC_BASE+reg)) = v;
+}
+
+static __inline void apic_write_atomic(unsigned long reg, unsigned long v)
+{
+ xchg((volatile unsigned long *)(APIC_BASE+reg), v);
+}
+
+static __inline unsigned long apic_read(unsigned long reg)
+{
+ return *((volatile unsigned long *)(APIC_BASE+reg));
+}
+
+static __inline__ void apic_wait_icr_idle(void)
+{
+ do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY );
+}
+
+#ifdef CONFIG_X86_GOOD_APIC
+# define FORCE_READ_AROUND_WRITE 0
+# define apic_read_around(x)
+# define apic_write_around(x,y) apic_write((x),(y))
+#else
+# define FORCE_READ_AROUND_WRITE 1
+# define apic_read_around(x) apic_read(x)
+# define apic_write_around(x,y) apic_write_atomic((x),(y))
+#endif
+
+static inline void ack_APIC_irq(void)
+{
+ /*
+ * ack_APIC_irq() actually gets compiled as a single instruction:
+ * - a single rmw on Pentium/82489DX
+ * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
+ * ... yummie.
+ */
+
+ /* Docs say use 0 for future compatibility */
+ apic_write_around(APIC_EOI, 0);
+}
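
These accessors treat the local APIC as a block of memory-mapped 32-bit registers at APIC_BASE, with the register offsets and field-extraction macros supplied by <asm/apicdef.h> (added later in this patch). As a sketch of how they compose, reading this CPU's physical APIC ID looks like:

    /* Sketch: fetch this CPU's physical APIC ID (APIC already mapped). */
    static unsigned int example_apic_id(void)
    {
        return GET_APIC_ID(apic_read(APIC_ID));
    }
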
+
+extern int get_maxlvt(void);
+extern void connect_bsp_APIC (void);
+extern void disconnect_bsp_APIC (void);
+extern void disable_local_APIC (void);
+extern int verify_local_APIC (void);
+extern void sync_Arb_IDs (void);
+extern void init_bsp_APIC (void);
+extern void setup_local_APIC (void);
+extern void init_apic_mappings (void);
+extern void setup_APIC_clocks (void);
+extern void setup_apic_nmi_watchdog (void);
+extern inline void nmi_watchdog_tick (struct pt_regs * regs);
+extern int APIC_init_uniprocessor (void);
+extern void disable_APIC_timer(void);
+extern void enable_APIC_timer(void);
+
+//extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned long, pm_callback);
+//extern void apic_pm_unregister(struct pm_dev*);
+
+extern unsigned int apic_timer_irqs [NR_CPUS];
+extern int check_nmi_watchdog (void);
+
+extern unsigned int nmi_watchdog;
+#define NMI_NONE 0
+#define NMI_IO_APIC 1
+#define NMI_LOCAL_APIC 2
+#define NMI_INVALID 3
+
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#endif /* __ASM_APIC_H */
diff --git a/xen/include/asm-i386/apicdef.h b/xen/include/asm-i386/apicdef.h
new file mode 100644
index 0000000000..227bfca652
--- /dev/null
+++ b/xen/include/asm-i386/apicdef.h
@@ -0,0 +1,378 @@
+#ifndef __ASM_APICDEF_H
+#define __ASM_APICDEF_H
+
+/*
+ * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
+ *
+ * Alan Cox <Alan.Cox@linux.org>, 1995.
+ * Ingo Molnar <mingo@redhat.com>, 1999, 2000
+ */
+
+#define APIC_DEFAULT_PHYS_BASE 0xfee00000
+
+#define APIC_ID 0x20
+#define APIC_ID_MASK (0x0F<<24)
+#define GET_APIC_ID(x) (((x)>>24)&0x0F)
+#define APIC_LVR 0x30
+#define APIC_LVR_MASK 0xFF00FF
+#define GET_APIC_VERSION(x) ((x)&0xFF)
+#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFF)
+#define APIC_INTEGRATED(x) ((x)&0xF0)
+#define APIC_TASKPRI 0x80
+#define APIC_TPRI_MASK 0xFF
+#define APIC_ARBPRI 0x90
+#define APIC_ARBPRI_MASK 0xFF
+#define APIC_PROCPRI 0xA0
+#define APIC_EOI 0xB0
+#define APIC_EIO_ACK 0x0 /* Write this to the EOI register */
+#define APIC_RRR 0xC0
+#define APIC_LDR 0xD0
+#define APIC_LDR_MASK (0xFF<<24)
+#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF)
+#define SET_APIC_LOGICAL_ID(x) (((x)<<24))
+#define APIC_ALL_CPUS 0xFF
+#define APIC_DFR 0xE0
+#define APIC_DFR_CLUSTER 0x0FFFFFFFul /* Clustered */
+#define APIC_DFR_FLAT 0xFFFFFFFFul /* Flat mode */
+#define APIC_SPIV 0xF0
+#define APIC_SPIV_FOCUS_DISABLED (1<<9)
+#define APIC_SPIV_APIC_ENABLED (1<<8)
+#define APIC_ISR 0x100
+#define APIC_TMR 0x180
+#define APIC_IRR 0x200
+#define APIC_ESR 0x280
+#define APIC_ESR_SEND_CS 0x00001
+#define APIC_ESR_RECV_CS 0x00002
+#define APIC_ESR_SEND_ACC 0x00004
+#define APIC_ESR_RECV_ACC 0x00008
+#define APIC_ESR_SENDILL 0x00020
+#define APIC_ESR_RECVILL 0x00040
+#define APIC_ESR_ILLREGA 0x00080
+#define APIC_ICR 0x300
+#define APIC_DEST_SELF 0x40000
+#define APIC_DEST_ALLINC 0x80000
+#define APIC_DEST_ALLBUT 0xC0000
+#define APIC_ICR_RR_MASK 0x30000
+#define APIC_ICR_RR_INVALID 0x00000
+#define APIC_ICR_RR_INPROG 0x10000
+#define APIC_ICR_RR_VALID 0x20000
+#define APIC_INT_LEVELTRIG 0x08000
+#define APIC_INT_ASSERT 0x04000
+#define APIC_ICR_BUSY 0x01000
+#define APIC_DEST_PHYSICAL 0x00000
+#define APIC_DEST_LOGICAL 0x00800
+#define APIC_DM_FIXED 0x00000
+#define APIC_DM_LOWEST 0x00100
+#define APIC_DM_SMI 0x00200
+#define APIC_DM_REMRD 0x00300
+#define APIC_DM_NMI 0x00400
+#define APIC_DM_INIT 0x00500
+#define APIC_DM_STARTUP 0x00600
+#define APIC_DM_EXTINT 0x00700
+#define APIC_VECTOR_MASK 0x000FF
+#define APIC_ICR2 0x310
+#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF)
+#define SET_APIC_DEST_FIELD(x) ((x)<<24)
+#define APIC_LVTT 0x320
+#define APIC_LVTPC 0x340
+#define APIC_LVT0 0x350
+#define APIC_LVT_TIMER_BASE_MASK (0x3<<18)
+#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3)
+#define SET_APIC_TIMER_BASE(x) (((x)<<18))
+#define APIC_TIMER_BASE_CLKIN 0x0
+#define APIC_TIMER_BASE_TMBASE 0x1
+#define APIC_TIMER_BASE_DIV 0x2
+#define APIC_LVT_TIMER_PERIODIC (1<<17)
+#define APIC_LVT_MASKED (1<<16)
+#define APIC_LVT_LEVEL_TRIGGER (1<<15)
+#define APIC_LVT_REMOTE_IRR (1<<14)
+#define APIC_INPUT_POLARITY (1<<13)
+#define APIC_SEND_PENDING (1<<12)
+#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7)
+#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8))
+#define APIC_MODE_FIXED 0x0
+#define APIC_MODE_NMI 0x4
+#define APIC_MODE_EXINT 0x7
+#define APIC_LVT1 0x360
+#define APIC_LVTERR 0x370
+#define APIC_TMICT 0x380
+#define APIC_TMCCT 0x390
+#define APIC_TDCR 0x3E0
+#define APIC_TDR_DIV_TMBASE (1<<2)
+#define APIC_TDR_DIV_1 0xB
+#define APIC_TDR_DIV_2 0x0
+#define APIC_TDR_DIV_4 0x1
+#define APIC_TDR_DIV_8 0x2
+#define APIC_TDR_DIV_16 0x3
+#define APIC_TDR_DIV_32 0x8
+#define APIC_TDR_DIV_64 0x9
+#define APIC_TDR_DIV_128 0xA
+
+#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
+
+#ifdef CONFIG_X86_CLUSTERED_APIC
+#define MAX_IO_APICS 32
+#else
+#define MAX_IO_APICS 8
+#endif
+
+
+/*
+ * The broadcast ID is 0xF for old APICs and 0xFF for xAPICs. SAPICs
+ * don't broadcast (yet?), but if they did, they might use 0xFFFF.
+ */
+#define APIC_BROADCAST_ID_XAPIC (0xFF)
+#define APIC_BROADCAST_ID_APIC (0x0F)
+
+/*
+ * The local APIC register structure, memory mapped. Not terribly well
+ * tested, but we might eventually use this one in the future - the
+ * reason we cannot use it right now is the P5 APIC: it has an
+ * erratum whereby it cannot take 8-bit reads and writes, only 32-bit ones ...
+ */
+#define u32 unsigned int
+
+#define lapic ((volatile struct local_apic *)APIC_BASE)
+
+struct local_apic {
+
+/*000*/ struct { u32 __reserved[4]; } __reserved_01;
+
+/*010*/ struct { u32 __reserved[4]; } __reserved_02;
+
+/*020*/ struct { /* APIC ID Register */
+ u32 __reserved_1 : 24,
+ phys_apic_id : 4,
+ __reserved_2 : 4;
+ u32 __reserved[3];
+ } id;
+
+/*030*/ const
+ struct { /* APIC Version Register */
+ u32 version : 8,
+ __reserved_1 : 8,
+ max_lvt : 8,
+ __reserved_2 : 8;
+ u32 __reserved[3];
+ } version;
+
+/*040*/ struct { u32 __reserved[4]; } __reserved_03;
+
+/*050*/ struct { u32 __reserved[4]; } __reserved_04;
+
+/*060*/ struct { u32 __reserved[4]; } __reserved_05;
+
+/*070*/ struct { u32 __reserved[4]; } __reserved_06;
+
+/*080*/ struct { /* Task Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } tpr;
+
+/*090*/ const
+ struct { /* Arbitration Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } apr;
+
+/*0A0*/ const
+ struct { /* Processor Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } ppr;
+
+/*0B0*/ struct { /* End Of Interrupt Register */
+ u32 eoi;
+ u32 __reserved[3];
+ } eoi;
+
+/*0C0*/ struct { u32 __reserved[4]; } __reserved_07;
+
+/*0D0*/ struct { /* Logical Destination Register */
+ u32 __reserved_1 : 24,
+ logical_dest : 8;
+ u32 __reserved_2[3];
+ } ldr;
+
+/*0E0*/ struct { /* Destination Format Register */
+ u32 __reserved_1 : 28,
+ model : 4;
+ u32 __reserved_2[3];
+ } dfr;
+
+/*0F0*/ struct { /* Spurious Interrupt Vector Register */
+ u32 spurious_vector : 8,
+ apic_enabled : 1,
+ focus_cpu : 1,
+ __reserved_2 : 22;
+ u32 __reserved_3[3];
+ } svr;
+
+/*100*/ struct { /* In Service Register */
+/*170*/ u32 bitfield;
+ u32 __reserved[3];
+ } isr [8];
+
+/*180*/ struct { /* Trigger Mode Register */
+/*1F0*/ u32 bitfield;
+ u32 __reserved[3];
+ } tmr [8];
+
+/*200*/ struct { /* Interrupt Request Register */
+/*270*/ u32 bitfield;
+ u32 __reserved[3];
+ } irr [8];
+
+/*280*/ union { /* Error Status Register */
+ struct {
+ u32 send_cs_error : 1,
+ receive_cs_error : 1,
+ send_accept_error : 1,
+ receive_accept_error : 1,
+ __reserved_1 : 1,
+ send_illegal_vector : 1,
+ receive_illegal_vector : 1,
+ illegal_register_address : 1,
+ __reserved_2 : 24;
+ u32 __reserved_3[3];
+ } error_bits;
+ struct {
+ u32 errors;
+ u32 __reserved_3[3];
+ } all_errors;
+ } esr;
+
+/*290*/ struct { u32 __reserved[4]; } __reserved_08;
+
+/*2A0*/ struct { u32 __reserved[4]; } __reserved_09;
+
+/*2B0*/ struct { u32 __reserved[4]; } __reserved_10;
+
+/*2C0*/ struct { u32 __reserved[4]; } __reserved_11;
+
+/*2D0*/ struct { u32 __reserved[4]; } __reserved_12;
+
+/*2E0*/ struct { u32 __reserved[4]; } __reserved_13;
+
+/*2F0*/ struct { u32 __reserved[4]; } __reserved_14;
+
+/*300*/ struct { /* Interrupt Command Register 1 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ destination_mode : 1,
+ delivery_status : 1,
+ __reserved_1 : 1,
+ level : 1,
+ trigger : 1,
+ __reserved_2 : 2,
+ shorthand : 2,
+ __reserved_3 : 12;
+ u32 __reserved_4[3];
+ } icr1;
+
+/*310*/ struct { /* Interrupt Command Register 2 */
+ union {
+ u32 __reserved_1 : 24,
+ phys_dest : 4,
+ __reserved_2 : 4;
+ u32 __reserved_3 : 24,
+ logical_dest : 8;
+ } dest;
+ u32 __reserved_4[3];
+ } icr2;
+
+/*320*/ struct { /* LVT - Timer */
+ u32 vector : 8,
+ __reserved_1 : 4,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ timer_mode : 1,
+ __reserved_3 : 14;
+ u32 __reserved_4[3];
+ } lvt_timer;
+
+/*330*/ struct { u32 __reserved[4]; } __reserved_15;
+
+/*340*/ struct { /* LVT - Performance Counter */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ __reserved_3 : 15;
+ u32 __reserved_4[3];
+ } lvt_pc;
+
+/*350*/ struct { /* LVT - LINT0 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ polarity : 1,
+ remote_irr : 1,
+ trigger : 1,
+ mask : 1,
+ __reserved_2 : 15;
+ u32 __reserved_3[3];
+ } lvt_lint0;
+
+/*360*/ struct { /* LVT - LINT1 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ polarity : 1,
+ remote_irr : 1,
+ trigger : 1,
+ mask : 1,
+ __reserved_2 : 15;
+ u32 __reserved_3[3];
+ } lvt_lint1;
+
+/*370*/ struct { /* LVT - Error */
+ u32 vector : 8,
+ __reserved_1 : 4,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ __reserved_3 : 15;
+ u32 __reserved_4[3];
+ } lvt_error;
+
+/*380*/ struct { /* Timer Initial Count Register */
+ u32 initial_count;
+ u32 __reserved_2[3];
+ } timer_icr;
+
+/*390*/ const
+ struct { /* Timer Current Count Register */
+ u32 curr_count;
+ u32 __reserved_2[3];
+ } timer_ccr;
+
+/*3A0*/ struct { u32 __reserved[4]; } __reserved_16;
+
+/*3B0*/ struct { u32 __reserved[4]; } __reserved_17;
+
+/*3C0*/ struct { u32 __reserved[4]; } __reserved_18;
+
+/*3D0*/ struct { u32 __reserved[4]; } __reserved_19;
+
+/*3E0*/ struct { /* Timer Divide Configuration Register */
+ u32 divisor : 4,
+ __reserved_1 : 28;
+ u32 __reserved_2[3];
+ } timer_dcr;
+
+/*3F0*/ struct { u32 __reserved[4]; } __reserved_20;
+
+} __attribute__ ((packed));
+
+#undef u32
+
+#endif
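
The lapic structure above is the bitfield view of the same registers that the flat offset macros index; as the header's own comment warns, it is not well tested, and every access must remain 32 bits wide because of the P5 erratum. A hedged sketch of the equivalent read through this view:

    /* Sketch: same datum as GET_APIC_ID(apic_read(APIC_ID)), via bitfields. */
    static unsigned int example_lapic_id(void)
    {
        return lapic->id.phys_apic_id;
    }
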
diff --git a/xen/include/asm-i386/atomic.h b/xen/include/asm-i386/atomic.h
new file mode 100644
index 0000000000..70a1212ed6
--- /dev/null
+++ b/xen/include/asm-i386/atomic.h
@@ -0,0 +1,204 @@
+#ifndef __ARCH_I386_ATOMIC__
+#define __ARCH_I386_ATOMIC__
+
+#include <xeno/config.h>
+
+/*
+ * Atomic operations that C can't guarantee us. Useful for
+ * resource counting etc..
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK "lock ; "
+#else
+#define LOCK ""
+#endif
+
+/*
+ * Make sure gcc doesn't try to be clever and move things around
+ * on us. We need to use _exactly_ the address the user gave us,
+ * not some alias that contains the same information.
+ */
+typedef struct { volatile int counter; } atomic_t;
+
+#define ATOMIC_INIT(i) { (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+#define atomic_read(v) ((v)->counter)
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+#define atomic_set(v,i) (((v)->counter) = (i))
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v. Note that the guaranteed useful range
+ * of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_add(int i, atomic_t *v)
+{
+ __asm__ __volatile__(
+ LOCK "addl %1,%0"
+ :"=m" (v->counter)
+ :"ir" (i), "m" (v->counter));
+}
+
+/**
+ * atomic_sub - subtract the atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_sub(int i, atomic_t *v)
+{
+ __asm__ __volatile__(
+ LOCK "subl %1,%0"
+ :"=m" (v->counter)
+ :"ir" (i), "m" (v->counter));
+}
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ LOCK "subl %2,%0; sete %1"
+ :"=m" (v->counter), "=qm" (c)
+ :"ir" (i), "m" (v->counter) : "memory");
+ return c;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_inc(atomic_t *v)
+{
+ __asm__ __volatile__(
+ LOCK "incl %0"
+ :"=m" (v->counter)
+ :"m" (v->counter));
+}
+
+/**
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_dec(atomic_t *v)
+{
+ __asm__ __volatile__(
+ LOCK "decl %0"
+ :"=m" (v->counter)
+ :"m" (v->counter));
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_dec_and_test(atomic_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ LOCK "decl %0; sete %1"
+ :"=m" (v->counter), "=qm" (c)
+ :"m" (v->counter) : "memory");
+ return c != 0;
+}
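
atomic_inc() and atomic_dec_and_test() pair into the reference-counting idiom that this header's opening comment alludes to; only the caller that drops the count to zero performs the teardown. A sketch with a hypothetical object type, not something from this tree:

    struct obj {
        atomic_t refcnt;
        /* ... payload ... */
    };

    static void obj_get(struct obj *o)
    {
        atomic_inc(&o->refcnt);
    }

    static void obj_put(struct obj *o)
    {
        /* The decrement-and-test carries a memory clobber, so prior
         * writes to the object are ordered before the free. */
        if (atomic_dec_and_test(&o->refcnt))
            kfree(o);
    }
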
+
+/**
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_inc_and_test(atomic_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ LOCK "incl %0; sete %1"
+ :"=m" (v->counter), "=qm" (c)
+ :"m" (v->counter) : "memory");
+ return c != 0;
+}
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @v: pointer of type atomic_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_add_negative(int i, atomic_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ LOCK "addl %2,%0; sets %1"
+ :"=m" (v->counter), "=qm" (c)
+ :"ir" (i), "m" (v->counter) : "memory");
+ return c;
+}
+
+/* These are x86-specific, used by some header files */
+#define atomic_clear_mask(mask, addr) \
+__asm__ __volatile__(LOCK "andl %0,%1" \
+: : "r" (~(mask)),"m" (*addr) : "memory")
+
+#define atomic_set_mask(mask, addr) \
+__asm__ __volatile__(LOCK "orl %0,%1" \
+: : "r" (mask),"m" (*addr) : "memory")
+
+/* Atomic operations are already serializing on x86 */
+#define smp_mb__before_atomic_dec() barrier()
+#define smp_mb__after_atomic_dec() barrier()
+#define smp_mb__before_atomic_inc() barrier()
+#define smp_mb__after_atomic_inc() barrier()
+
+#endif
diff --git a/xen/include/asm-i386/bitops.h b/xen/include/asm-i386/bitops.h
new file mode 100644
index 0000000000..73bcd8ef5f
--- /dev/null
+++ b/xen/include/asm-i386/bitops.h
@@ -0,0 +1,368 @@
+#ifndef _I386_BITOPS_H
+#define _I386_BITOPS_H
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ */
+
+#include <xeno/config.h>
+
+/*
+ * These have to be done with inline assembly: that way the bit-setting
+ * is guaranteed to be atomic. All bit operations return 0 if the bit
+ * was cleared before the operation and != 0 if it was not.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK_PREFIX "lock ; "
+#else
+#define LOCK_PREFIX ""
+#endif
+
+#define ADDR (*(volatile long *) addr)
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered. See __set_bit()
+ * if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void set_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__( LOCK_PREFIX
+ "btsl %1,%0"
+ :"=m" (ADDR)
+ :"Ir" (nr));
+}
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __set_bit(int nr, volatile void * addr)
+{
+ __asm__(
+ "btsl %1,%0"
+ :"=m" (ADDR)
+ :"Ir" (nr));
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered. However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static __inline__ void clear_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__( LOCK_PREFIX
+ "btrl %1,%0"
+ :"=m" (ADDR)
+ :"Ir" (nr));
+}
+#define smp_mb__before_clear_bit() barrier()
+#define smp_mb__after_clear_bit() barrier()
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __change_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__(
+ "btcl %1,%0"
+ :"=m" (ADDR)
+ :"Ir" (nr));
+}
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void change_bit(int nr, volatile void * addr)
+{
+ __asm__ __volatile__( LOCK_PREFIX
+ "btcl %1,%0"
+ :"=m" (ADDR)
+ :"Ir" (nr));
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_set_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__( LOCK_PREFIX
+ "btsl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"Ir" (nr) : "memory");
+ return oldbit;
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static __inline__ int __test_and_set_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__(
+ "btsl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"Ir" (nr));
+ return oldbit;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__( LOCK_PREFIX
+ "btrl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"Ir" (nr) : "memory");
+ return oldbit;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static __inline__ int __test_and_clear_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__(
+ "btrl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"Ir" (nr));
+ return oldbit;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static __inline__ int __test_and_change_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__(
+ "btcl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"Ir" (nr) : "memory");
+ return oldbit;
+}
+
+/**
+ * test_and_change_bit - Change a bit and return its new value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_change_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__( LOCK_PREFIX
+ "btcl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"Ir" (nr) : "memory");
+ return oldbit;
+}
+
+
+static __inline__ int constant_test_bit(int nr, const volatile void * addr)
+{
+ return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline__ int variable_test_bit(int nr, volatile void * addr)
+{
+ int oldbit;
+
+ __asm__ __volatile__(
+ "btl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit)
+ :"m" (ADDR),"Ir" (nr));
+ return oldbit;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
+
+/**
+ * find_first_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first zero bit, not the number of the byte
+ * containing a bit.
+ */
+static __inline__ int find_first_zero_bit(void * addr, unsigned size)
+{
+ int d0, d1, d2;
+ int res;
+
+ if (!size)
+ return 0;
+ /* This looks at memory. Mark it volatile to tell gcc not to move it around */
+ __asm__ __volatile__(
+ "movl $-1,%%eax\n\t"
+ "xorl %%edx,%%edx\n\t"
+ "repe; scasl\n\t"
+ "je 1f\n\t"
+ "xorl -4(%%edi),%%eax\n\t"
+ "subl $4,%%edi\n\t"
+ "bsfl %%eax,%%edx\n"
+ "1:\tsubl %%ebx,%%edi\n\t"
+ "shll $3,%%edi\n\t"
+ "addl %%edi,%%edx"
+ :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
+ :"1" ((size + 31) >> 5), "2" (addr), "b" (addr));
+ return res;
+}
+
+/**
+ * find_next_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
+{
+ unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
+ int set = 0, bit = offset & 31, res;
+
+ if (bit) {
+ /*
+ * Look for zero in the first 32-bit word
+ */
+ __asm__("bsfl %1,%0\n\t"
+ "jne 1f\n\t"
+ "movl $32, %0\n"
+ "1:"
+ : "=r" (set)
+ : "r" (~(*p >> bit)));
+ if (set < (32 - bit))
+ return set + offset;
+ set = 32 - bit;
+ p++;
+ }
+ /*
+ * No zero yet, search the remaining full words for a zero
+ */
+ res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr));
+ return (offset + set + res);
+}
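
find_first_zero_bit() combines with the atomic bit operations above into a simple ID allocator; the scan itself is not atomic, so the claim must either happen under a lock or, as in this sketch with hypothetical names, be retried with test_and_set_bit():

    #define MAX_IDS 1024

    static unsigned long id_map[MAX_IDS / (8 * sizeof(unsigned long))];

    /* Returns a previously unused ID, or -1 if the map is full. */
    static int alloc_id(void)
    {
        int id;

        do {
            id = find_first_zero_bit(id_map, MAX_IDS);
            if (id >= MAX_IDS)
                return -1;
            /* Retry if another CPU claimed the bit between scan and set. */
        } while (test_and_set_bit(id, id_map));
        return id;
    }
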
+
+/**
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static __inline__ unsigned long ffz(unsigned long word)
+{
+ __asm__("bsfl %1,%0"
+ :"=r" (word)
+ :"r" (~word));
+ return word;
+}
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static __inline__ int ffs(int x)
+{
+ int r;
+
+ __asm__("bsfl %1,%0\n\t"
+ "jnz 1f\n\t"
+ "movl $-1,%0\n"
+ "1:" : "=r" (r) : "g" (x));
+ return r+1;
+}
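
ffs() follows the 1-based libc convention and returns 0 for a zero argument, whereas ffz() above is 0-based and undefined for ~0UL. A small sketch of the differing conventions:

    static void example_bit_search(void)
    {
        int a = ffs(0x8);           /* == 4: bit 3, counted from 1 */
        int b = ffs(0);             /* == 0: no bit set */
        unsigned long c = ffz(0x7); /* == 3: first zero bit, from 0 */
        (void)a; (void)b; (void)c;
    }
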
+
+/**
+ * hweightN - returns the Hamming weight of an N-bit word
+ * @x: the word to weigh
+ *
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+
+#define hweight32(x) generic_hweight32(x)
+#define hweight16(x) generic_hweight16(x)
+#define hweight8(x) generic_hweight8(x)
+
+#define ext2_set_bit __test_and_set_bit
+#define ext2_clear_bit __test_and_clear_bit
+#define ext2_test_bit test_bit
+#define ext2_find_first_zero_bit find_first_zero_bit
+#define ext2_find_next_zero_bit find_next_zero_bit
+
+/* Bitmap functions for the minix filesystem. */
+#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr)
+#define minix_set_bit(nr,addr) __set_bit(nr,addr)
+#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr)
+#define minix_test_bit(nr,addr) test_bit(nr,addr)
+#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
+
+#endif /* _I386_BITOPS_H */
diff --git a/xen/include/asm-i386/byteorder.h b/xen/include/asm-i386/byteorder.h
new file mode 100644
index 0000000000..bbfb629fae
--- /dev/null
+++ b/xen/include/asm-i386/byteorder.h
@@ -0,0 +1,47 @@
+#ifndef _I386_BYTEORDER_H
+#define _I386_BYTEORDER_H
+
+#include <asm/types.h>
+
+#ifdef __GNUC__
+
+/* For avoiding bswap on i386 */
+#ifdef __KERNEL__
+#include <linux/config.h>
+#endif
+
+static __inline__ __const__ __u32 ___arch__swab32(__u32 x)
+{
+#ifdef CONFIG_X86_BSWAP
+ __asm__("bswap %0" : "=r" (x) : "0" (x));
+#else
+ __asm__("xchgb %b0,%h0\n\t" /* swap lower bytes */
+ "rorl $16,%0\n\t" /* swap words */
+ "xchgb %b0,%h0" /* swap higher bytes */
+ :"=q" (x)
+ : "0" (x));
+#endif
+ return x;
+}
+
+static __inline__ __const__ __u16 ___arch__swab16(__u16 x)
+{
+ __asm__("xchgb %b0,%h0" /* swap bytes */ \
+ : "=q" (x) \
+ : "0" (x)); \
+ return x;
+}
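
These swab routines are the building blocks behind the generic cpu_to_be32()/ntohl() family pulled in by the little_endian.h include below; on their own they simply reverse byte order. A sketch:

    /* Sketch: 0x12345678 becomes 0x78563412 (full byte reversal). */
    static __u32 example_swab32(void)
    {
        return ___arch__swab32(0x12345678);
    }
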
+
+#define __arch__swab32(x) ___arch__swab32(x)
+#define __arch__swab16(x) ___arch__swab16(x)
+
+#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
+# define __BYTEORDER_HAS_U64__
+# define __SWAB_64_THRU_32__
+#endif
+
+#endif /* __GNUC__ */
+
+#include <linux/byteorder/little_endian.h>
+
+#endif /* _I386_BYTEORDER_H */
diff --git a/xen/include/asm-i386/cache.h b/xen/include/asm-i386/cache.h
new file mode 100644
index 0000000000..502c8ba7a6
--- /dev/null
+++ b/xen/include/asm-i386/cache.h
@@ -0,0 +1,13 @@
+/*
+ * include/asm-i386/cache.h
+ */
+#ifndef __ARCH_I386_CACHE_H
+#define __ARCH_I386_CACHE_H
+
+#include <xeno/config.h>
+
+/* L1 cache line size */
+#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
+#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+
+#endif
diff --git a/xen/include/asm-i386/cpufeature.h b/xen/include/asm-i386/cpufeature.h
new file mode 100644
index 0000000000..85b8b43974
--- /dev/null
+++ b/xen/include/asm-i386/cpufeature.h
@@ -0,0 +1,76 @@
+/*
+ * cpufeature.h
+ *
+ * Defines x86 CPU feature bits
+ */
+
+#ifndef __ASM_I386_CPUFEATURE_H
+#define __ASM_I386_CPUFEATURE_H
+
+/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */
+#define CPU_FEATURE_P(CAP, FEATURE) test_bit(X86_FEATURE_##FEATURE, CAP)
+
+#define NCAPINTS 4 /* Currently we have 4 32-bit words worth of info */
+
+/* Intel-defined CPU features, CPUID level 0x00000001, word 0 */
+#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
+#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
+#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN (0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */
+#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */
+#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
+ /* of FPU context), and CR4.OSFXSR available */
+#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */
+#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */
+#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
+
+#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
+
+#endif /* __ASM_I386_CPUFEATURE_H */
+
+/*
+ * Local Variables:
+ * mode:c
+ * comment-column:42
+ * End:
+ */
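
cpu_has() is the intended way to test these bits against a per-CPU capability array; the sketch below assumes the usual cpuinfo_x86 structure carrying x86_capability, as in the Linux headers this file was taken from:

    /* Sketch: 'c' is assumed to carry the x86_capability word array. */
    static int has_4mb_pages(struct cpuinfo_x86 *c)
    {
        return cpu_has(c, X86_FEATURE_PSE);
    }
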
diff --git a/xen/include/asm-i386/current.h b/xen/include/asm-i386/current.h
new file mode 100644
index 0000000000..bc1496a2c9
--- /dev/null
+++ b/xen/include/asm-i386/current.h
@@ -0,0 +1,15 @@
+#ifndef _I386_CURRENT_H
+#define _I386_CURRENT_H
+
+struct task_struct;
+
+static inline struct task_struct * get_current(void)
+{
+ struct task_struct *current;
+ __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
+ return current;
+}
+
+#define current get_current()
+
+#endif /* !(_I386_CURRENT_H) */
diff --git a/xen/include/asm-i386/debugreg.h b/xen/include/asm-i386/debugreg.h
new file mode 100644
index 0000000000..f0b2b06ae0
--- /dev/null
+++ b/xen/include/asm-i386/debugreg.h
@@ -0,0 +1,64 @@
+#ifndef _I386_DEBUGREG_H
+#define _I386_DEBUGREG_H
+
+
+/* Indicate the register numbers for a number of the specific
+ debug registers. Registers 0-3 contain the addresses we wish to trap on */
+#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */
+#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */
+
+#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */
+#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */
+
+/* Define a few things for the status register. We can use this to determine
+ which debugging register was responsible for the trap. The other bits
+ are either reserved or not of interest to us. */
+
+#define DR_TRAP0 (0x1) /* db0 */
+#define DR_TRAP1 (0x2) /* db1 */
+#define DR_TRAP2 (0x4) /* db2 */
+#define DR_TRAP3 (0x8) /* db3 */
+
+#define DR_STEP (0x4000) /* single-step */
+#define DR_SWITCH (0x8000) /* task switch */
+
+/* Now define a bunch of things for manipulating the control register.
+ The top two bytes of the control register consist of 4 fields of 4
+ bits - each field corresponds to one of the four debug registers,
+ and indicates what types of access we trap on, and how large the data
+ field is that we are looking at */
+
+#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
+#define DR_CONTROL_SIZE 4 /* 4 control bits per register */
+
+#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */
+#define DR_RW_WRITE (0x1)
+#define DR_RW_READ (0x3)
+
+#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
+#define DR_LEN_2 (0x4)
+#define DR_LEN_4 (0xC)
+
+/* The low byte of the control register determines which registers are
+ enabled. There are 4 fields of two bits. One bit is "local", meaning
+ that the processor will reset the bit after a task switch, and the other
+ is "global", meaning that we have to explicitly reset the bit. With Linux,
+ you can use either one, since we explicitly zero the register when we enter
+ kernel mode. */
+
+#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
+#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
+#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
+
+#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
+#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
+
+/* The second byte of the control register has a few special things.
+ We can slow the instruction pipeline for instructions coming via the
+ gdt or the ldt if we want to. I am not sure why this is an advantage */
+
+#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
+#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
+#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
+
+#endif
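
The control-register layout described above composes mechanically: shift a 4-bit type/length field into the top half for the chosen debug register, and set that register's enable bit in the low byte. A hedged sketch computing a DR7 value for a 4-byte write watchpoint in debug register 0 (the watch address itself would be loaded into DR0 separately):

    /* Sketch: DR7 bits for a 4-byte write watchpoint in debug register 0. */
    static unsigned long dr7_for_write_watch0(void)
    {
        unsigned long dr7 = 0;

        /* Type/length field for register 0 starts at bit 16. */
        dr7 |= (unsigned long)(DR_RW_WRITE | DR_LEN_4)
               << (DR_CONTROL_SHIFT + 0 * DR_CONTROL_SIZE);
        /* Local-enable bit for register 0. */
        dr7 |= 1UL << (DR_LOCAL_ENABLE_SHIFT + 0 * DR_ENABLE_SIZE);
        return dr7;
    }
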
diff --git a/xen/include/asm-i386/delay.h b/xen/include/asm-i386/delay.h
new file mode 100644
index 0000000000..9e0adb4a27
--- /dev/null
+++ b/xen/include/asm-i386/delay.h
@@ -0,0 +1,14 @@
+#ifndef _I386_DELAY_H
+#define _I386_DELAY_H
+
+/*
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines calling functions in arch/i386/lib/delay.c
+ */
+
+extern unsigned long ticks_per_usec;
+extern void __udelay(unsigned long usecs);
+#define udelay(n) __udelay(n)
+
+#endif /* defined(_I386_DELAY_H) */
diff --git a/xen/include/asm-i386/desc.h b/xen/include/asm-i386/desc.h
new file mode 100644
index 0000000000..2cb90769b5
--- /dev/null
+++ b/xen/include/asm-i386/desc.h
@@ -0,0 +1,32 @@
+#ifndef __ARCH_DESC_H
+#define __ARCH_DESC_H
+
+#define __FIRST_TSS_ENTRY 8
+#define __TSS(n) ((n) + __FIRST_TSS_ENTRY)
+
+#ifndef __ASSEMBLY__
+struct desc_struct {
+ unsigned long a,b;
+};
+
+extern struct desc_struct gdt_table[];
+extern struct desc_struct *idt, *gdt;
+
+struct Xgt_desc_struct {
+ unsigned short size;
+ unsigned long address __attribute__((packed));
+};
+
+#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2))
+#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2))
+
+#define load_TR(n) __asm__ __volatile__("ltr %%ax"::"a" (__TSS(n)<<3))
+
+#define __load_LDT(n) __asm__ __volatile__("lldt %%ax"::"a" ((n)<<3))
+
+extern void set_intr_gate(unsigned int irq, void * addr);
+extern void set_tss_desc(unsigned int n, void *addr);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
diff --git a/xen/include/asm-i386/dma.h b/xen/include/asm-i386/dma.h
new file mode 100644
index 0000000000..f24c90a7bd
--- /dev/null
+++ b/xen/include/asm-i386/dma.h
@@ -0,0 +1,301 @@
+/* $Id: dma.h,v 1.7 1992/12/14 00:29:34 root Exp root $
+ * linux/include/asm/dma.h: Defines for using and allocating dma channels.
+ * Written by Hennus Bergman, 1992.
+ * High DMA channel support & info by Hannu Savolainen
+ * and John Boyd, Nov. 1992.
+ */
+
+#ifndef _ASM_DMA_H
+#define _ASM_DMA_H
+
+#include <linux/config.h>
+#include <linux/spinlock.h> /* And spinlocks */
+#include <asm/io.h> /* need byte IO */
+#include <linux/delay.h>
+
+
+#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
+#define dma_outb outb_p
+#else
+#define dma_outb outb
+#endif
+
+#define dma_inb inb
+
+/*
+ * NOTES about DMA transfers:
+ *
+ * controller 1: channels 0-3, byte operations, ports 00-1F
+ * controller 2: channels 4-7, word operations, ports C0-DF
+ *
+ * - ALL registers are 8 bits only, regardless of transfer size
+ * - channel 4 is not used - cascades 1 into 2.
+ * - channels 0-3 are byte - addresses/counts are for physical bytes
+ * - channels 5-7 are word - addresses/counts are for physical words
+ * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries
+ * - transfer count loaded to registers is 1 less than actual count
+ * - controller 2 offsets are all even (2x offsets for controller 1)
+ * - page registers for 5-7 don't use data bit 0, represent 128K pages
+ * - page registers for 0-3 use bit 0, represent 64K pages
+ *
+ * DMA transfers are limited to the lower 16MB of _physical_ memory.
+ * Note that addresses loaded into registers must be _physical_ addresses,
+ * not logical addresses (which may differ if paging is active).
+ *
+ * Address mapping for channels 0-3:
+ *
+ * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses)
+ * | ... | | ... | | ... |
+ * | ... | | ... | | ... |
+ * | ... | | ... | | ... |
+ * P7 ... P0 A7 ... A0 A7 ... A0
+ * | Page | Addr MSB | Addr LSB | (DMA registers)
+ *
+ * Address mapping for channels 5-7:
+ *
+ * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses)
+ * | ... | \ \ ... \ \ \ ... \ \
+ * | ... | \ \ ... \ \ \ ... \ (not used)
+ * | ... | \ \ ... \ \ \ ... \
+ * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0
+ * | Page | Addr MSB | Addr LSB | (DMA registers)
+ *
+ * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses
+ * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at
+ * the hardware level, so odd-byte transfers aren't possible).
+ *
+ * Transfer count (_not # bytes_) is limited to 64K, represented as actual
+ * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more,
+ * and up to 128K bytes may be transferred on channels 5-7 in one operation.
+ *
+ */
+
+#define MAX_DMA_CHANNELS 8
+
+#if 0
+/* The maximum address that we can perform a DMA transfer to on this platform */
+#define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000)
+#endif
+
+
+/* 8237 DMA controllers */
+#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */
+#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */
+
+/* DMA controller registers */
+#define DMA1_CMD_REG 0x08 /* command register (w) */
+#define DMA1_STAT_REG 0x08 /* status register (r) */
+#define DMA1_REQ_REG 0x09 /* request register (w) */
+#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */
+#define DMA1_MODE_REG 0x0B /* mode register (w) */
+#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */
+#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */
+#define DMA1_RESET_REG 0x0D /* Master Clear (w) */
+#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */
+#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */
+
+#define DMA2_CMD_REG 0xD0 /* command register (w) */
+#define DMA2_STAT_REG 0xD0 /* status register (r) */
+#define DMA2_REQ_REG 0xD2 /* request register (w) */
+#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */
+#define DMA2_MODE_REG 0xD6 /* mode register (w) */
+#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */
+#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */
+#define DMA2_RESET_REG 0xDA /* Master Clear (w) */
+#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */
+#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */
+
+#define DMA_ADDR_0 0x00 /* DMA address registers */
+#define DMA_ADDR_1 0x02
+#define DMA_ADDR_2 0x04
+#define DMA_ADDR_3 0x06
+#define DMA_ADDR_4 0xC0
+#define DMA_ADDR_5 0xC4
+#define DMA_ADDR_6 0xC8
+#define DMA_ADDR_7 0xCC
+
+#define DMA_CNT_0 0x01 /* DMA count registers */
+#define DMA_CNT_1 0x03
+#define DMA_CNT_2 0x05
+#define DMA_CNT_3 0x07
+#define DMA_CNT_4 0xC2
+#define DMA_CNT_5 0xC6
+#define DMA_CNT_6 0xCA
+#define DMA_CNT_7 0xCE
+
+#define DMA_PAGE_0 0x87 /* DMA page registers */
+#define DMA_PAGE_1 0x83
+#define DMA_PAGE_2 0x81
+#define DMA_PAGE_3 0x82
+#define DMA_PAGE_5 0x8B
+#define DMA_PAGE_6 0x89
+#define DMA_PAGE_7 0x8A
+
+#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */
+#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */
+#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */
+
+#define DMA_AUTOINIT 0x10
+
+
+extern spinlock_t dma_spin_lock;
+
+static __inline__ unsigned long claim_dma_lock(void)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&dma_spin_lock, flags);
+ return flags;
+}
+
+static __inline__ void release_dma_lock(unsigned long flags)
+{
+ spin_unlock_irqrestore(&dma_spin_lock, flags);
+}
+
+/* enable/disable a specific DMA channel */
+static __inline__ void enable_dma(unsigned int dmanr)
+{
+ if (dmanr<=3)
+ dma_outb(dmanr, DMA1_MASK_REG);
+ else
+ dma_outb(dmanr & 3, DMA2_MASK_REG);
+}
+
+static __inline__ void disable_dma(unsigned int dmanr)
+{
+ if (dmanr<=3)
+ dma_outb(dmanr | 4, DMA1_MASK_REG);
+ else
+ dma_outb((dmanr & 3) | 4, DMA2_MASK_REG);
+}
+
+/* Clear the 'DMA Pointer Flip Flop'.
+ * Write 0 for LSB/MSB, 1 for MSB/LSB access.
+ * Use this once to initialize the FF to a known state.
+ * After that, keep track of it. :-)
+ * --- In order to do that, the DMA routines below should ---
+ * --- only be used while holding the DMA lock ! ---
+ */
+static __inline__ void clear_dma_ff(unsigned int dmanr)
+{
+ if (dmanr<=3)
+ dma_outb(0, DMA1_CLEAR_FF_REG);
+ else
+ dma_outb(0, DMA2_CLEAR_FF_REG);
+}
+
+/* set mode (above) for a specific DMA channel */
+static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
+{
+ if (dmanr<=3)
+ dma_outb(mode | dmanr, DMA1_MODE_REG);
+ else
+ dma_outb(mode | (dmanr&3), DMA2_MODE_REG);
+}
+
+/* Set only the page register bits of the transfer address.
+ * This is used for successive transfers when we know the contents of
+ * the lower 16 bits of the DMA current address register, but a 64k boundary
+ * may have been crossed.
+ */
+static __inline__ void set_dma_page(unsigned int dmanr, char pagenr)
+{
+ switch(dmanr) {
+ case 0:
+ dma_outb(pagenr, DMA_PAGE_0);
+ break;
+ case 1:
+ dma_outb(pagenr, DMA_PAGE_1);
+ break;
+ case 2:
+ dma_outb(pagenr, DMA_PAGE_2);
+ break;
+ case 3:
+ dma_outb(pagenr, DMA_PAGE_3);
+ break;
+ case 5:
+ dma_outb(pagenr & 0xfe, DMA_PAGE_5);
+ break;
+ case 6:
+ dma_outb(pagenr & 0xfe, DMA_PAGE_6);
+ break;
+ case 7:
+ dma_outb(pagenr & 0xfe, DMA_PAGE_7);
+ break;
+ }
+}
+
+
+/* Set transfer address & page bits for specific DMA channel.
+ * Assumes dma flipflop is clear.
+ */
+static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
+{
+ set_dma_page(dmanr, a>>16);
+ if (dmanr <= 3) {
+ dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
+ dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
+ } else {
+ dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
+ dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
+ }
+}
+
+
+/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for
+ * a specific DMA channel.
+ * You must ensure the parameters are valid.
+ * NOTE: from a manual: "the number of transfers is one more
+ * than the initial word count"! This is taken into account.
+ * Assumes dma flip-flop is clear.
+ * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
+ */
+static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
+{
+ count--;
+ if (dmanr <= 3) {
+ dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
+ dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
+ } else {
+ dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
+ dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
+ }
+}
+
+
+/* Get DMA residue count. After a DMA transfer, this
+ * should return zero. Reading this while a DMA transfer is
+ * still in progress will return unpredictable results.
+ * If called before the channel has been used, it may return 1.
+ * Otherwise, it returns the number of _bytes_ left to transfer.
+ *
+ * Assumes DMA flip-flop is clear.
+ */
+static __inline__ int get_dma_residue(unsigned int dmanr)
+{
+ unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE
+ : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE;
+
+ /* using short to get 16-bit wrap around */
+ unsigned short count;
+
+ count = 1 + dma_inb(io_port);
+ count += dma_inb(io_port) << 8;
+
+ return (dmanr<=3)? count : (count<<1);
+}
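
Taken together, the helpers above imply a fixed programming sequence: take the lock, mask the channel, clear the flip-flop, then load mode, address and count before unmasking. A hedged sketch for a single device-to-memory transfer, assuming the caller supplies a buffer that is physically contiguous, below 16MB, and does not cross a 64K boundary:

    /* Sketch: program channel 'dmanr' (0-3) to read 'count' bytes to 'phys'. */
    static void example_start_dma(unsigned int dmanr, unsigned int phys,
                                  unsigned int count)
    {
        unsigned long flags = claim_dma_lock();

        disable_dma(dmanr);          /* mask the channel while reprogramming */
        clear_dma_ff(dmanr);         /* byte flip-flop to a known state */
        set_dma_mode(dmanr, DMA_MODE_READ);
        set_dma_addr(dmanr, phys);   /* physical address, incl. page register */
        set_dma_count(dmanr, count); /* helper loads count - 1 as required */
        enable_dma(dmanr);           /* unmask: transfer runs on next DREQ */

        release_dma_lock(flags);
    }
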
+
+
+/* These are in kernel/dma.c: */
+extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */
+extern void free_dma(unsigned int dmanr); /* release it again */
+
+/* From PCI */
+
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#else
+#define isa_dma_bridge_buggy (0)
+#endif
+
+#endif /* _ASM_DMA_H */
diff --git a/xen/include/asm-i386/domain_page.h b/xen/include/asm-i386/domain_page.h
new file mode 100644
index 0000000000..92fb261147
--- /dev/null
+++ b/xen/include/asm-i386/domain_page.h
@@ -0,0 +1,43 @@
+/******************************************************************************
+ * domain_page.h
+ *
+ * Allow temporary mapping of domain page frames into Xen space.
+ */
+
+#include <xeno/config.h>
+#include <xeno/sched.h>
+
+extern unsigned long *mapcache[NR_CPUS];
+#define MAPCACHE_ENTRIES 1024
+
+/*
+ * Maps a given physical address, returning corresponding virtual address.
+ * The entire page containing that VA is now accessible until a
+ * corresponding call to unmap_domain_mem().
+ */
+extern void *map_domain_mem(unsigned long pa);
+
+/*
+ * Pass a VA within a page previously mapped with map_domain_mem().
+ * That page will then be removed from the mapping lists.
+ */
+extern void unmap_domain_mem(void *va);
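+
+/*
+ * Usage sketch (hypothetical caller): copy one page-aligned frame of
+ * domain memory out to a hypervisor buffer. Assumes memcpy() and
+ * PAGE_SIZE are visible via the includes above.
+ */
+static inline void example_copy_from_frame(void *dst, unsigned long pa)
+{
+    void *va = map_domain_mem(pa);  /* maps the whole containing frame */
+    memcpy(dst, va, PAGE_SIZE);     /* pa is assumed page-aligned here */
+    unmap_domain_mem(va);
+}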
+
+#if 0
+#define MAPCACHE_HASH(_pfn) ((_pfn) & (MAPCACHE_ENTRIES-1))
+static inline void *map_domain_mem(unsigned long pa)
+{
+ unsigned long pfn = pa >> PAGE_SHIFT;
+ unsigned long hash = MAPCACHE_HASH(pfn);
+ unsigned long *pent = mapcache[smp_processor_id()] + hash;
+ void *va = (void *)(MAPCACHE_VIRT_START +
+ (hash << PAGE_SHIFT) +
+ (pa & ~PAGE_MASK));
+ if ( (*pent & PAGE_MASK) != (pfn << PAGE_SHIFT) )
+ {
+ *pent = (pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR;
+ __flush_tlb_one(va);
+ }
+ return va;
+}
+#endif
diff --git a/xen/include/asm-i386/elf.h b/xen/include/asm-i386/elf.h
new file mode 100644
index 0000000000..ded22856d0
--- /dev/null
+++ b/xen/include/asm-i386/elf.h
@@ -0,0 +1,233 @@
+/*
+ * GRUB -- GRand Unified Bootloader
+ * Copyright (C) 1996 Erich Boleyn <erich@uruk.org>
+ * Copyright (C) 2001 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* 32-bit data types */
+
+typedef unsigned long Elf32_Addr;
+typedef unsigned short Elf32_Half;
+typedef unsigned long Elf32_Off;
+typedef signed long Elf32_Sword;
+typedef unsigned long Elf32_Word;
+/* "unsigned char" already exists */
+
+/* ELF header */
+typedef struct
+{
+
+#define EI_NIDENT 16
+
+ /* first four characters are defined below */
+#define EI_MAG0 0
+#define ELFMAG0 0x7f
+#define EI_MAG1 1
+#define ELFMAG1 'E'
+#define EI_MAG2 2
+#define ELFMAG2 'L'
+#define EI_MAG3 3
+#define ELFMAG3 'F'
+
+#define EI_CLASS 4 /* data sizes */
+#define ELFCLASS32 1 /* i386 -- up to 32-bit data sizes present */
+
+#define EI_DATA 5 /* data type and ordering */
+#define ELFDATA2LSB 1 /* i386 -- LSB 2's complement */
+
+#define EI_VERSION 6 /* version number. "e_version" must be the same */
+#define EV_CURRENT 1 /* current version number */
+
+#define EI_PAD 7 /* from here in is just padding */
+
+#define EI_BRAND 8 /* start of OS branding (this
+ obviously violates the ELF
+ standard) */
+
+ unsigned char e_ident[EI_NIDENT]; /* basic identification block */
+
+#define ET_EXEC 2 /* we only care about executable types */
+ Elf32_Half e_type; /* file types */
+
+#define EM_386 3 /* i386 -- obviously use this one */
+ Elf32_Half e_machine; /* machine types */
+ Elf32_Word e_version; /* use same as "EI_VERSION" above */
+ Elf32_Addr e_entry; /* entry point of the program */
+ Elf32_Off e_phoff; /* program header table file offset */
+ Elf32_Off e_shoff; /* section header table file offset */
+ Elf32_Word e_flags; /* flags */
+ Elf32_Half e_ehsize; /* elf header size in bytes */
+ Elf32_Half e_phentsize; /* program header entry size */
+ Elf32_Half e_phnum; /* number of entries in program header */
+ Elf32_Half e_shentsize; /* section header entry size */
+ Elf32_Half e_shnum; /* number of entries in section header */
+
+#define SHN_UNDEF 0
+#define SHN_LORESERVE 0xff00
+#define SHN_LOPROC 0xff00
+#define SHN_HIPROC 0xff1f
+#define SHN_ABS 0xfff1
+#define SHN_COMMON 0xfff2
+#define SHN_HIRESERVE 0xffff
+ Elf32_Half e_shstrndx; /* section header string table index */
+}
+Elf32_Ehdr;
+
+
+#define BOOTABLE_I386_ELF(h) \
+ ((h.e_ident[EI_MAG0] == ELFMAG0) & (h.e_ident[EI_MAG1] == ELFMAG1) \
+ & (h.e_ident[EI_MAG2] == ELFMAG2) & (h.e_ident[EI_MAG3] == ELFMAG3) \
+ & (h.e_ident[EI_CLASS] == ELFCLASS32) & (h.e_ident[EI_DATA] == ELFDATA2LSB) \
+ & (h.e_ident[EI_VERSION] == EV_CURRENT) & (h.e_type == ET_EXEC) \
+ & (h.e_machine == EM_386) & (h.e_version == EV_CURRENT))
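+
+/*
+ * Usage sketch (hypothetical loader): validate a header that has been
+ * read from an image before trusting e_entry/e_phoff.
+ */
+static inline int example_image_bootable(Elf32_Ehdr *ehdr)
+{
+    return BOOTABLE_I386_ELF((*ehdr)); /* non-zero for a bootable image */
+}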
+
+/* section header table entry */
+typedef struct
+{
+ Elf32_Word sh_name; /* Section name (string tbl index) */
+ Elf32_Word sh_type; /* Section type */
+ Elf32_Word sh_flags; /* Section flags */
+ Elf32_Addr sh_addr; /* Section virtual addr at execution */
+ Elf32_Off sh_offset; /* Section file offset */
+ Elf32_Word sh_size; /* Section size in bytes */
+ Elf32_Word sh_link; /* Link to another section */
+ Elf32_Word sh_info; /* Additional section information */
+ Elf32_Word sh_addralign; /* Section alignment */
+ Elf32_Word sh_entsize; /* Entry size if section holds table */
+}
+Elf32_Shdr;
+
+/* symbol table - page 4-25, figure 4-15 */
+typedef struct
+{
+ Elf32_Word st_name;
+ Elf32_Addr st_value;
+ Elf32_Word st_size;
+ unsigned char st_info;
+ unsigned char st_other;
+ Elf32_Half st_shndx;
+}
+Elf32_Sym;
+
+/* symbol type and binding attributes - page 4-26 */
+
+#define ELF32_ST_BIND(i) ((i) >> 4)
+#define ELF32_ST_TYPE(i) ((i) & 0xf)
+#define ELF32_ST_INFO(b,t) (((b)<<4)+((t)&0xf))
+
+/* symbol binding - page 4-26, figure 4-16 */
+
+#define STB_LOCAL 0
+#define STB_GLOBAL 1
+#define STB_WEAK 2
+#define STB_LOPROC 13
+#define STB_HIPROC 15
+
+/* symbol types - page 4-28, figure 4-17 */
+
+#define STT_NOTYPE 0
+#define STT_OBJECT 1
+#define STT_FUNC 2
+#define STT_SECTION 3
+#define STT_FILE 4
+#define STT_LOPROC 13
+#define STT_HIPROC 15
+
+
+/* Macros to split/combine relocation type and symbol page 4-32 */
+
+#define ELF32_R_SYM(__i) ((__i)>>8)
+#define ELF32_R_TYPE(__i) ((unsigned char) (__i))
+#define ELF32_R_INFO(__s, __t) (((__s)<<8) + (unsigned char) (__t))
+
+
+/* program header - page 5-2, figure 5-1 */
+
+typedef struct
+{
+ Elf32_Word p_type;
+ Elf32_Off p_offset;
+ Elf32_Addr p_vaddr;
+ Elf32_Addr p_paddr;
+ Elf32_Word p_filesz;
+ Elf32_Word p_memsz;
+ Elf32_Word p_flags;
+ Elf32_Word p_align;
+}
+Elf32_Phdr;
+
+/* segment types - page 5-3, figure 5-2 */
+
+#define PT_NULL 0
+#define PT_LOAD 1
+#define PT_DYNAMIC 2
+#define PT_INTERP 3
+#define PT_NOTE 4
+#define PT_SHLIB 5
+#define PT_PHDR 6
+
+#define PT_LOPROC 0x70000000
+#define PT_HIPROC 0x7fffffff
+
+/* segment permissions - page 5-6 */
+
+#define PF_X 0x1
+#define PF_W 0x2
+#define PF_R 0x4
+#define PF_MASKPROC 0xf0000000
+
+
+/* dynamic structure - page 5-15, figure 5-9 */
+
+typedef struct
+{
+ Elf32_Sword d_tag;
+ union
+ {
+ Elf32_Word d_val;
+ Elf32_Addr d_ptr;
+ }
+ d_un;
+}
+Elf32_Dyn;
+
+/* Dynamic array tags - page 5-16, figure 5-10. */
+
+#define DT_NULL 0
+#define DT_NEEDED 1
+#define DT_PLTRELSZ 2
+#define DT_PLTGOT 3
+#define DT_HASH 4
+#define DT_STRTAB 5
+#define DT_SYMTAB 6
+#define DT_RELA 7
+#define DT_RELASZ 8
+#define DT_RELAENT 9
+#define DT_STRSZ 10
+#define DT_SYMENT 11
+#define DT_INIT 12
+#define DT_FINI 13
+#define DT_SONAME 14
+#define DT_RPATH 15
+#define DT_SYMBOLIC 16
+#define DT_REL 17
+#define DT_RELSZ 18
+#define DT_RELENT 19
+#define DT_PLTREL 20
+#define DT_DEBUG 21
+#define DT_TEXTREL 22
+#define DT_JMPREL 23
diff --git a/xen/include/asm-i386/fixmap.h b/xen/include/asm-i386/fixmap.h
new file mode 100644
index 0000000000..b0f455a5af
--- /dev/null
+++ b/xen/include/asm-i386/fixmap.h
@@ -0,0 +1,107 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+#include <xeno/config.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special addresses
+ * from the end of virtual memory (0xfffff000) backwards.
+ * Also this lets us do fail-safe vmalloc(); we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * These 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages (or larger if used with an increment
+ * higher than 1). Use set_fixmap(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+
+/*
+ * on UP currently we will have no trace of the fixmap mechanism,
+ * no page table allocations, etc. This might change in the
+ * future, say framebuffers for the console driver(s) could be
+ * fix-mapped?
+ */
+enum fixed_addresses {
+#ifdef CONFIG_X86_LOCAL_APIC
+ FIX_APIC_BASE, /* local (CPU) APIC -- required for SMP and UP alike */
+#endif
+#ifdef CONFIG_X86_IO_APIC
+ FIX_IO_APIC_BASE_0,
+ FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+#endif
+#ifdef CONFIG_HIGHMEM
+ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#endif
+ __end_of_fixed_addresses
+};
+
+extern void __set_fixmap (enum fixed_addresses idx,
+ l1_pgentry_t entry);
+
+#define set_fixmap(idx, phys) \
+ __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR))
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+ __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR_NOCACHE))
+/*
+ * used by vmalloc.c.
+ *
+ * Leave one empty page between vmalloc'ed areas and
+ * the start of the fixmap, and leave one page empty
+ * at the top of mem..
+ */
+#define FIXADDR_TOP (0xffffe000UL)
+#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+
+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+
+extern void __this_fixmap_does_not_exist(void);
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-dereference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static inline unsigned long fix_to_virt(const unsigned int idx)
+{
+ /*
+ * this branch gets completely eliminated after inlining,
+ * except when someone tries to use fixaddr indices in an
+ * illegal way. (such as mixing up address types or using
+ * out-of-range indices).
+ *
+ * If it doesn't get removed, the linker will complain
+ * loudly with a reasonably clear error message..
+ */
+ if (idx >= __end_of_fixed_addresses)
+ __this_fixmap_does_not_exist();
+
+ return __fix_to_virt(idx);
+}
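+
+/*
+ * Usage sketch: give the local APIC a fixed, uncached virtual address.
+ * APIC_DEFAULT_PHYS_BASE is assumed to come from <asm/apicdef.h>,
+ * included above; real setup code may differ.
+ */
+#ifdef CONFIG_X86_LOCAL_APIC
+static inline unsigned long example_map_lapic(void)
+{
+    set_fixmap_nocache(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
+    return fix_to_virt(FIX_APIC_BASE); /* constant-folded when inlined */
+}
+#endif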
+
+#endif
diff --git a/xen/include/asm-i386/flushtlb.h b/xen/include/asm-i386/flushtlb.h
new file mode 100644
index 0000000000..306839c6a4
--- /dev/null
+++ b/xen/include/asm-i386/flushtlb.h
@@ -0,0 +1,48 @@
+/******************************************************************************
+ * flushtlb.h
+ *
+ * TLB flush macros that count flushes. Counting is used to enforce
+ * zero-copy safety, particularly for the network code.
+ *
+ * akw - Jan 21, 2003
+ */
+
+#ifndef __FLUSHTLB_H
+#define __FLUSHTLB_H
+
+#include <xeno/smp.h>
+
+unsigned long tlb_flush_count[NR_CPUS];
+#define __read_cr3(__var) \
+ do { \
+ __asm__ __volatile ( \
+ "movl %%cr3, %0;" \
+ : "=r" (__var)); \
+ } while (0)
+
+#define __write_cr3_counted(__pa) \
+ do { \
+ __asm__ __volatile__ ( \
+ "movl %0, %%cr3;" \
+ :: "r" (__pa) \
+ : "memory"); \
+ tlb_flush_count[smp_processor_id()]++; \
+ } while (0)
+
+#define __flush_tlb_counted() \
+ do { \
+ unsigned int tmpreg; \
+ \
+ __asm__ __volatile__( \
+ "movl %%cr3, %0; # flush TLB \n" \
+ "movl %0, %%cr3; " \
+ : "=r" (tmpreg) \
+ :: "memory"); \
+ tlb_flush_count[smp_processor_id()]++; \
+ } while (0)
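+
+/*
+ * Usage sketch: zero-copy code snapshots the per-CPU count when it tears
+ * down a mapping, and can later tell whether a flush has happened since.
+ */
+static inline int example_flushed_since(unsigned long stamp)
+{
+    return tlb_flush_count[smp_processor_id()] != stamp;
+}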
+
+#endif
+
diff --git a/xen/include/asm-i386/hardirq.h b/xen/include/asm-i386/hardirq.h
new file mode 100644
index 0000000000..bad529b882
--- /dev/null
+++ b/xen/include/asm-i386/hardirq.h
@@ -0,0 +1,88 @@
+#ifndef __ASM_HARDIRQ_H
+#define __ASM_HARDIRQ_H
+
+#include <xeno/config.h>
+#include <xeno/irq.h>
+
+/* assembly code in softirq.h is sensitive to the offsets of these fields */
+typedef struct {
+ unsigned int __softirq_pending;
+ unsigned int __local_irq_count;
+ unsigned int __local_bh_count;
+ unsigned int __syscall_count;
+} ____cacheline_aligned irq_cpustat_t;
+
+#include <xeno/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
+
+/*
+ * Are we in an interrupt context? Either doing bottom half
+ * or hardware interrupt processing?
+ */
+#define in_interrupt() ({ int __cpu = smp_processor_id(); \
+ (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); })
+
+#define in_irq() (local_irq_count(smp_processor_id()) != 0)
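+
+/*
+ * Usage sketch: code that must not run in interrupt context can assert
+ * it, e.g.
+ *   if ( in_interrupt() ) BUG();
+ * before taking a path that may block.
+ */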
+
+#ifndef CONFIG_SMP
+
+#define hardirq_trylock(cpu) (local_irq_count(cpu) == 0)
+#define hardirq_endlock(cpu) do { } while (0)
+
+#define irq_enter(cpu, irq) (local_irq_count(cpu)++)
+#define irq_exit(cpu, irq) (local_irq_count(cpu)--)
+
+#define synchronize_irq() barrier()
+
+#else
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+
+extern unsigned char global_irq_holder;
+extern unsigned volatile long global_irq_lock; /* long for set_bit -RR */
+
+static inline int irqs_running (void)
+{
+ int i;
+
+ for (i = 0; i < smp_num_cpus; i++)
+ if (local_irq_count(i))
+ return 1;
+ return 0;
+}
+
+static inline void release_irqlock(int cpu)
+{
+ /* if we didn't own the irq lock, just ignore.. */
+ if (global_irq_holder == (unsigned char) cpu) {
+ global_irq_holder = 0xff;
+ clear_bit(0,&global_irq_lock);
+ }
+}
+
+static inline void irq_enter(int cpu, int irq)
+{
+ ++local_irq_count(cpu);
+
+ while (test_bit(0,&global_irq_lock)) {
+ cpu_relax();
+ }
+}
+
+static inline void irq_exit(int cpu, int irq)
+{
+ --local_irq_count(cpu);
+}
+
+static inline int hardirq_trylock(int cpu)
+{
+ return !local_irq_count(cpu) && !test_bit(0,&global_irq_lock);
+}
+
+#define hardirq_endlock(cpu) do { } while (0)
+
+extern void synchronize_irq(void);
+
+#endif /* CONFIG_SMP */
+
+#endif /* __ASM_HARDIRQ_H */
diff --git a/xen/include/asm-i386/hdreg.h b/xen/include/asm-i386/hdreg.h
new file mode 100644
index 0000000000..1ad5c07394
--- /dev/null
+++ b/xen/include/asm-i386/hdreg.h
@@ -0,0 +1,12 @@
+/*
+ * linux/include/asm-i386/hdreg.h
+ *
+ * Copyright (C) 1994-1996 Linus Torvalds & authors
+ */
+
+#ifndef __ASMi386_HDREG_H
+#define __ASMi386_HDREG_H
+
+typedef unsigned short ide_ioreg_t;
+
+#endif /* __ASMi386_HDREG_H */
diff --git a/xen/include/asm-i386/i387.h b/xen/include/asm-i386/i387.h
new file mode 100644
index 0000000000..7ec679d446
--- /dev/null
+++ b/xen/include/asm-i386/i387.h
@@ -0,0 +1,39 @@
+/*
+ * include/asm-i386/i387.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#ifndef __ASM_I386_I387_H
+#define __ASM_I386_I387_H
+
+#include <xeno/sched.h>
+#include <asm/processor.h>
+
+extern void init_fpu(void);
+extern void save_init_fpu( struct task_struct *tsk );
+extern void restore_fpu( struct task_struct *tsk );
+
+#define unlazy_fpu( tsk ) do { \
+ if ( tsk->flags & PF_USEDFPU ) \
+ save_init_fpu( tsk ); \
+} while (0)
+
+#define clear_fpu( tsk ) do { \
+ if ( tsk->flags & PF_USEDFPU ) { \
+ asm volatile("fwait"); \
+ tsk->flags &= ~PF_USEDFPU; \
+ stts(); \
+ } \
+} while (0)
+
+#define load_mxcsr( val ) do { \
+ unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \
+ asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \
+} while (0)
+
+#endif /* __ASM_I386_I387_H */
diff --git a/xen/include/asm-i386/ide.h b/xen/include/asm-i386/ide.h
new file mode 100644
index 0000000000..6642abf467
--- /dev/null
+++ b/xen/include/asm-i386/ide.h
@@ -0,0 +1,128 @@
+/*
+ * linux/include/asm-i386/ide.h
+ *
+ * Copyright (C) 1994-1996 Linus Torvalds & authors
+ */
+
+/*
+ * This file contains the i386 architecture specific IDE code.
+ */
+
+#ifndef __ASMi386_IDE_H
+#define __ASMi386_IDE_H
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+
+#ifndef MAX_HWIFS
+# ifdef CONFIG_BLK_DEV_IDEPCI
+#define MAX_HWIFS 10
+# else
+#define MAX_HWIFS 6
+# endif
+#endif
+
+#define ide__sti() __sti()
+
+static __inline__ int ide_default_irq(ide_ioreg_t base)
+{
+ switch (base) {
+ case 0x1f0: return 14;
+ case 0x170: return 15;
+ case 0x1e8: return 11;
+ case 0x168: return 10;
+ case 0x1e0: return 8;
+ case 0x160: return 12;
+ default:
+ return 0;
+ }
+}
+
+static __inline__ ide_ioreg_t ide_default_io_base(int index)
+{
+ switch (index) {
+ case 0: return 0x1f0;
+ case 1: return 0x170;
+ case 2: return 0x1e8;
+ case 3: return 0x168;
+ case 4: return 0x1e0;
+ case 5: return 0x160;
+ default:
+ return 0;
+ }
+}
+
+static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, ide_ioreg_t data_port, ide_ioreg_t ctrl_port, int *irq)
+{
+ ide_ioreg_t reg = data_port;
+ int i;
+
+ for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
+ hw->io_ports[i] = reg;
+ reg += 1;
+ }
+ if (ctrl_port) {
+ hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
+ } else {
+ hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206;
+ }
+ if (irq != NULL)
+ *irq = 0;
+ hw->io_ports[IDE_IRQ_OFFSET] = 0;
+}
+
+static __inline__ void ide_init_default_hwifs(void)
+{
+#ifndef CONFIG_BLK_DEV_IDEPCI
+ hw_regs_t hw;
+ int index;
+
+ for(index = 0; index < MAX_HWIFS; index++) {
+ ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL);
+ hw.irq = ide_default_irq(ide_default_io_base(index));
+ ide_register_hw(&hw, NULL);
+ }
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+}
+
+typedef union {
+ unsigned all : 8; /* all of the bits together */
+ struct {
+ unsigned head : 4; /* always zeros here */
+ unsigned unit : 1; /* drive select number, 0 or 1 */
+ unsigned bit5 : 1; /* always 1 */
+ unsigned lba : 1; /* using LBA instead of CHS */
+ unsigned bit7 : 1; /* always 1 */
+ } b;
+} select_t;
+
+typedef union {
+ unsigned all : 8; /* all of the bits together */
+ struct {
+ unsigned bit0 : 1;
+ unsigned nIEN : 1; /* device INTRQ to host */
+ unsigned SRST : 1; /* host soft reset bit */
+ unsigned bit3 : 1; /* ATA-2 thingy */
+ unsigned reserved456 : 3;
+ unsigned HOB : 1; /* 48-bit address ordering */
+ } b;
+} control_t;
+
+#define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id))
+#define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id))
+#define ide_check_region(from,extent) check_region((from), (extent))
+#define ide_request_region(from,extent,name) request_region((from), (extent), (name))
+#define ide_release_region(from,extent) release_region((from), (extent))
+
+/*
+ * The following are not needed for the non-m68k ports
+ */
+#define ide_ack_intr(hwif) (1)
+#define ide_fix_driveid(id) do {} while (0)
+#define ide_release_lock(lock) do {} while (0)
+#define ide_get_lock(lock, hdlr, data) do {} while (0)
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASMi386_IDE_H */
diff --git a/xen/include/asm-i386/io.h b/xen/include/asm-i386/io.h
new file mode 100644
index 0000000000..9b54ae278d
--- /dev/null
+++ b/xen/include/asm-i386/io.h
@@ -0,0 +1,253 @@
+#ifndef _ASM_IO_H
+#define _ASM_IO_H
+
+#include <xeno/config.h>
+#include <asm/page.h>
+
+#define IO_SPACE_LIMIT 0xffff
+
+/*#include <linux/vmalloc.h>*/
+
+/*
+ * Temporary debugging check to catch old code using
+ * unmapped ISA addresses. Will be removed in 2.4.
+ */
+#if CONFIG_DEBUG_IOVIRT
+ extern void *__io_virt_debug(unsigned long x, const char *file, int line);
+ extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line);
+ #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__)
+//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__)
+#else
+ #define __io_virt(x) ((void *)(x))
+//#define __io_phys(x) __pa(x)
+#endif
+
+/*
+ * Change virtual addresses to physical addresses and vice versa.
+ * These are pretty trivial on x86.
+ */
+static inline unsigned long virt_to_phys(volatile void * address)
+{
+ return __pa(address);
+}
+
+static inline void * phys_to_virt(unsigned long address)
+{
+ return __va(address);
+}
+
+/*
+ * Change "struct page" to physical address.
+ */
+#define page_to_phys(page) (((page) - frame_table) << PAGE_SHIFT)
+
+extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
+
+static inline void * ioremap (unsigned long offset, unsigned long size)
+{
+ return __ioremap(offset, size, 0);
+}
+
+/*
+ * This one maps high-address device memory and turns off caching for that area.
+ * It's useful if some control registers are in such an area and write-combining
+ * or read caching is not desirable:
+ */
+static inline void * ioremap_nocache (unsigned long offset, unsigned long size)
+{
+ return __ioremap(offset, size, _PAGE_PCD);
+}
+
+extern void iounmap(void *addr);
+
+/*
+ * IO bus memory addresses are also 1:1 with the physical address
+ */
+#define virt_to_bus virt_to_phys
+#define bus_to_virt phys_to_virt
+#define page_to_bus page_to_phys
+
+/*
+ * readX/writeX() are used to access memory mapped devices. On some
+ * architectures the memory mapped IO stuff needs to be accessed
+ * differently. On the x86 architecture, we just read/write the
+ * memory location directly.
+ */
+
+#define readb(addr) (*(volatile unsigned char *) __io_virt(addr))
+#define readw(addr) (*(volatile unsigned short *) __io_virt(addr))
+#define readl(addr) (*(volatile unsigned int *) __io_virt(addr))
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
+
+#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b))
+#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b))
+#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b))
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+
+#define memset_io(a,b,c) memset(__io_virt(a),(b),(c))
+#define memcpy_fromio(a,b,c) memcpy((a),__io_virt(b),(c))
+#define memcpy_toio(a,b,c) memcpy(__io_virt(a),(b),(c))
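+
+/*
+ * Usage sketch (hypothetical device): map a memory-mapped register
+ * window and access a 32-bit register. The physical address and the
+ * register layout are made up.
+ */
+static inline unsigned int example_read_reg0(void)
+{
+    void *regs = ioremap_nocache(0xfebff000, PAGE_SIZE);
+    unsigned int v;
+
+    if (regs == NULL)
+        return 0;
+    v = readl(regs);     /* register at offset 0 */
+    writel(v | 1, regs); /* set a hypothetical enable bit */
+    iounmap(regs);
+    return v;
+}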
+
+/*
+ * ISA space is 'always mapped' on a typical x86 system, no need to
+ * explicitly ioremap() it. The fact that the ISA IO space is mapped
+ * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
+ * are physical addresses. The following constant pointer can be
+ * used as the IO-area pointer (it can be iounmapped as well, so the
+ * analogy with PCI mappings holds quite well):
+ */
+#define __ISA_IO_base ((char *)(PAGE_OFFSET))
+
+#define isa_readb(a) readb(__ISA_IO_base + (a))
+#define isa_readw(a) readw(__ISA_IO_base + (a))
+#define isa_readl(a) readl(__ISA_IO_base + (a))
+#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
+#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
+#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
+#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c))
+#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c))
+#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c))
+
+
+/*
+ * Again, i386 does not require memory-IO-specific functions.
+ */
+
+#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d))
+#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d))
+
+static inline int check_signature(unsigned long io_addr,
+ const unsigned char *signature, int length)
+{
+ int retval = 0;
+ do {
+ if (readb(io_addr) != *signature)
+ goto out;
+ io_addr++;
+ signature++;
+ length--;
+ } while (length);
+ retval = 1;
+out:
+ return retval;
+}
+
+static inline int isa_check_signature(unsigned long io_addr,
+ const unsigned char *signature, int length)
+{
+ int retval = 0;
+ do {
+ if (isa_readb(io_addr) != *signature)
+ goto out;
+ io_addr++;
+ signature++;
+ length--;
+ } while (length);
+ retval = 1;
+out:
+ return retval;
+}
+
+/*
+ * Cache management
+ *
+ * This needed for two cases
+ * 1. Out of order aware processors
+ * 2. Accidentally out of order processors (PPro errata #51)
+ */
+
+#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+
+static inline void flush_write_buffers(void)
+{
+ __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory");
+}
+
+#define dma_cache_inv(_start,_size) flush_write_buffers()
+#define dma_cache_wback(_start,_size) flush_write_buffers()
+#define dma_cache_wback_inv(_start,_size) flush_write_buffers()
+
+#else
+
+/* Nothing to do */
+
+#define dma_cache_inv(_start,_size) do { } while (0)
+#define dma_cache_wback(_start,_size) do { } while (0)
+#define dma_cache_wback_inv(_start,_size) do { } while (0)
+#define flush_write_buffers()
+
+#endif
+
+#ifdef SLOW_IO_BY_JUMPING
+#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:"
+#else
+#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+#endif
+
+#ifdef REALLY_SLOW_IO
+#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
+#else
+#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+#endif
+
+
+/*
+ * Talk about misusing macros..
+ */
+#define __OUT1(s,x) \
+static inline void out##s(unsigned x value, unsigned short port) {
+
+#define __OUT2(s,s1,s2) \
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+
+#define __OUT(s,s1,x) \
+__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));}
+
+#define __IN1(s) \
+static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
+
+#define __IN2(s,s1,s2) \
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+
+#define __IN(s,s1,i...) \
+__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; }
+
+#define __INS(s) \
+static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; ins" #s \
+: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define __OUTS(s) \
+static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; outs" #s \
+: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define RETURN_TYPE unsigned char
+__IN(b,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned short
+__IN(w,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned int
+__IN(l,"")
+#undef RETURN_TYPE
+
+__OUT(b,"b",char)
+__OUT(w,"w",short)
+__OUT(l,,int)
+
+__INS(b)
+__INS(w)
+__INS(l)
+
+__OUTS(b)
+__OUTS(w)
+__OUTS(l)
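+
+/*
+ * The expansions above generate the usual port-IO families:
+ *   inb/inw/inl and inb_p/inw_p/inl_p (the _p forms add an IO delay),
+ *   outb/outw/outl and outb_p/outw_p/outl_p,
+ *   insb/insw/insl and outsb/outsw/outsl for string transfers.
+ * e.g. outb(0, 0x80) writes one byte to the POST diagnostic port.
+ */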
+
+#endif
diff --git a/xen/include/asm-i386/io_apic.h b/xen/include/asm-i386/io_apic.h
new file mode 100644
index 0000000000..44916209a8
--- /dev/null
+++ b/xen/include/asm-i386/io_apic.h
@@ -0,0 +1,148 @@
+#ifndef __ASM_IO_APIC_H
+#define __ASM_IO_APIC_H
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+
+/*
+ * Intel IO-APIC support for SMP and UP systems.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
+ */
+
+#ifdef CONFIG_X86_IO_APIC
+
+#define APIC_MISMATCH_DEBUG
+
+#define IO_APIC_BASE(idx) \
+ ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \
+ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK)))
+
+/*
+ * The structure of the IO-APIC:
+ */
+struct IO_APIC_reg_00 {
+ __u32 __reserved_2 : 24,
+ ID : 4,
+ __reserved_1 : 4;
+} __attribute__ ((packed));
+
+struct IO_APIC_reg_01 {
+ __u32 version : 8,
+ __reserved_2 : 7,
+ PRQ : 1,
+ entries : 8,
+ __reserved_1 : 8;
+} __attribute__ ((packed));
+
+struct IO_APIC_reg_02 {
+ __u32 __reserved_2 : 24,
+ arbitration : 4,
+ __reserved_1 : 4;
+} __attribute__ ((packed));
+
+/*
+ * # of IO-APICs and # of IRQ routing registers
+ */
+extern int nr_ioapics;
+extern int nr_ioapic_registers[MAX_IO_APICS];
+
+enum ioapic_irq_destination_types {
+ dest_Fixed = 0,
+ dest_LowestPrio = 1,
+ dest_SMI = 2,
+ dest__reserved_1 = 3,
+ dest_NMI = 4,
+ dest_INIT = 5,
+ dest__reserved_2 = 6,
+ dest_ExtINT = 7
+};
+
+struct IO_APIC_route_entry {
+ __u32 vector : 8,
+ delivery_mode : 3, /* 000: FIXED
+ * 001: lowest prio
+ * 111: ExtINT
+ */
+ dest_mode : 1, /* 0: physical, 1: logical */
+ delivery_status : 1,
+ polarity : 1,
+ irr : 1,
+ trigger : 1, /* 0: edge, 1: level */
+ mask : 1, /* 0: enabled, 1: disabled */
+ __reserved_2 : 15;
+
+ union { struct { __u32
+ __reserved_1 : 24,
+ physical_dest : 4,
+ __reserved_2 : 4;
+ } physical;
+
+ struct { __u32
+ __reserved_1 : 24,
+ logical_dest : 8;
+ } logical;
+ } dest;
+
+} __attribute__ ((packed));
+
+/*
+ * MP-BIOS irq configuration table structures:
+ */
+
+/* I/O APIC entries */
+extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+extern int mp_irq_entries;
+
+/* MP IRQ source entries */
+extern struct mpc_config_intsrc *mp_irqs;
+
+/* non-0 if default (table-less) MP configuration */
+extern int mpc_default_type;
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+ *IO_APIC_BASE(apic) = reg;
+ return *(IO_APIC_BASE(apic)+4);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ *IO_APIC_BASE(apic) = reg;
+ *(IO_APIC_BASE(apic)+4) = value;
+}
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int value)
+{
+ *(IO_APIC_BASE(apic)+4) = value;
+}
+
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+ (void) *(IO_APIC_BASE(apic)+4);
+}
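+
+/*
+ * Usage sketch: mask redirection entry `pin' on IO-APIC `apic' with the
+ * read-modify-write pattern (io_apic_read() sets up the index register,
+ * io_apic_modify() reuses it). Redirection entries start at register
+ * 0x10, two registers per entry; bit 16 of the low word is the mask.
+ * Callers are assumed to hold the appropriate interrupt lock.
+ */
+static inline void example_mask_ioapic_pin(unsigned int apic, unsigned int pin)
+{
+    unsigned int low = io_apic_read(apic, 0x10 + 2 * pin);
+    io_apic_modify(apic, low | 0x00010000);
+    io_apic_sync(apic);
+}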
+
+/* 1 if "noapic" boot option passed */
+extern int skip_ioapic_setup;
+
+/*
+ * If we use the IO-APIC for IRQ routing, disable automatic
+ * assignment of PCI IRQ's.
+ */
+#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup)
+
+#else /* !CONFIG_X86_IO_APIC */
+#define io_apic_assign_pci_irqs 0
+#endif
+
+#endif
diff --git a/xen/include/asm-i386/ioctl.h b/xen/include/asm-i386/ioctl.h
new file mode 100644
index 0000000000..c75f20ade6
--- /dev/null
+++ b/xen/include/asm-i386/ioctl.h
@@ -0,0 +1,75 @@
+/* $Id: ioctl.h,v 1.5 1993/07/19 21:53:50 root Exp root $
+ *
+ * linux/ioctl.h for Linux by H.H. Bergman.
+ */
+
+#ifndef _ASMI386_IOCTL_H
+#define _ASMI386_IOCTL_H
+
+/* ioctl command encoding: 32 bits total, command in lower 16 bits,
+ * size of the parameter structure in the lower 14 bits of the
+ * upper 16 bits.
+ * Encoding the size of the parameter structure in the ioctl request
+ * is useful for catching programs compiled with old versions
+ * and to avoid overwriting user space outside the user buffer area.
+ * The highest 2 bits are reserved for indicating the ``access mode''.
+ * NOTE: This limits the max parameter size to 16kB -1 !
+ */
+
+/*
+ * The following is for compatibility across the various Linux
+ * platforms. The i386 ioctl numbering scheme doesn't really enforce
+ * a type field. De facto, however, the top 8 bits of the lower 16
+ * bits are indeed used as a type field, so we might just as well make
+ * this explicit here. Please be sure to use the decoding macros
+ * below from now on.
+ */
+#define _IOC_NRBITS 8
+#define _IOC_TYPEBITS 8
+#define _IOC_SIZEBITS 14
+#define _IOC_DIRBITS 2
+
+#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1)
+#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1)
+#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1)
+#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1)
+
+#define _IOC_NRSHIFT 0
+#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS)
+#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS)
+#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS)
+
+/*
+ * Direction bits.
+ */
+#define _IOC_NONE 0U
+#define _IOC_WRITE 1U
+#define _IOC_READ 2U
+
+#define _IOC(dir,type,nr,size) \
+ (((dir) << _IOC_DIRSHIFT) | \
+ ((type) << _IOC_TYPESHIFT) | \
+ ((nr) << _IOC_NRSHIFT) | \
+ ((size) << _IOC_SIZESHIFT))
+
+/* used to create numbers */
+#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0)
+#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size))
+#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
+#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
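+
+/*
+ * Example (hypothetical driver): a command defined as
+ *   #define XDEV_GET_COUNT _IOR('X', 1, int)
+ * decodes as _IOC_DIR() == _IOC_READ, _IOC_TYPE() == 'X',
+ * _IOC_NR() == 1 and _IOC_SIZE() == sizeof(int).
+ */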
+
+/* used to decode ioctl numbers.. */
+#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
+#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
+#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
+#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
+
+/* ...and for the drivers/sound files... */
+
+#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT)
+#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT)
+#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT)
+#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT)
+#define IOCSIZE_SHIFT (_IOC_SIZESHIFT)
+
+#endif /* _ASMI386_IOCTL_H */
diff --git a/xen/include/asm-i386/irq.h b/xen/include/asm-i386/irq.h
new file mode 100644
index 0000000000..41049e71b2
--- /dev/null
+++ b/xen/include/asm-i386/irq.h
@@ -0,0 +1,203 @@
+#ifndef _ASM_HW_IRQ_H
+#define _ASM_HW_IRQ_H
+
+/* (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar */
+
+#include <xeno/config.h>
+#include <asm/atomic.h>
+
+#define SA_INTERRUPT 0x20000000
+#define SA_SHIRQ 0x04000000
+
+#define SA_SAMPLE_RANDOM 0 /* Linux driver compatibility */
+
+#define TIMER_IRQ 0
+
+extern void disable_irq(unsigned int);
+extern void disable_irq_nosync(unsigned int);
+extern void enable_irq(unsigned int);
+
+/*
+ * IDT vectors usable for external interrupt sources start
+ * at 0x30:
+ */
+#define FIRST_EXTERNAL_VECTOR 0x30
+
+#define NR_IRQS (256 - FIRST_EXTERNAL_VECTOR)
+
+#define HYPERVISOR_CALL_VECTOR 0x82
+
+/*
+ * Vectors 0x30-0x3f are used for ISA interrupts.
+ */
+
+/*
+ * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
+ *
+ * some of the following vectors are 'rare', they are merged
+ * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
+ * TLB, reschedule and local APIC vectors are performance-critical.
+ *
+ * Vectors 0xf0-0xfa are free (reserved for future Linux use).
+ */
+#define SPURIOUS_APIC_VECTOR 0xff
+#define ERROR_APIC_VECTOR 0xfe
+#define INVALIDATE_TLB_VECTOR 0xfd
+#define EVENT_CHECK_VECTOR 0xfc
+#define CALL_FUNCTION_VECTOR 0xfb
+#define KDB_VECTOR 0xfa
+
+/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+#define LOCAL_TIMER_VECTOR 0xef
+
+/*
+ * First APIC vector available to drivers: (vectors 0x40-0xee)
+ * we start at 0x41 to spread out vectors evenly between priority
+ * levels. (0x82 is the hypervisor call vector)
+ */
+#define FIRST_DEVICE_VECTOR 0x41
+#define FIRST_SYSTEM_VECTOR 0xef
+
+extern int irq_vector[NR_IRQS];
+#define IO_APIC_VECTOR(irq) irq_vector[irq]
+
+/*
+ * Various low-level irq details needed by irq.c, process.c,
+ * time.c, io_apic.c and smp.c
+ *
+ * Interrupt entry/exit code at both C and assembly level
+ */
+
+extern void mask_irq(unsigned int irq);
+extern void unmask_irq(unsigned int irq);
+extern void disable_8259A_irq(unsigned int irq);
+extern void enable_8259A_irq(unsigned int irq);
+extern int i8259A_irq_pending(unsigned int irq);
+extern void make_8259A_irq(unsigned int irq);
+extern void init_8259A(int aeoi);
+extern void FASTCALL(send_IPI_self(int vector));
+extern void init_VISWS_APIC_irqs(void);
+extern void setup_IO_APIC(void);
+extern void disable_IO_APIC(void);
+extern void print_IO_APIC(void);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+extern void send_IPI(int dest, int vector);
+
+extern unsigned long io_apic_irqs;
+
+extern atomic_t irq_err_count;
+extern atomic_t irq_mis_count;
+
+extern char _stext, _etext;
+
+#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+
+#define SAVE_ALL \
+ "cld\n\t" \
+ "pushl %es\n\t" \
+ "pushl %ds\n\t" \
+ "pushl %eax\n\t" \
+ "pushl %ebp\n\t" \
+ "pushl %edi\n\t" \
+ "pushl %esi\n\t" \
+ "pushl %edx\n\t" \
+ "pushl %ecx\n\t" \
+ "pushl %ebx\n\t" \
+ "movl $" STR(__HYPERVISOR_DS) ",%edx\n\t" \
+ "movl %edx,%ds\n\t" \
+ "movl %edx,%es\n\t"
+
+#define IRQ_NAME2(nr) nr##_interrupt(void)
+#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
+#define GET_CURRENT \
+ "movl %esp, %ebx\n\t" \
+ "andl $-8192, %ebx\n\t"
+
+/*
+ * SMP has a few special interrupts for IPI messages
+ */
+
+ /* There is a second layer of macros just to get the symbolic
+ name for the vector evaluated. This change is for RTLinux. */
+#define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v)
+#define XBUILD_SMP_INTERRUPT(x,v)\
+asmlinkage void x(void); \
+asmlinkage void call_##x(void); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+SYMBOL_NAME_STR(x) ":\n\t" \
+ "pushl $"#v"-256\n\t" \
+ SAVE_ALL \
+ SYMBOL_NAME_STR(call_##x)":\n\t" \
+ "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
+ "jmp ret_from_intr\n");
+
+#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v)
+#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \
+asmlinkage void x(struct pt_regs * regs); \
+asmlinkage void call_##x(void); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+SYMBOL_NAME_STR(x) ":\n\t" \
+ "pushl $"#v"-256\n\t" \
+ SAVE_ALL \
+ "movl %esp,%eax\n\t" \
+ "pushl %eax\n\t" \
+ SYMBOL_NAME_STR(call_##x)":\n\t" \
+ "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
+ "addl $4,%esp\n\t" \
+ "jmp ret_from_intr\n");
+
+#define BUILD_COMMON_IRQ() \
+asmlinkage void call_do_IRQ(void); \
+__asm__( \
+ "\n" __ALIGN_STR"\n" \
+ "common_interrupt:\n\t" \
+ SAVE_ALL \
+ SYMBOL_NAME_STR(call_do_IRQ)":\n\t" \
+ "call " SYMBOL_NAME_STR(do_IRQ) "\n\t" \
+ "jmp ret_from_intr\n");
+
+/*
+ * Subtle: orig_eax is used by the signal code to distinguish between
+ * system calls and interrupted 'random user-space'. Thus we have
+ * to put a negative value into orig_eax here. (the problem is that
+ * both system calls and IRQs want to have small integer numbers in
+ * orig_eax, and the syscall code has won the optimization conflict ;)
+ *
+ * Subtle as a pig's ear. VY
+ */
+
+#define BUILD_IRQ(nr) \
+asmlinkage void IRQ_NAME(nr); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
+ "pushl $"#nr"-256\n\t" \
+ "jmp common_interrupt");
+
+extern unsigned long prof_cpu_mask;
+extern unsigned int * prof_buffer;
+extern unsigned long prof_len;
+extern unsigned long prof_shift;
+
+#include <xeno/irq.h>
+
+#ifdef CONFIG_SMP /* more of this file should probably be #ifdef'ed for SMP */
+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {
+ if (IO_APIC_IRQ(i))
+ send_IPI_self(IO_APIC_VECTOR(i));
+}
+#else
+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {}
+#endif
+
+#endif /* _ASM_HW_IRQ_H */
diff --git a/xen/include/asm-i386/mc146818rtc.h b/xen/include/asm-i386/mc146818rtc.h
new file mode 100644
index 0000000000..03a4efa9e8
--- /dev/null
+++ b/xen/include/asm-i386/mc146818rtc.h
@@ -0,0 +1,113 @@
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+#ifndef _ASM_MC146818RTC_H
+#define _ASM_MC146818RTC_H
+
+#include <asm/io.h>
+#include <xeno/spinlock.h>
+
+extern spinlock_t rtc_lock; /* serialize CMOS RAM access */
+
+/**********************************************************************
+ * register summary
+ **********************************************************************/
+#define RTC_SECONDS 0
+#define RTC_SECONDS_ALARM 1
+#define RTC_MINUTES 2
+#define RTC_MINUTES_ALARM 3
+#define RTC_HOURS 4
+#define RTC_HOURS_ALARM 5
+/* RTC_*_alarm is always true if 2 MSBs are set */
+# define RTC_ALARM_DONT_CARE 0xC0
+
+#define RTC_DAY_OF_WEEK 6
+#define RTC_DAY_OF_MONTH 7
+#define RTC_MONTH 8
+#define RTC_YEAR 9
+
+/* control registers - Motorola names */
+#define RTC_REG_A 10
+#define RTC_REG_B 11
+#define RTC_REG_C 12
+#define RTC_REG_D 13
+
+/**********************************************************************
+ * register details
+ **********************************************************************/
+#define RTC_FREQ_SELECT RTC_REG_A
+
+/* update-in-progress - set to "1" 244 microsecs before RTC goes off the bus,
+ * reset after update (may take 1.984ms @ 32768Hz RefClock) is complete,
+ * giving a maximum high interval of 2.228 ms.
+ */
+# define RTC_UIP 0x80
+# define RTC_DIV_CTL 0x70
+ /* divider control: refclock values 4.194 / 1.049 MHz / 32.768 kHz */
+# define RTC_REF_CLCK_4MHZ 0x00
+# define RTC_REF_CLCK_1MHZ 0x10
+# define RTC_REF_CLCK_32KHZ 0x20
+ /* 2 values for divider stage reset, others for "testing purposes only" */
+# define RTC_DIV_RESET1 0x60
+# define RTC_DIV_RESET2 0x70
+ /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 15=2Hz */
+# define RTC_RATE_SELECT 0x0F
+
+/**********************************************************************/
+#define RTC_CONTROL RTC_REG_B
+# define RTC_SET 0x80 /* disable updates for clock setting */
+# define RTC_PIE 0x40 /* periodic interrupt enable */
+# define RTC_AIE 0x20 /* alarm interrupt enable */
+# define RTC_UIE 0x10 /* update-finished interrupt enable */
+# define RTC_SQWE 0x08 /* enable square-wave output */
+# define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */
+# define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */
+# define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */
+
+/**********************************************************************/
+#define RTC_INTR_FLAGS RTC_REG_C
+/* caution - cleared by read */
+# define RTC_IRQF 0x80 /* any of the following 3 is active */
+# define RTC_PF 0x40
+# define RTC_AF 0x20
+# define RTC_UF 0x10
+
+/**********************************************************************/
+#define RTC_VALID RTC_REG_D
+# define RTC_VRT 0x80 /* valid RAM and time */
+/**********************************************************************/
+
+/* example: !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
+ * determines if the following two #defines are needed
+ */
+#ifndef BCD_TO_BIN
+#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)
+#endif
+
+#ifndef BIN_TO_BCD
+#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10)
+#endif
+
+
+#ifndef RTC_PORT
+#define RTC_PORT(x) (0x70 + (x))
+#define RTC_ALWAYS_BCD 1 /* assume BCD regardless of RTC_DM_BINARY */
+#endif
+
+/*
+ * All machines supported so far access the RTC index register via
+ * an ISA port access, but the way the data register is accessed differs ...
+ */
+#define CMOS_READ(addr) ({ \
+outb_p((addr),RTC_PORT(0)); \
+inb_p(RTC_PORT(1)); \
+})
+#define CMOS_WRITE(val, addr) ({ \
+outb_p((addr),RTC_PORT(0)); \
+outb_p((val),RTC_PORT(1)); \
+})
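+
+/*
+ * Usage sketch: read the seconds register under rtc_lock, converting
+ * from BCD unless the clock is in binary mode. (A careful caller also
+ * waits for RTC_UIP to clear first; that is omitted here.)
+ */
+static inline unsigned int example_read_seconds(void)
+{
+    unsigned int sec;
+
+    spin_lock(&rtc_lock);
+    sec = CMOS_READ(RTC_SECONDS);
+    if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+        BCD_TO_BIN(sec);
+    spin_unlock(&rtc_lock);
+    return sec;
+}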
+
+#define RTC_IRQ 8
+
+#endif /* _ASM_MC146818RTC_H */
diff --git a/xen/include/asm-i386/mpspec.h b/xen/include/asm-i386/mpspec.h
new file mode 100644
index 0000000000..2829cb54a3
--- /dev/null
+++ b/xen/include/asm-i386/mpspec.h
@@ -0,0 +1,224 @@
+#ifndef __ASM_MPSPEC_H
+#define __ASM_MPSPEC_H
+
+
+/*
+ * Structure definitions for SMP machines following the
+ * Intel Multiprocessing Specification 1.1 and 1.4.
+ */
+
+/*
+ * This tag identifies where the SMP configuration
+ * information is.
+ */
+
+#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_')
+
+/*
+ * A maximum of 16 APICs is possible with the current APIC ID architecture.
+ * xAPICs can have up to 256. SAPICs have 16 ID bits.
+ */
+#ifdef CONFIG_X86_CLUSTERED_APIC
+#define MAX_APICS 256
+#else
+#define MAX_APICS 16
+#endif
+
+#define MAX_MPC_ENTRY 1024
+
+struct intel_mp_floating
+{
+ char mpf_signature[4]; /* "_MP_" */
+ unsigned long mpf_physptr; /* Configuration table address */
+ unsigned char mpf_length; /* Our length (paragraphs) */
+ unsigned char mpf_specification;/* Specification version */
+ unsigned char mpf_checksum; /* Checksum (makes sum 0) */
+ unsigned char mpf_feature1; /* Standard or configuration ? */
+ unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */
+ unsigned char mpf_feature3; /* Unused (0) */
+ unsigned char mpf_feature4; /* Unused (0) */
+ unsigned char mpf_feature5; /* Unused (0) */
+};
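+
+/*
+ * Usage sketch: test a 16-byte-aligned candidate address for the
+ * floating pointer signature. A real scan (find_smp_config) would also
+ * verify mpf_checksum over mpf_length * 16 bytes; only the signature
+ * test is shown here.
+ */
+static inline int example_is_mpf(struct intel_mp_floating *mpf)
+{
+    return *(unsigned int *)mpf->mpf_signature == SMP_MAGIC_IDENT;
+}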
+
+struct mp_config_table
+{
+ char mpc_signature[4];
+#define MPC_SIGNATURE "PCMP"
+ unsigned short mpc_length; /* Size of table */
+ char mpc_spec; /* 0x01 */
+ char mpc_checksum;
+ char mpc_oem[8];
+ char mpc_productid[12];
+ unsigned long mpc_oemptr; /* 0 if not present */
+ unsigned short mpc_oemsize; /* 0 if not present */
+ unsigned short mpc_oemcount;
+ unsigned long mpc_lapic; /* APIC address */
+ unsigned long reserved;
+};
+
+/* Followed by entries */
+
+#define MP_PROCESSOR 0
+#define MP_BUS 1
+#define MP_IOAPIC 2
+#define MP_INTSRC 3
+#define MP_LINTSRC 4
+#define MP_TRANSLATION 192 /* Used by IBM NUMA-Q to describe node locality */
+
+struct mpc_config_processor
+{
+ unsigned char mpc_type;
+ unsigned char mpc_apicid; /* Local APIC number */
+ unsigned char mpc_apicver; /* Local APIC version */
+ unsigned char mpc_cpuflag;
+#define CPU_ENABLED 1 /* Processor is available */
+#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */
+ unsigned long mpc_cpufeature;
+#define CPU_STEPPING_MASK 0x0F
+#define CPU_MODEL_MASK 0xF0
+#define CPU_FAMILY_MASK 0xF00
+ unsigned long mpc_featureflag; /* CPUID feature value */
+ unsigned long mpc_reserved[2];
+};
+
+struct mpc_config_bus
+{
+ unsigned char mpc_type;
+ unsigned char mpc_busid;
+ unsigned char mpc_bustype[6] __attribute((packed));
+};
+
+/* List of Bus Type string values, Intel MP Spec. */
+#define BUSTYPE_EISA "EISA"
+#define BUSTYPE_ISA "ISA"
+#define BUSTYPE_INTERN "INTERN" /* Internal BUS */
+#define BUSTYPE_MCA "MCA"
+#define BUSTYPE_VL "VL" /* Local bus */
+#define BUSTYPE_PCI "PCI"
+#define BUSTYPE_PCMCIA "PCMCIA"
+#define BUSTYPE_CBUS "CBUS"
+#define BUSTYPE_CBUSII "CBUSII"
+#define BUSTYPE_FUTURE "FUTURE"
+#define BUSTYPE_MBI "MBI"
+#define BUSTYPE_MBII "MBII"
+#define BUSTYPE_MPI "MPI"
+#define BUSTYPE_MPSA "MPSA"
+#define BUSTYPE_NUBUS "NUBUS"
+#define BUSTYPE_TC "TC"
+#define BUSTYPE_VME "VME"
+#define BUSTYPE_XPRESS "XPRESS"
+
+struct mpc_config_ioapic
+{
+ unsigned char mpc_type;
+ unsigned char mpc_apicid;
+ unsigned char mpc_apicver;
+ unsigned char mpc_flags;
+#define MPC_APIC_USABLE 0x01
+ unsigned long mpc_apicaddr;
+};
+
+struct mpc_config_intsrc
+{
+ unsigned char mpc_type;
+ unsigned char mpc_irqtype;
+ unsigned short mpc_irqflag;
+ unsigned char mpc_srcbus;
+ unsigned char mpc_srcbusirq;
+ unsigned char mpc_dstapic;
+ unsigned char mpc_dstirq;
+};
+
+enum mp_irq_source_types {
+ mp_INT = 0,
+ mp_NMI = 1,
+ mp_SMI = 2,
+ mp_ExtINT = 3
+};
+
+#define MP_IRQDIR_DEFAULT 0
+#define MP_IRQDIR_HIGH 1
+#define MP_IRQDIR_LOW 3
+
+
+struct mpc_config_lintsrc
+{
+ unsigned char mpc_type;
+ unsigned char mpc_irqtype;
+ unsigned short mpc_irqflag;
+ unsigned char mpc_srcbusid;
+ unsigned char mpc_srcbusirq;
+ unsigned char mpc_destapic;
+#define MP_APIC_ALL 0xFF
+ unsigned char mpc_destapiclint;
+};
+
+struct mp_config_oemtable
+{
+ char oem_signature[4];
+#define MPC_OEM_SIGNATURE "_OEM"
+ unsigned short oem_length; /* Size of table */
+ char oem_rev; /* 0x01 */
+ char oem_checksum;
+ char mpc_oem[8];
+};
+
+struct mpc_config_translation
+{
+ unsigned char mpc_type;
+ unsigned char trans_len;
+ unsigned char trans_type;
+ unsigned char trans_quad;
+ unsigned char trans_global;
+ unsigned char trans_local;
+ unsigned short trans_reserved;
+};
+
+/*
+ * Default configurations
+ *
+ * 1 2 CPU ISA 82489DX
+ * 2 2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining
+ * 3 2 CPU EISA 82489DX
+ * 4 2 CPU MCA 82489DX
+ * 5 2 CPU ISA+PCI
+ * 6 2 CPU EISA+PCI
+ * 7 2 CPU MCA+PCI
+ */
+
+#ifdef CONFIG_MULTIQUAD
+#define MAX_IRQ_SOURCES 512
+#else /* !CONFIG_MULTIQUAD */
+#define MAX_IRQ_SOURCES 256
+#endif /* CONFIG_MULTIQUAD */
+
+#define MAX_MP_BUSSES 32
+enum mp_bustype {
+ MP_BUS_ISA = 1,
+ MP_BUS_EISA,
+ MP_BUS_PCI,
+ MP_BUS_MCA
+};
+extern int *mp_bus_id_to_type;
+extern int *mp_bus_id_to_node;
+extern int *mp_bus_id_to_local;
+extern int *mp_bus_id_to_pci_bus;
+extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+
+extern unsigned int boot_cpu_physical_apicid;
+extern unsigned long phys_cpu_present_map;
+extern int smp_found_config;
+extern void find_smp_config (void);
+extern void get_smp_config (void);
+extern int nr_ioapics;
+extern int apic_version [MAX_APICS];
+extern int mp_irq_entries;
+extern struct mpc_config_intsrc *mp_irqs;
+extern int mpc_default_type;
+extern int mp_current_pci_id;
+extern unsigned long mp_lapic_addr;
+extern int pic_mode;
+extern int using_apic_timer;
+
+#endif
+
diff --git a/xen/include/asm-i386/msr.h b/xen/include/asm-i386/msr.h
new file mode 100644
index 0000000000..11bcb7f29e
--- /dev/null
+++ b/xen/include/asm-i386/msr.h
@@ -0,0 +1,104 @@
+#ifndef __ASM_MSR_H
+#define __ASM_MSR_H
+
+/*
+ * Access to machine-specific registers (available on 586 and better only)
+ * Note: the rd* operations modify the parameters directly (without using
+ * pointer indirection), this allows gcc to optimize better
+ */
+
+#define rdmsr(msr,val1,val2) \
+ __asm__ __volatile__("rdmsr" \
+ : "=a" (val1), "=d" (val2) \
+ : "c" (msr))
+
+#define wrmsr(msr,val1,val2) \
+ __asm__ __volatile__("wrmsr" \
+ : /* no outputs */ \
+ : "c" (msr), "a" (val1), "d" (val2))
+
+#define rdtsc(low,high) \
+ __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
+
+#define rdtscl(low) \
+ __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
+
+#define rdtscll(val) \
+ __asm__ __volatile__("rdtsc" : "=A" (val))
+
+#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
+
+#define rdpmc(counter,low,high) \
+ __asm__ __volatile__("rdpmc" \
+ : "=a" (low), "=d" (high) \
+ : "c" (counter))
+
+/* symbolic names for some interesting MSRs */
+/* Intel defined MSRs. */
+#define MSR_IA32_P5_MC_ADDR 0
+#define MSR_IA32_P5_MC_TYPE 1
+#define MSR_IA32_PLATFORM_ID 0x17
+#define MSR_IA32_EBL_CR_POWERON 0x2a
+
+#define MSR_IA32_APICBASE 0x1b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+
+#define MSR_IA32_UCODE_WRITE 0x79
+#define MSR_IA32_UCODE_REV 0x8b
+
+#define MSR_IA32_PERFCTR0 0xc1
+#define MSR_IA32_PERFCTR1 0xc2
+
+#define MSR_IA32_BBL_CR_CTL 0x119
+
+#define MSR_IA32_MCG_CAP 0x179
+#define MSR_IA32_MCG_STATUS 0x17a
+#define MSR_IA32_MCG_CTL 0x17b
+
+#define MSR_IA32_EVNTSEL0 0x186
+#define MSR_IA32_EVNTSEL1 0x187
+
+#define MSR_IA32_DEBUGCTLMSR 0x1d9
+#define MSR_IA32_LASTBRANCHFROMIP 0x1db
+#define MSR_IA32_LASTBRANCHTOIP 0x1dc
+#define MSR_IA32_LASTINTFROMIP 0x1dd
+#define MSR_IA32_LASTINTTOIP 0x1de
+
+#define MSR_IA32_MC0_CTL 0x400
+#define MSR_IA32_MC0_STATUS 0x401
+#define MSR_IA32_MC0_ADDR 0x402
+#define MSR_IA32_MC0_MISC 0x403
+
+/* AMD Defined MSRs */
+#define MSR_K6_EFER 0xC0000080
+#define MSR_K6_STAR 0xC0000081
+#define MSR_K6_WHCR 0xC0000082
+#define MSR_K6_UWCCR 0xC0000085
+#define MSR_K6_PSOR 0xC0000087
+#define MSR_K6_PFIR 0xC0000088
+
+#define MSR_K7_EVNTSEL0 0xC0010000
+#define MSR_K7_PERFCTR0 0xC0010004
+
+/* Centaur-Hauls/IDT defined MSRs. */
+#define MSR_IDT_FCR1 0x107
+#define MSR_IDT_FCR2 0x108
+#define MSR_IDT_FCR3 0x109
+#define MSR_IDT_FCR4 0x10a
+
+#define MSR_IDT_MCR0 0x110
+#define MSR_IDT_MCR1 0x111
+#define MSR_IDT_MCR2 0x112
+#define MSR_IDT_MCR3 0x113
+#define MSR_IDT_MCR4 0x114
+#define MSR_IDT_MCR5 0x115
+#define MSR_IDT_MCR6 0x116
+#define MSR_IDT_MCR7 0x117
+#define MSR_IDT_MCR_CTRL 0x120
+
+/* VIA Cyrix defined MSRs */
+#define MSR_VIA_FCR 0x1107
+
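+/*
+ * Usage sketch: read the local APIC base MSR and set its global enable
+ * bit. The rdmsr/wrmsr macros operate on the two 32-bit halves.
+ */
+static inline void example_enable_lapic(void)
+{
+    unsigned int lo, hi;
+
+    rdmsr(MSR_IA32_APICBASE, lo, hi);
+    wrmsr(MSR_IA32_APICBASE, lo | MSR_IA32_APICBASE_ENABLE, hi);
+}
+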
+#endif /* __ASM_MSR_H */
diff --git a/xen/include/asm-i386/page.h b/xen/include/asm-i386/page.h
new file mode 100644
index 0000000000..63b5c73afd
--- /dev/null
+++ b/xen/include/asm-i386/page.h
@@ -0,0 +1,175 @@
+#ifndef _I386_PAGE_H
+#define _I386_PAGE_H
+
+
+#ifndef __ASSEMBLY__
+#define BUG() do { \
+ printk("BUG at %s:%d\n", __FILE__, __LINE__); \
+ __asm__ __volatile__("ud2"); \
+} while (0)
+#endif /* __ASSEMBLY__ */
+
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 22
+
+#define ENTRIES_PER_L1_PAGETABLE 1024
+#define ENTRIES_PER_L2_PAGETABLE 1024
+
+#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define PAGE_SIZE (1UL << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE)
+#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
+
+#ifndef __ASSEMBLY__
+#include <xeno/config.h>
+typedef struct { unsigned long l1_lo; } l1_pgentry_t;
+typedef struct { unsigned long l2_lo; } l2_pgentry_t;
+typedef l1_pgentry_t *l1_pagetable_t;
+typedef l2_pgentry_t *l2_pagetable_t;
+typedef struct { unsigned long pt_lo; } pagetable_t;
+#endif /* !__ASSEMBLY__ */
+
+/* Strip type from a table entry. */
+#define l1_pgentry_val(_x) ((_x).l1_lo)
+#define l2_pgentry_val(_x) ((_x).l2_lo)
+#define pagetable_val(_x) ((_x).pt_lo)
+
+#define alloc_l1_pagetable() ((l1_pgentry_t *)get_free_page(GFP_KERNEL))
+#define alloc_l2_pagetable() ((l2_pgentry_t *)get_free_page(GFP_KERNEL))
+
+/* Add type to a table entry. */
+#define mk_l1_pgentry(_x) ( (l1_pgentry_t) { (_x) } )
+#define mk_l2_pgentry(_x) ( (l2_pgentry_t) { (_x) } )
+#define mk_pagetable(_x) ( (pagetable_t) { (_x) } )
+
+/* Turn a typed table entry into a page index. */
+#define l1_pgentry_to_pagenr(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT)
+#define l2_pgentry_to_pagenr(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT)
+
+/* Turn a typed table entry into a physical address. */
+#define l1_pgentry_to_phys(_x) (l1_pgentry_val(_x) & PAGE_MASK)
+#define l2_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PAGE_MASK)
+
+/* Dereference a typed level-2 entry to yield a typed level-1 table. */
+#define l2_pgentry_to_l1(_x) \
+ ((l1_pgentry_t *)__va(l2_pgentry_val(_x) & PAGE_MASK))
+
+/* Given a virtual address, get an entry offset into a page table. */
+#define l1_table_offset(_a) \
+ (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1))
+#define l2_table_offset(_a) \
+ ((_a) >> L2_PAGETABLE_SHIFT)
+
+/* Hypervisor table entries use zero to signify 'empty'. */
+#define l1_pgentry_empty(_x) (!l1_pgentry_val(_x))
+#define l2_pgentry_empty(_x) (!l2_pgentry_val(_x))
+
+#define __PAGE_OFFSET (0xFC400000)
+#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
+#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
+#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#define page_address(_p) (__va(((_p) - frame_table) << PAGE_SHIFT))
+#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT))
+#define VALID_PAGE(page) (((page) - frame_table) < max_mapnr)
+
+/* High table entries are reserved by the hypervisor. */
+#define DOMAIN_ENTRIES_PER_L2_PAGETABLE \
+ (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT)
+#define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE \
+ (ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE)
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/flushtlb.h>
+
+extern l2_pgentry_t idle0_pg_table[ENTRIES_PER_L2_PAGETABLE];
+extern l2_pgentry_t *idle_pg_table[NR_CPUS];
+extern void paging_init(void);
+
+#define __flush_tlb() __flush_tlb_counted()
+
+/* Flush global pages as well. */
+
+#define __pge_off() \
+ do { \
+ __asm__ __volatile__( \
+ "movl %0, %%cr4; # turn off PGE " \
+ :: "r" (mmu_cr4_features & ~X86_CR4_PGE)); \
+ } while (0)
+
+#define __pge_on() \
+ do { \
+ __asm__ __volatile__( \
+ "movl %0, %%cr4; # turn off PGE " \
+ :: "r" (mmu_cr4_features)); \
+ } while (0)
+
+
+#define __flush_tlb_all() \
+ do { \
+ __pge_off(); \
+ __flush_tlb_counted(); \
+ __pge_on(); \
+ } while (0)
+
+#define __flush_tlb_one(__addr) \
+__asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr)))
+
+#endif /* !__ASSEMBLY__ */
+
+
+#define _PAGE_PRESENT 0x001
+#define _PAGE_RW 0x002
+#define _PAGE_USER 0x004
+#define _PAGE_PWT 0x008
+#define _PAGE_PCD 0x010
+#define _PAGE_ACCESSED 0x020
+#define _PAGE_DIRTY 0x040
+#define _PAGE_PAT 0x080
+#define _PAGE_PSE 0x080
+#define _PAGE_GLOBAL 0x100
+
+#define __PAGE_HYPERVISOR \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define __PAGE_HYPERVISOR_NOCACHE \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
+#define __PAGE_HYPERVISOR_RO \
+ (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+#define MAKE_GLOBAL(_x) ((_x) | _PAGE_GLOBAL)
+
+#define PAGE_HYPERVISOR MAKE_GLOBAL(__PAGE_HYPERVISOR)
+#define PAGE_HYPERVISOR_RO MAKE_GLOBAL(__PAGE_HYPERVISOR_RO)
+#define PAGE_HYPERVISOR_NOCACHE MAKE_GLOBAL(__PAGE_HYPERVISOR_NOCACHE)
+
+#define mk_l2_writeable(_p) \
+ (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) | _PAGE_RW))
+#define mk_l2_readonly(_p) \
+ (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) & ~_PAGE_RW))
+#define mk_l1_writeable(_p) \
+ (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) | _PAGE_RW))
+#define mk_l1_readonly(_p) \
+ (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) & ~_PAGE_RW))
+
+
+#ifndef __ASSEMBLY__
+static __inline__ int get_order(unsigned long size)
+{
+ int order;
+
+ size = (size-1) >> (PAGE_SHIFT-1);
+ order = -1;
+ do {
+ size >>= 1;
+ order++;
+ } while (size);
+ return order;
+}
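+
+/*
+ * Illustrative examples (not in the original source), assuming 4kB pages:
+ * get_order(1) == 0, get_order(PAGE_SIZE) == 0, get_order(PAGE_SIZE+1) == 1,
+ * get_order(8*PAGE_SIZE) == 3; i.e. the smallest order such that 2^order
+ * pages cover the requested size.
+ */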
+#endif
+
+#endif /* _I386_PAGE_H */
diff --git a/xen/include/asm-i386/param.h b/xen/include/asm-i386/param.h
new file mode 100644
index 0000000000..1b10bf49fe
--- /dev/null
+++ b/xen/include/asm-i386/param.h
@@ -0,0 +1,24 @@
+#ifndef _ASMi386_PARAM_H
+#define _ASMi386_PARAM_H
+
+#ifndef HZ
+#define HZ 100
+#endif
+
+#define EXEC_PAGESIZE 4096
+
+#ifndef NGROUPS
+#define NGROUPS 32
+#endif
+
+#ifndef NOGROUP
+#define NOGROUP (-1)
+#endif
+
+#define MAXHOSTNAMELEN 64 /* max length of hostname */
+
+#ifdef __KERNEL__
+# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */
+#endif
+
+#endif
diff --git a/xen/include/asm-i386/pci.h b/xen/include/asm-i386/pci.h
new file mode 100644
index 0000000000..1ffade8914
--- /dev/null
+++ b/xen/include/asm-i386/pci.h
@@ -0,0 +1,286 @@
+#ifndef __i386_PCI_H
+#define __i386_PCI_H
+
+#include <linux/config.h>
+
+#ifdef __KERNEL__
+
+/* Can be used to override the logic in pci_scan_bus for skipping
+ already-configured bus numbers - to be used for buggy BIOSes
+ or architectures with incomplete PCI setup by the loader */
+
+#ifdef CONFIG_PCI
+extern unsigned int pcibios_assign_all_busses(void);
+#else
+#define pcibios_assign_all_busses() 0
+#endif
+
+extern unsigned long pci_mem_start;
+#define PCIBIOS_MIN_IO 0x1000
+#define PCIBIOS_MIN_MEM (pci_mem_start)
+
+void pcibios_set_master(struct pci_dev *dev);
+void pcibios_penalize_isa_irq(int irq);
+struct irq_routing_table *pcibios_get_irq_routing_table(void);
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+
+/* Dynamic DMA mapping stuff.
+ * i386 has everything mapped statically.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/scatterlist.h>
+/*#include <linux/string.h>*/
+#include <asm/io.h>
+
+struct pci_dev;
+
+/* The PCI address space does equal the physical memory
+ * address space. The networking and block device layers use
+ * this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS (1)
+
+/* Allocate and map kernel buffer using consistent mode DMA for a device.
+ * hwdev should be valid struct pci_dev pointer for PCI devices,
+ * NULL for PCI-like buses (ISA, EISA).
+ * Returns non-NULL cpu-view pointer to the buffer if successful and
+ * sets *dma_handle to the PCI-side DMA address as well; otherwise
+ * *dma_handle is undefined.
+ */
+extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
+ dma_addr_t *dma_handle);
+
+/* Free and unmap a consistent DMA buffer.
+ * cpu_addr is what was returned from pci_alloc_consistent,
+ * size must be the same as what was passed into pci_alloc_consistent,
+ * and likewise dma_handle must be the same as what *dma_handle was set to.
+ *
+ * References to the memory and mappings associated with cpu_addr/dma_addr
+ * past this call are illegal.
+ */
+extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
+ void *vaddr, dma_addr_t dma_handle);
+
+/* Map a single buffer of the indicated size for DMA in streaming mode.
+ * The 32-bit bus address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory
+ * until either pci_unmap_single or pci_dma_sync_single is performed.
+ */
+static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
+ size_t size, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+ flush_write_buffers();
+ return virt_to_bus(ptr);
+}
+
+/* Unmap a single streaming mode DMA translation. The dma_addr and size
+ * must match what was provided for in a previous pci_map_single call. All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
+ size_t size, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+ /* Nothing to do */
+}
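+
+#if 0
+/* Illustrative usage sketch (not in the original source); 'dev', 'buf' and
+ * 'len' are hypothetical. A streaming mapping pairs map and unmap around
+ * the device transfer: */
+static void example_streaming_dma(struct pci_dev *dev, void *buf, size_t len)
+{
+    dma_addr_t bus = pci_map_single(dev, buf, len, PCI_DMA_TODEVICE);
+    /* ... hand 'bus' to the device and wait for the transfer ... */
+    pci_unmap_single(dev, bus, len, PCI_DMA_TODEVICE);
+}
+#endif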
+
+/*
+ * pci_{map,unmap}_page map a kernel page to/from a dma_addr_t. Identical
+ * to pci_{map,unmap}_single, but take a struct pfn_info instead of a
+ * virtual address.
+ */
+static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct pfn_info *page,
+ unsigned long offset, size_t size, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+
+ return (dma_addr_t)(page - frame_table) * PAGE_SIZE + offset;
+}
+
+static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address,
+ size_t size, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+ /* Nothing to do */
+}
+
+/* pci_unmap_{page,single} is a nop so... */
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
+#define pci_unmap_addr(PTR, ADDR_NAME) (0)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
+#define pci_unmap_len(PTR, LEN_NAME) (0)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
+
+/* Map a set of buffers described by scatterlist in streaming
+ * mode for DMA. This is the scatter-gather version of the
+ * above pci_map_single interface. Here the scatter gather list
+ * elements are each tagged with the appropriate dma address
+ * and length. They are obtained via sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ * DMA address/length pairs than there are SG table elements.
+ * (for example via virtual mapping capabilities)
+ * The routine returns the number of addr/length pairs actually
+ * used, at most nents.
+ *
+ * Device ownership issues as mentioned above for pci_map_single are
+ * the same here.
+ */
+static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+ int nents, int direction)
+{
+ int i;
+
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+
+ /*
+ * temporary 2.4 hack
+ */
+ for (i = 0; i < nents; i++ ) {
+ if (sg[i].address && sg[i].page)
+ out_of_line_bug();
+
+ /* not worth checking since NULL is ok says SMH */
+#if 0
+ else if (!sg[i].address && !sg[i].page)
+ out_of_line_bug();
+#endif
+
+ if (sg[i].address)
+ sg[i].dma_address = virt_to_bus(sg[i].address);
+ else
+ sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset;
+ }
+
+ flush_write_buffers();
+ return nents;
+}
+
+/* Unmap a set of streaming mode DMA translations.
+ * Again, cpu read rules concerning calls here are the same as for
+ * pci_unmap_single() above.
+ */
+static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+ int nents, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+ /* Nothing to do */
+}
+
+/* Make physical memory consistent for a single
+ * streaming mode DMA translation after a transfer.
+ *
+ * If you perform a pci_map_single() but wish to interrogate the
+ * buffer using the cpu, yet do not wish to teardown the PCI dma
+ * mapping, you must call this function before doing so. At the
+ * next point you give the PCI dma address back to the card, the
+ * device again owns the buffer.
+ */
+static inline void pci_dma_sync_single(struct pci_dev *hwdev,
+ dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+ flush_write_buffers();
+}
+
+/* Make physical memory consistent for a set of streaming
+ * mode DMA translations after a transfer.
+ *
+ * The same as pci_dma_sync_single but for a scatter-gather list,
+ * same rules and usage.
+ */
+static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
+ struct scatterlist *sg,
+ int nelems, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ out_of_line_bug();
+ flush_write_buffers();
+}
+
+/* Return whether the given PCI device DMA address mask can
+ * be supported properly. For example, if your device can
+ * only drive the low 24-bits during PCI bus mastering, then
+ * you would pass 0x00ffffff as the mask to this function.
+ */
+static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
+{
+ /*
+ * we fall back to GFP_DMA when the mask isn't all 1s,
+ * so we can't guarantee allocations that must be
+ * within a tighter range than GFP_DMA..
+ */
+ if(mask < 0x00ffffff)
+ return 0;
+
+ return 1;
+}
+
+/* This is always fine. */
+#define pci_dac_dma_supported(pci_dev, mask) (1)
+
+static __inline__ dma64_addr_t
+pci_dac_page_to_dma(struct pci_dev *pdev, struct pfn_info *page, unsigned long offset, int direction)
+{
+ return ((dma64_addr_t) page_to_bus(page) +
+ (dma64_addr_t) offset);
+}
+
+static __inline__ struct pfn_info *
+pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+ unsigned long poff = (dma_addr >> PAGE_SHIFT);
+
+ return frame_table + poff;
+}
+
+static __inline__ unsigned long
+pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+ return (dma_addr & ~PAGE_MASK);
+}
+
+static __inline__ void
+pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
+ flush_write_buffers();
+}
+
+/* These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg) ((sg)->dma_address)
+#define sg_dma_len(sg) ((sg)->length)
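+
+#if 0
+/* Illustrative usage sketch (not in the original source); 'dev', 'sg',
+ * 'nents' and program_device() are hypothetical. Only the count returned
+ * by pci_map_sg() may be walked: */
+extern void program_device(dma_addr_t addr, unsigned int len);
+static void example_sg_dma(struct pci_dev *dev, struct scatterlist *sg, int nents)
+{
+    int i, n = pci_map_sg(dev, sg, nents, PCI_DMA_FROMDEVICE);
+    for (i = 0; i < n; i++)
+        program_device(sg_dma_address(&sg[i]), sg_dma_len(&sg[i]));
+    /* ... wait for the device, then tear the mappings down ... */
+    pci_unmap_sg(dev, sg, nents, PCI_DMA_FROMDEVICE);
+}
+#endif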
+
+/* Return the index of the PCI controller for device. */
+static inline int pci_controller_num(struct pci_dev *dev)
+{
+ return 0;
+}
+
+#if 0 /* XXX Not in land of Xen XXX */
+#define HAVE_PCI_MMAP
+extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state, int write_combine);
+#endif
+
+#endif /* __KERNEL__ */
+
+#endif /* __i386_PCI_H */
diff --git a/xen/include/asm-i386/pgalloc.h b/xen/include/asm-i386/pgalloc.h
new file mode 100644
index 0000000000..fcba5e1585
--- /dev/null
+++ b/xen/include/asm-i386/pgalloc.h
@@ -0,0 +1,117 @@
+#ifndef _I386_PGALLOC_H
+#define _I386_PGALLOC_H
+
+#include <xeno/config.h>
+#include <xeno/sched.h>
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+
+#define pgd_quicklist (current_cpu_data.pgd_quick)
+#define pmd_quicklist (current_cpu_data.pmd_quick)
+#define pte_quicklist (current_cpu_data.pte_quick)
+#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
+
+
+/*
+ * Allocate and free page tables.
+ */
+
+
+#define pte_free(pte) pte_free_fast(pte)
+#define pgd_alloc(mm) get_pgd_fast()
+#define pgd_free(pgd) free_pgd_fast(pgd)
+
+/*
+ * allocating and freeing a pmd is trivial: the 1-entry pmd is
+ * inside the pgd, so has no extra memory associated with it.
+ * (In the PAE case we free the pmds as part of the pgd.)
+ */
+
+#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); })
+#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); })
+#define pmd_free_slow(x) do { } while (0)
+#define pmd_free_fast(x) do { } while (0)
+#define pmd_free(x) do { } while (0)
+#define pgd_populate(mm, pmd, pte) BUG()
+
+/*
+ * TLB flushing:
+ *
+ * - flush_tlb() flushes the current mm struct TLBs
+ * - flush_tlb_all() flushes all processes TLBs
+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+ * - flush_tlb_range(mm, start, end) flushes a range of pages
+ * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ *
+ * ..but the i386 has somewhat limited tlb flushing capabilities,
+ * and page-granular flushes are available only on i486 and up.
+ */
+
+#ifndef CONFIG_SMP
+
+#define flush_tlb() __flush_tlb()
+#define flush_tlb_all() __flush_tlb_all()
+#define local_flush_tlb() __flush_tlb()
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+ if (mm == current->active_mm)
+ __flush_tlb();
+}
+
+#if 0
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ if (vma->vm_mm == current->active_mm)
+ __flush_tlb_one(addr);
+}
+#endif
+
+static inline void flush_tlb_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ if (mm == current->active_mm)
+ __flush_tlb();
+}
+
+#else
+
+#include <xeno/smp.h>
+
+#define local_flush_tlb() \
+ __flush_tlb()
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_current_task(void);
+extern void flush_tlb_mm(struct mm_struct *);
+/*extern void flush_tlb_page(struct vm_area_struct *, unsigned long);*/
+
+#define flush_tlb() flush_tlb_current_task()
+
+static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end)
+{
+ flush_tlb_mm(mm);
+}
+
+#define TLBSTATE_OK 1
+#define TLBSTATE_LAZY 2
+
+struct tlb_state
+{
+ struct mm_struct *active_mm;
+ int state;
+};
+extern struct tlb_state cpu_tlbstate[NR_CPUS];
+
+
+#endif
+
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ /* i386 does not keep any page table caches in TLB */
+}
+
+#endif /* _I386_PGALLOC_H */
diff --git a/xen/include/asm-i386/processor.h b/xen/include/asm-i386/processor.h
new file mode 100644
index 0000000000..36a50b2976
--- /dev/null
+++ b/xen/include/asm-i386/processor.h
@@ -0,0 +1,501 @@
+/*
+ * include/asm-i386/processor.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ */
+
+#ifndef __ASM_I386_PROCESSOR_H
+#define __ASM_I386_PROCESSOR_H
+
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/cpufeature.h>
+#include <asm/desc.h>
+#include <xeno/config.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
+
+/*
+ * CPU type and hardware bug flags. Kept separately for each CPU.
+ * Members of this structure are referenced in head.S, so think twice
+ * before touching them. [mj]
+ */
+
+struct cpuinfo_x86 {
+ __u8 x86; /* CPU family */
+ __u8 x86_vendor; /* CPU vendor */
+ __u8 x86_model;
+ __u8 x86_mask;
+ int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
+ __u32 x86_capability[NCAPINTS];
+ char x86_vendor_id[16];
+ unsigned long *pgd_quick;
+ unsigned long *pmd_quick;
+ unsigned long *pte_quick;
+ unsigned long pgtable_cache_sz;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+#define X86_VENDOR_INTEL 0
+#define X86_VENDOR_CYRIX 1
+#define X86_VENDOR_AMD 2
+#define X86_VENDOR_UMC 3
+#define X86_VENDOR_NEXGEN 4
+#define X86_VENDOR_CENTAUR 5
+#define X86_VENDOR_RISE 6
+#define X86_VENDOR_TRANSMETA 7
+#define X86_VENDOR_UNKNOWN 0xff
+
+/*
+ * capabilities of CPUs
+ */
+
+extern struct cpuinfo_x86 boot_cpu_data;
+extern struct tss_struct init_tss[NR_CPUS];
+
+#ifdef CONFIG_SMP
+extern struct cpuinfo_x86 cpu_data[];
+#define current_cpu_data cpu_data[smp_processor_id()]
+#else
+#define cpu_data (&boot_cpu_data)
+#define current_cpu_data boot_cpu_data
+#endif
+
+#define cpu_has_pge (test_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability))
+#define cpu_has_pse (test_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability))
+#define cpu_has_pae (test_bit(X86_FEATURE_PAE, boot_cpu_data.x86_capability))
+#define cpu_has_tsc (test_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability))
+#define cpu_has_de (test_bit(X86_FEATURE_DE, boot_cpu_data.x86_capability))
+#define cpu_has_vme (test_bit(X86_FEATURE_VME, boot_cpu_data.x86_capability))
+#define cpu_has_fxsr (test_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability))
+#define cpu_has_xmm (test_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability))
+#define cpu_has_fpu (test_bit(X86_FEATURE_FPU, boot_cpu_data.x86_capability))
+#define cpu_has_apic (test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability))
+
+extern void identify_cpu(struct cpuinfo_x86 *);
+extern void print_cpu_info(struct cpuinfo_x86 *);
+extern void dodgy_tsc(void);
+
+/*
+ * EFLAGS bits
+ */
+#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
+#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
+#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
+#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
+#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
+#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
+#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
+#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
+#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
+#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
+#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
+#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
+#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
+#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
+#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+
+/*
+ * Generic CPUID function
+ */
+static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+{
+ __asm__("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (op));
+}
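+
+#if 0
+/* Illustrative sketch (not in the original source): leaf 0 returns the
+ * highest supported leaf in EAX and the 12-byte vendor string in
+ * EBX, EDX, ECX (in that order): */
+static void example_read_vendor(int vend[4])
+{
+    int eax;
+    cpuid(0, &eax, &vend[0], &vend[2], &vend[1]);
+    vend[3] = 0;    /* (char *)vend is now e.g. "GenuineIntel" */
+}
+#endif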
+
+/*
+ * CPUID functions returning a single datum
+ */
+static inline unsigned int cpuid_eax(unsigned int op)
+{
+ unsigned int eax;
+
+ __asm__("cpuid"
+ : "=a" (eax)
+ : "0" (op)
+ : "bx", "cx", "dx");
+ return eax;
+}
+static inline unsigned int cpuid_ebx(unsigned int op)
+{
+ unsigned int eax, ebx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=b" (ebx)
+ : "0" (op)
+ : "cx", "dx" );
+ return ebx;
+}
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+ unsigned int eax, ecx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=c" (ecx)
+ : "0" (op)
+ : "bx", "dx" );
+ return ecx;
+}
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+ unsigned int eax, edx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=d" (edx)
+ : "0" (op)
+ : "bx", "cx");
+ return edx;
+}
+
+/*
+ * Intel CPU features in CR4
+ */
+#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
+#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
+#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
+#define X86_CR4_DE 0x0008 /* enable debugging extensions */
+#define X86_CR4_PSE 0x0010 /* enable page size extensions */
+#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
+#define X86_CR4_MCE 0x0040 /* Machine check enable */
+#define X86_CR4_PGE 0x0080 /* enable global pages */
+#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
+#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
+#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
+
+/*
+ * Save the CR4 feature set we're using (i.e.
+ * Pentium 4MB enable and PPro global page
+ * enable), so that any CPUs that boot up
+ * after us can get the correct flags.
+ */
+extern unsigned long mmu_cr4_features;
+
+static inline void set_in_cr4 (unsigned long mask)
+{
+ mmu_cr4_features |= mask;
+ __asm__("movl %%cr4,%%eax\n\t"
+ "orl %0,%%eax\n\t"
+ "movl %%eax,%%cr4\n"
+ : : "irg" (mask)
+ :"ax");
+}
+
+static inline void clear_in_cr4 (unsigned long mask)
+{
+ mmu_cr4_features &= ~mask;
+ __asm__("movl %%cr4,%%eax\n\t"
+ "andl %0,%%eax\n\t"
+ "movl %%eax,%%cr4\n"
+ : : "irg" (~mask)
+ :"ax");
+}
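+
+#if 0
+/* Illustrative sketch (not in the original source): enabling global pages
+ * on the boot CPU; mmu_cr4_features stays in sync so that CPUs booting
+ * later can pick up the same flags: */
+if (cpu_has_pge)
+    set_in_cr4(X86_CR4_PGE);
+#endif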
+
+/*
+ * Cyrix CPU configuration register indexes
+ */
+#define CX86_CCR0 0xc0
+#define CX86_CCR1 0xc1
+#define CX86_CCR2 0xc2
+#define CX86_CCR3 0xc3
+#define CX86_CCR4 0xe8
+#define CX86_CCR5 0xe9
+#define CX86_CCR6 0xea
+#define CX86_CCR7 0xeb
+#define CX86_DIR0 0xfe
+#define CX86_DIR1 0xff
+#define CX86_ARR_BASE 0xc4
+#define CX86_RCR_BASE 0xdc
+
+/*
+ * Cyrix CPU indexed register access macros
+ */
+
+#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
+
+#define setCx86(reg, data) do { \
+ outb((reg), 0x22); \
+ outb((data), 0x23); \
+} while (0)
+
+#define EISA_bus (0)
+#define MCA_bus (0)
+
+/* from system description table in BIOS. Mostly for MCA use, but
+ * others may find it useful. */
+extern unsigned int machine_id;
+extern unsigned int machine_submodel_id;
+extern unsigned int BIOS_revision;
+extern unsigned int mca_pentium_flag;
+
+/*
+ * User space process size: 3GB (default).
+ */
+#define TASK_SIZE (PAGE_OFFSET)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE (TASK_SIZE / 3)
+
+/*
+ * Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
+ */
+#define IO_BITMAP_SIZE 32
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+#define INVALID_IO_BITMAP_OFFSET 0x8000
+
+struct i387_fsave_struct {
+ long cwd;
+ long swd;
+ long twd;
+ long fip;
+ long fcs;
+ long foo;
+ long fos;
+ long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+ long status; /* software status information */
+};
+
+struct i387_fxsave_struct {
+ unsigned short cwd;
+ unsigned short swd;
+ unsigned short twd;
+ unsigned short fop;
+ long fip;
+ long fcs;
+ long foo;
+ long fos;
+ long mxcsr;
+ long reserved;
+ long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
+ long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
+ long padding[56];
+} __attribute__ ((aligned (16)));
+
+struct i387_soft_struct {
+ long cwd;
+ long swd;
+ long twd;
+ long fip;
+ long fcs;
+ long foo;
+ long fos;
+ long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+ unsigned char ftop, changed, lookahead, no_update, rm, alimit;
+ struct info *info;
+ unsigned long entry_eip;
+};
+
+union i387_union {
+ struct i387_fsave_struct fsave;
+ struct i387_fxsave_struct fxsave;
+ struct i387_soft_struct soft;
+};
+
+typedef struct {
+ unsigned long seg;
+} mm_segment_t;
+
+struct tss_struct {
+ unsigned short back_link,__blh;
+ unsigned long esp0;
+ unsigned short ss0,__ss0h;
+ unsigned long esp1;
+ unsigned short ss1,__ss1h;
+ unsigned long esp2;
+ unsigned short ss2,__ss2h;
+ unsigned long __cr3;
+ unsigned long eip;
+ unsigned long eflags;
+ unsigned long eax,ecx,edx,ebx;
+ unsigned long esp;
+ unsigned long ebp;
+ unsigned long esi;
+ unsigned long edi;
+ unsigned short es, __esh;
+ unsigned short cs, __csh;
+ unsigned short ss, __ssh;
+ unsigned short ds, __dsh;
+ unsigned short fs, __fsh;
+ unsigned short gs, __gsh;
+ unsigned short ldt, __ldth;
+ unsigned short trace, bitmap;
+ unsigned long io_bitmap[IO_BITMAP_SIZE+1];
+ /*
+ * pads the TSS to be cacheline-aligned (size is 0x100)
+ */
+ unsigned long __cacheline_filler[5];
+};
+
+struct thread_struct {
+ unsigned long esp0; /* top of the stack */
+ unsigned long eip; /* in kernel space, saved on task switch */
+ unsigned long esp; /* "" */
+ unsigned long fs; /* "" (NB. DS/ES constant in mon, so no save) */
+ unsigned long gs; /* "" ("") */
+ unsigned long esp1, ss1;
+/* Hardware debugging registers */
+ unsigned long debugreg[8]; /* %%db0-7 debug registers */
+/* fault info */
+ unsigned long cr2, trap_no, error_code;
+/* floating point info */
+ union i387_union i387;
+/* Trap info. */
+ int fast_trap_idx;
+ struct desc_struct fast_trap_desc;
+ trap_info_t traps[256];
+};
+
+#define IDT_ENTRIES 256
+extern struct desc_struct idt_table[];
+extern struct desc_struct *idt_tables[];
+
+#define SET_DEFAULT_FAST_TRAP(_p) \
+ (_p)->fast_trap_idx = 0x20; \
+ (_p)->fast_trap_desc.a = 0; \
+ (_p)->fast_trap_desc.b = 0;
+
+#define CLEAR_FAST_TRAP(_p) \
+ (memset(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
+ 0, 8))
+
+#define SET_FAST_TRAP(_p) \
+ (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
+ &((_p)->fast_trap_desc), 8))
+
+#define INIT_THREAD { \
+ sizeof(idle0_stack) + (long) &idle0_stack, /* esp0 */ \
+ 0, 0, 0, 0, 0, 0, \
+ { [0 ... 7] = 0 }, /* debugging registers */ \
+ 0, 0, 0, \
+ { { 0, }, }, /* 387 state */ \
+ 0x20, { 0, 0 }, /* DEFAULT_FAST_TRAP */ \
+ { {0} } /* io permissions */ \
+}
+
+#define INIT_TSS { \
+ 0,0, /* back_link, __blh */ \
+ sizeof(idle0_stack) + (long) &idle0_stack, /* esp0 */ \
+ __HYPERVISOR_DS, 0, /* ss0 */ \
+ 0,0,0,0,0,0, /* stack1, stack2 */ \
+ 0, /* cr3 */ \
+ 0,0, /* eip,eflags */ \
+ 0,0,0,0, /* eax,ecx,edx,ebx */ \
+ 0,0,0,0, /* esp,ebp,esi,edi */ \
+ 0,0,0,0,0,0, /* es,cs,ss */ \
+ 0,0,0,0,0,0, /* ds,fs,gs */ \
+ 0,0, /* ldt */ \
+ 0, INVALID_IO_BITMAP_OFFSET, /* trace, bitmap */ \
+ {~0, } /* ioperm */ \
+}
+
+#define start_thread(regs, new_eip, new_esp) do { \
+ __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \
+ set_fs(USER_DS); \
+ regs->xds = __USER_DS; \
+ regs->xes = __USER_DS; \
+ regs->xss = __USER_DS; \
+ regs->xcs = __USER_CS; \
+ regs->eip = new_eip; \
+ regs->esp = new_esp; \
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+struct mm_struct;
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+/*
+ * create a kernel thread without removing it from tasklists
+ */
+extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
+/* Copy and release all segment info associated with a VM */
+extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
+extern void release_segments(struct mm_struct * mm);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+static inline unsigned long thread_saved_pc(struct thread_struct *t)
+{
+ return ((unsigned long *)t->esp)[3];
+}
+
+unsigned long get_wchan(struct task_struct *p);
+#define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+#define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+
+#define THREAD_SIZE (2*PAGE_SIZE)
+#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+#define free_task_struct(p) free_pages((unsigned long) (p), 1)
+#define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count)
+
+#define idle0_task (idle0_task_union.task)
+#define idle0_stack (idle0_task_union.stack)
+
+struct microcode {
+ unsigned int hdrver;
+ unsigned int rev;
+ unsigned int date;
+ unsigned int sig;
+ unsigned int cksum;
+ unsigned int ldrver;
+ unsigned int pf;
+ unsigned int reserved[5];
+ unsigned int bits[500];
+};
+
+/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
+#define MICROCODE_IOCFREE _IO('6',0)
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+ __asm__ __volatile__("rep;nop");
+}
+
+#define cpu_relax() rep_nop()
+
+/* Prefetch instructions for Pentium III and AMD Athlon */
+#ifdef CONFIG_MPENTIUMIII
+
+#define ARCH_HAS_PREFETCH
+extern inline void prefetch(const void *x)
+{
+ __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x));
+}
+
+#elif defined(CONFIG_X86_USE_3DNOW)
+
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+
+extern inline void prefetch(const void *x)
+{
+ __asm__ __volatile__ ("prefetch (%0)" : : "r"(x));
+}
+
+extern inline void prefetchw(const void *x)
+{
+ __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x));
+}
+#define spin_lock_prefetch(x) prefetchw(x)
+
+#endif
+
+#endif /* __ASM_I386_PROCESSOR_H */
diff --git a/xen/include/asm-i386/ptrace.h b/xen/include/asm-i386/ptrace.h
new file mode 100644
index 0000000000..509001cf57
--- /dev/null
+++ b/xen/include/asm-i386/ptrace.h
@@ -0,0 +1,86 @@
+#ifndef _I386_PTRACE_H
+#define _I386_PTRACE_H
+
+#define EBX 0
+#define ECX 1
+#define EDX 2
+#define ESI 3
+#define EDI 4
+#define EBP 5
+#define EAX 6
+#define DS 7
+#define ES 8
+#define FS 9
+#define GS 10
+#define ORIG_EAX 11
+#define EIP 12
+#define CS 13
+#define EFL 14
+#define UESP 15
+#define SS 16
+#define FRAME_SIZE 17
+
+/* this struct defines the way the registers are stored on the
+ stack during a system call. */
+
+struct pt_regs {
+ long ebx;
+ long ecx;
+ long edx;
+ long esi;
+ long edi;
+ long ebp;
+ long eax;
+ int xds;
+ int xes;
+ long orig_eax;
+ long eip;
+ int xcs;
+ long eflags;
+ long esp;
+ int xss;
+};
+
+/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
+#define PTRACE_GETREGS 12
+#define PTRACE_SETREGS 13
+#define PTRACE_GETFPREGS 14
+#define PTRACE_SETFPREGS 15
+#define PTRACE_GETFPXREGS 18
+#define PTRACE_SETFPXREGS 19
+
+#define PTRACE_SETOPTIONS 21
+
+/* options set using PTRACE_SETOPTIONS */
+#define PTRACE_O_TRACESYSGOOD 0x00000001
+
+enum EFLAGS {
+ EF_CF = 0x00000001,
+ EF_PF = 0x00000004,
+ EF_AF = 0x00000010,
+ EF_ZF = 0x00000040,
+ EF_SF = 0x00000080,
+ EF_TF = 0x00000100,
+ EF_IE = 0x00000200,
+ EF_DF = 0x00000400,
+ EF_OF = 0x00000800,
+ EF_IOPL = 0x00003000,
+ EF_IOPL_RING0 = 0x00000000,
+ EF_IOPL_RING1 = 0x00001000,
+ EF_IOPL_RING2 = 0x00002000,
+ EF_NT = 0x00004000, /* nested task */
+ EF_RF = 0x00010000, /* resume */
+ EF_VM = 0x00020000, /* virtual mode */
+ EF_AC = 0x00040000, /* alignment */
+ EF_VIF = 0x00080000, /* virtual interrupt */
+ EF_VIP = 0x00100000, /* virtual interrupt pending */
+ EF_ID = 0x00200000, /* id */
+};
+
+#ifdef __KERNEL__
+#define user_mode(regs) ((3 & (regs)->xcs))
+#define instruction_pointer(regs) ((regs)->eip)
+extern void show_regs(struct pt_regs *);
+#endif
+
+#endif
diff --git a/xen/include/asm-i386/rwlock.h b/xen/include/asm-i386/rwlock.h
new file mode 100644
index 0000000000..9475419f95
--- /dev/null
+++ b/xen/include/asm-i386/rwlock.h
@@ -0,0 +1,83 @@
+/* include/asm-i386/rwlock.h
+ *
+ * Helpers used by both rw spinlocks and rw semaphores.
+ *
+ * Based in part on code from semaphore.h and
+ * spinlock.h Copyright 1996 Linus Torvalds.
+ *
+ * Copyright 1999 Red Hat, Inc.
+ *
+ * Written by Benjamin LaHaise.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_I386_RWLOCK_H
+#define _ASM_I386_RWLOCK_H
+
+#define RW_LOCK_BIAS 0x01000000
+#define RW_LOCK_BIAS_STR "0x01000000"
+
+#define __build_read_lock_ptr(rw, helper) \
+ asm volatile(LOCK "subl $1,(%0)\n\t" \
+ "js 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tcall " helper "\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ ::"a" (rw) : "memory")
+
+#define __build_read_lock_const(rw, helper) \
+ asm volatile(LOCK "subl $1,%0\n\t" \
+ "js 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tpushl %%eax\n\t" \
+ "leal %0,%%eax\n\t" \
+ "call " helper "\n\t" \
+ "popl %%eax\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ :"=m" (*(volatile int *)rw) : : "memory")
+
+#define __build_read_lock(rw, helper) do { \
+ if (__builtin_constant_p(rw)) \
+ __build_read_lock_const(rw, helper); \
+ else \
+ __build_read_lock_ptr(rw, helper); \
+ } while (0)
+
+#define __build_write_lock_ptr(rw, helper) \
+ asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+ "jnz 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tcall " helper "\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ ::"a" (rw) : "memory")
+
+#define __build_write_lock_const(rw, helper) \
+ asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+ "jnz 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tpushl %%eax\n\t" \
+ "leal %0,%%eax\n\t" \
+ "call " helper "\n\t" \
+ "popl %%eax\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ :"=m" (*(volatile int *)rw) : : "memory")
+
+#define __build_write_lock(rw, helper) do { \
+ if (__builtin_constant_p(rw)) \
+ __build_write_lock_const(rw, helper); \
+ else \
+ __build_write_lock_ptr(rw, helper); \
+ } while (0)
+
+#endif
diff --git a/xen/include/asm-i386/scatterlist.h b/xen/include/asm-i386/scatterlist.h
new file mode 100644
index 0000000000..9d858415db
--- /dev/null
+++ b/xen/include/asm-i386/scatterlist.h
@@ -0,0 +1,16 @@
+#ifndef _I386_SCATTERLIST_H
+#define _I386_SCATTERLIST_H
+
+struct scatterlist {
+ char * address; /* Location data is to be transferred to, NULL for
+ * highmem page */
+ struct pfn_info * page; /* Location for highmem page, if any */
+ unsigned int offset;/* for highmem, page offset */
+
+ dma_addr_t dma_address;
+ unsigned int length;
+};
+
+#define ISA_DMA_THRESHOLD (0x00ffffff)
+
+#endif /* !(_I386_SCATTERLIST_H) */
diff --git a/xen/include/asm-i386/smp.h b/xen/include/asm-i386/smp.h
new file mode 100644
index 0000000000..cfec568c43
--- /dev/null
+++ b/xen/include/asm-i386/smp.h
@@ -0,0 +1,92 @@
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+#ifndef __ASSEMBLY__
+#include <xeno/config.h>
+#include <asm/ptrace.h>
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/mpspec.h>
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+#endif
+
+#ifdef CONFIG_SMP
+#define TARGET_CPUS cpu_online_map
+#else
+#define TARGET_CPUS 0x01
+#endif
+
+#ifdef CONFIG_SMP
+#ifndef __ASSEMBLY__
+
+/*
+ * Private routines/data
+ */
+
+extern void smp_alloc_memory(void);
+extern unsigned long phys_cpu_present_map;
+extern unsigned long cpu_online_map;
+extern volatile unsigned long smp_invalidate_needed;
+extern int pic_mode;
+extern void smp_flush_tlb(void);
+extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
+extern void smp_invalidate_rcv(void); /* Process an NMI */
+extern void (*mtrr_hook) (void);
+extern void zap_low_mappings (void);
+
+/*
+ * On x86 all CPUs are mapped 1:1 to the APIC space.
+ * This simplifies scheduling and IPI sending and
+ * compresses data structures.
+ */
+static inline int cpu_logical_map(int cpu)
+{
+ return cpu;
+}
+static inline int cpu_number_map(int cpu)
+{
+ return cpu;
+}
+
+/*
+ * Some lowlevel functions might want to know about
+ * the real APIC ID <-> CPU # mapping.
+ */
+#define MAX_APICID 256
+extern volatile int cpu_to_physical_apicid[NR_CPUS];
+extern volatile int physical_apicid_to_cpu[MAX_APICID];
+extern volatile int cpu_to_logical_apicid[NR_CPUS];
+extern volatile int logical_apicid_to_cpu[MAX_APICID];
+
+/*
+ * General functions that each host system must provide.
+ */
+
+extern void smp_boot_cpus(void);
+extern void smp_store_cpu_info(int id); /* Store per-CPU info (like the initial udelay numbers) */
+
+/*
+ * This function is needed by all SMP systems. It must _always_ be valid
+ * from the initial startup. We map APIC_BASE very early in page_setup(),
+ * so this is correct in the x86 case.
+ */
+
+#define smp_processor_id() (current->processor)
+
+static __inline int hard_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
+}
+
+static __inline int logical_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
+#endif
diff --git a/xen/include/asm-i386/smpboot.h b/xen/include/asm-i386/smpboot.h
new file mode 100644
index 0000000000..ece215fab0
--- /dev/null
+++ b/xen/include/asm-i386/smpboot.h
@@ -0,0 +1,121 @@
+#ifndef __ASM_SMPBOOT_H
+#define __ASM_SMPBOOT_H
+
+/* enum for clustered_apic_mode values */
+enum {
+ CLUSTERED_APIC_NONE = 0,
+ CLUSTERED_APIC_XAPIC,
+ CLUSTERED_APIC_NUMAQ
+};
+
+#ifdef CONFIG_X86_CLUSTERED_APIC
+extern unsigned int apic_broadcast_id;
+extern unsigned char clustered_apic_mode;
+extern unsigned char esr_disable;
+extern unsigned char int_delivery_mode;
+extern unsigned int int_dest_addr_mode;
+extern int cyclone_setup(char*);
+
+static inline void detect_clustered_apic(char* oem, char* prod)
+{
+ /*
+ * Can't recognize Summit xAPICs at present, so use the OEM ID.
+ */
+ if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(prod, "VIGIL SMP", 9)){
+ clustered_apic_mode = CLUSTERED_APIC_XAPIC;
+ apic_broadcast_id = APIC_BROADCAST_ID_XAPIC;
+ int_dest_addr_mode = APIC_DEST_PHYSICAL;
+ int_delivery_mode = dest_Fixed;
+ esr_disable = 1;
+ /*Start cyclone clock*/
+ cyclone_setup(0);
+ }
+ else if (!strncmp(oem, "IBM NUMA", 8)){
+ clustered_apic_mode = CLUSTERED_APIC_NUMAQ;
+ apic_broadcast_id = APIC_BROADCAST_ID_APIC;
+ int_dest_addr_mode = APIC_DEST_LOGICAL;
+ int_delivery_mode = dest_LowestPrio;
+ esr_disable = 1;
+ }
+}
+#define INT_DEST_ADDR_MODE (int_dest_addr_mode)
+#define INT_DELIVERY_MODE (int_delivery_mode)
+#else /* CONFIG_X86_CLUSTERED_APIC */
+#define apic_broadcast_id (APIC_BROADCAST_ID_APIC)
+#define clustered_apic_mode (CLUSTERED_APIC_NONE)
+#define esr_disable (0)
+#define detect_clustered_apic(x,y)
+#define INT_DEST_ADDR_MODE (APIC_DEST_LOGICAL) /* logical delivery */
+#define INT_DELIVERY_MODE (dest_LowestPrio)
+#endif /* CONFIG_X86_CLUSTERED_APIC */
+#define BAD_APICID 0xFFu
+
+#define TRAMPOLINE_LOW phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0x8:0x467)
+#define TRAMPOLINE_HIGH phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0xa:0x469)
+
+#define boot_cpu_apicid ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?boot_cpu_logical_apicid:boot_cpu_physical_apicid)
+
+extern unsigned char raw_phys_apicid[NR_CPUS];
+
+/*
+ * How to map from the cpu_present_map
+ */
+static inline int cpu_present_to_apicid(int mps_cpu)
+{
+ if (clustered_apic_mode == CLUSTERED_APIC_XAPIC)
+ return raw_phys_apicid[mps_cpu];
+ if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ return (mps_cpu/4)*16 + (1<<(mps_cpu%4));
+ return mps_cpu;
+}
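+
+/*
+ * Illustrative example (not in the original source): on NUMA-Q each quad
+ * of four CPUs occupies one APIC cluster, so mps_cpu 5 (the second CPU of
+ * quad 1) maps to (5/4)*16 + (1 << (5%4)) == 16 + 2 == 18.
+ */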
+
+static inline unsigned long apicid_to_phys_cpu_present(int apicid)
+{
+ if(clustered_apic_mode)
+ return 1UL << (((apicid >> 4) << 2) + (apicid & 0x3));
+ return 1UL << apicid;
+}
+
+#define physical_to_logical_apicid(phys_apic) ( (1ul << (phys_apic & 0x3)) | (phys_apic & 0xF0u) )
+
+/*
+ * Mappings between logical cpu number and logical / physical apicid
+ * The first four macros are trivial, but it keeps the abstraction consistent
+ */
+extern volatile int logical_apicid_2_cpu[];
+extern volatile int cpu_2_logical_apicid[];
+extern volatile int physical_apicid_2_cpu[];
+extern volatile int cpu_2_physical_apicid[];
+
+#define logical_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
+#define cpu_to_logical_apicid(cpu) cpu_2_logical_apicid[cpu]
+#define physical_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
+#define cpu_to_physical_apicid(cpu) cpu_2_physical_apicid[cpu]
+#ifdef CONFIG_MULTIQUAD /* use logical IDs to bootstrap */
+#define boot_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
+#define cpu_to_boot_apicid(cpu) cpu_2_logical_apicid[cpu]
+#else /* !CONFIG_MULTIQUAD */ /* use physical IDs to bootstrap */
+#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
+#define cpu_to_boot_apicid(cpu) cpu_2_physical_apicid[cpu]
+#endif /* CONFIG_MULTIQUAD */
+
+#ifdef CONFIG_X86_CLUSTERED_APIC
+static inline int target_cpus(void)
+{
+ static int cpu;
+ switch(clustered_apic_mode){
+ case CLUSTERED_APIC_NUMAQ:
+ /* Broadcast intrs to local quad only. */
+ return APIC_BROADCAST_ID_APIC;
+ case CLUSTERED_APIC_XAPIC:
+ /* Round-robin the interrupts. */
+ cpu = (cpu+1)%smp_num_cpus;
+ return cpu_to_physical_apicid(cpu);
+ default:
+ break;
+ }
+ return cpu_online_map;
+}
+#else
+#define target_cpus() (0x01)
+#endif
+#endif
diff --git a/xen/include/asm-i386/softirq.h b/xen/include/asm-i386/softirq.h
new file mode 100644
index 0000000000..254224411b
--- /dev/null
+++ b/xen/include/asm-i386/softirq.h
@@ -0,0 +1,48 @@
+#ifndef __ASM_SOFTIRQ_H
+#define __ASM_SOFTIRQ_H
+
+#include <asm/atomic.h>
+#include <asm/hardirq.h>
+
+#define __cpu_bh_enable(cpu) \
+ do { barrier(); local_bh_count(cpu)--; } while (0)
+#define cpu_bh_disable(cpu) \
+ do { local_bh_count(cpu)++; barrier(); } while (0)
+
+#define local_bh_disable() cpu_bh_disable(smp_processor_id())
+#define __local_bh_enable() __cpu_bh_enable(smp_processor_id())
+
+#define in_softirq() (local_bh_count(smp_processor_id()) != 0)
+
+/*
+ * NOTE: this assembly code assumes:
+ *
+ * (char *)&local_bh_count - 8 == (char *)&softirq_pending
+ *
+ * If you change the offsets in irq_stat then you have to
+ * update this code as well.
+ */
+#define local_bh_enable() \
+do { \
+ unsigned int *ptr = &local_bh_count(smp_processor_id()); \
+ \
+ barrier(); \
+ if (!--*ptr) \
+ __asm__ __volatile__ ( \
+ "cmpl $0, -8(%0);" \
+ "jnz 2f;" \
+ "1:;" \
+ \
+ ".section .text.lock,\"ax\";" \
+ "2: pushl %%eax; pushl %%ecx; pushl %%edx;" \
+ "call %c1;" \
+ "popl %%edx; popl %%ecx; popl %%eax;" \
+ "jmp 1b;" \
+ ".previous;" \
+ \
+ : /* no output */ \
+ : "r" (ptr), "i" (do_softirq) \
+ /* no registers clobbered */ ); \
+} while (0)
+
+#endif /* __ASM_SOFTIRQ_H */
diff --git a/xen/include/asm-i386/spinlock.h b/xen/include/asm-i386/spinlock.h
new file mode 100644
index 0000000000..59dc7b209f
--- /dev/null
+++ b/xen/include/asm-i386/spinlock.h
@@ -0,0 +1,206 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+#include <asm/page.h>
+#include <xeno/config.h>
+#include <xeno/lib.h>
+
+#if 0
+#define SPINLOCK_DEBUG 1
+#else
+#define SPINLOCK_DEBUG 0
+#endif
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+typedef struct {
+ volatile unsigned int lock;
+#if SPINLOCK_DEBUG
+ unsigned magic;
+#endif
+} spinlock_t;
+
+#define SPINLOCK_MAGIC 0xdead4ead
+
+#if SPINLOCK_DEBUG
+#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC
+#else
+#define SPINLOCK_MAGIC_INIT /* */
+#endif
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
+
+#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
+
+/*
+ * Simple spin lock operations. There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ */
+
+#define spin_is_locked(x) (*(volatile char *)(&(x)->lock) <= 0)
+#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x))
+
+#define spin_lock_string \
+ "\n1:\t" \
+ "lock ; decb %0\n\t" \
+ "js 2f\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\t" \
+ "cmpb $0,%0\n\t" \
+ "rep;nop\n\t" \
+ "jle 2b\n\t" \
+ "jmp 1b\n" \
+ ".previous"
+
+/*
+ * This works. Despite all the confusion.
+ * (except on PPro SMP or if we are using OOSTORE)
+ * (PPro errata 66, 92)
+ */
+
+#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
+
+#define spin_unlock_string \
+ "movb $1,%0" \
+ :"=m" (lock->lock) : : "memory"
+
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+#if SPINLOCK_DEBUG
+ if (lock->magic != SPINLOCK_MAGIC)
+ BUG();
+ if (!spin_is_locked(lock))
+ BUG();
+#endif
+ __asm__ __volatile__(
+ spin_unlock_string
+ );
+}
+
+#else
+
+#define spin_unlock_string \
+ "xchgb %b0, %1" \
+ :"=q" (oldval), "=m" (lock->lock) \
+ :"0" (oldval) : "memory"
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+ char oldval = 1;
+#if SPINLOCK_DEBUG
+ if (lock->magic != SPINLOCK_MAGIC)
+ BUG();
+ if (!spin_is_locked(lock))
+ BUG();
+#endif
+ __asm__ __volatile__(
+ spin_unlock_string
+ );
+}
+
+#endif
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+ char oldval;
+ __asm__ __volatile__(
+ "xchgb %b0,%1"
+ :"=q" (oldval), "=m" (lock->lock)
+ :"0" (0) : "memory");
+ return oldval > 0;
+}
+
+static inline void spin_lock(spinlock_t *lock)
+{
+#if SPINLOCK_DEBUG
+ __label__ here;
+here:
+ if (lock->magic != SPINLOCK_MAGIC) {
+printk("eip: %p\n", &&here);
+ BUG();
+ }
+#endif
+ __asm__ __volatile__(
+ spin_lock_string
+ :"=m" (lock->lock) : : "memory");
+}
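+
+#if 0
+/* Illustrative usage sketch (not in the original source): a lock-protected
+ * critical section around a hypothetical shared counter: */
+static int example_counter;
+static spinlock_t example_lock = SPIN_LOCK_UNLOCKED;
+static void example_critical_section(void)
+{
+    spin_lock(&example_lock);
+    example_counter++;          /* at most one CPU executes this at a time */
+    spin_unlock(&example_lock);
+}
+#endif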
+
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+ volatile unsigned int lock;
+#if SPINLOCK_DEBUG
+ unsigned magic;
+#endif
+} rwlock_t;
+
+#define RWLOCK_MAGIC 0xdeaf1eed
+
+#if SPINLOCK_DEBUG
+#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC
+#else
+#define RWLOCK_MAGIC_INIT /* */
+#endif
+
+#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
+
+#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
+
+/*
+ * On x86, we implement read-write locks as a 32-bit counter
+ * with the high bit (sign) being the "contended" bit.
+ *
+ * The inline assembly is non-obvious. Think about it.
+ *
+ * Changed to use the same technique as rw semaphores. See
+ * semaphore.h for details. -ben
+ */
+/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
+
+static inline void read_lock(rwlock_t *rw)
+{
+#if SPINLOCK_DEBUG
+ if (rw->magic != RWLOCK_MAGIC)
+ BUG();
+#endif
+ __build_read_lock(rw, "__read_lock_failed");
+}
+
+static inline void write_lock(rwlock_t *rw)
+{
+#if SPINLOCK_DEBUG
+ if (rw->magic != RWLOCK_MAGIC)
+ BUG();
+#endif
+ __build_write_lock(rw, "__write_lock_failed");
+}
+
+#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+
+static inline int write_trylock(rwlock_t *lock)
+{
+ atomic_t *count = (atomic_t *)lock;
+ if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+ return 1;
+ atomic_add(RW_LOCK_BIAS, count);
+ return 0;
+}
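+
+#if 0
+/* Illustrative usage sketch (not in the original source): any number of
+ * CPUs may hold the read lock at once; a writer excludes everyone: */
+static rwlock_t example_rwlock = RW_LOCK_UNLOCKED;
+static int example_read(int *shared)
+{
+    int v;
+    read_lock(&example_rwlock);
+    v = *shared;
+    read_unlock(&example_rwlock);
+    return v;
+}
+static void example_write(int *shared, int v)
+{
+    write_lock(&example_rwlock);
+    *shared = v;
+    write_unlock(&example_rwlock);
+}
+#endif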
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/xen/include/asm-i386/system.h b/xen/include/asm-i386/system.h
new file mode 100644
index 0000000000..1ccce595d8
--- /dev/null
+++ b/xen/include/asm-i386/system.h
@@ -0,0 +1,354 @@
+#ifndef __ASM_SYSTEM_H
+#define __ASM_SYSTEM_H
+
+#include <xeno/config.h>
+#include <asm/bitops.h>
+
+struct task_struct; /* one of the stranger aspects of C forward declarations.. */
+extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
+
+#define prepare_to_switch() do { } while(0)
+#define switch_to(prev,next) do { \
+ asm volatile("pushl %%ebp\n\t" \
+ "pushl %%ebx\n\t" \
+ "pushl %%esi\n\t" \
+ "pushl %%edi\n\t" \
+ "movl %%esp,%0\n\t" /* save ESP */ \
+ "cli\n\t" \
+ "movl %2,%%esp\n\t" /* restore ESP */ \
+ "movl %6,%%cr3\n\t" /* restore pagetables */\
+ "sti\n\t" \
+ "movl $1f,%1\n\t" /* save EIP */ \
+ "pushl %3\n\t" /* restore EIP */ \
+ "jmp __switch_to\n" \
+ "1:\t" \
+ "popl %%edi\n\t" \
+ "popl %%esi\n\t" \
+ "popl %%ebx\n\t" \
+ "popl %%ebp\n\t" \
+ :"=m" (prev->thread.esp),"=m" (prev->thread.eip) \
+ :"m" (next->thread.esp),"m" (next->thread.eip), \
+ "a" (prev), "d" (next), \
+ "c" (pagetable_val(next->mm.pagetable)) \
+ :"memory"); \
+} while (0)
+
+#define _set_base(addr,base) do { unsigned long __pr; \
+__asm__ __volatile__ ("movw %%dx,%1\n\t" \
+ "rorl $16,%%edx\n\t" \
+ "movb %%dl,%2\n\t" \
+ "movb %%dh,%3" \
+ :"=&d" (__pr) \
+ :"m" (*((addr)+2)), \
+ "m" (*((addr)+4)), \
+ "m" (*((addr)+7)), \
+ "0" (base) \
+ ); } while(0)
+
+#define _set_limit(addr,limit) do { unsigned long __lr; \
+__asm__ __volatile__ ("movw %%dx,%1\n\t" \
+ "rorl $16,%%edx\n\t" \
+ "movb %2,%%dh\n\t" \
+ "andb $0xf0,%%dh\n\t" \
+ "orb %%dh,%%dl\n\t" \
+ "movb %%dl,%2" \
+ :"=&d" (__lr) \
+ :"m" (*(addr)), \
+ "m" (*((addr)+6)), \
+ "0" (limit) \
+ ); } while(0)
+
+#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) )
+#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 )
+
+static inline unsigned long _get_base(char * addr)
+{
+ unsigned long __base;
+ __asm__("movb %3,%%dh\n\t"
+ "movb %2,%%dl\n\t"
+ "shll $16,%%edx\n\t"
+ "movw %1,%%dx"
+ :"=&d" (__base)
+ :"m" (*((addr)+2)),
+ "m" (*((addr)+4)),
+ "m" (*((addr)+7)));
+ return __base;
+}
+
+#define get_base(ldt) _get_base( ((char *)&(ldt)) )
+
+/*
+ * Load a segment. Fall back on loading the zero
+ * segment if something goes wrong..
+ */
+#define loadsegment(seg,value) \
+ asm volatile("\n" \
+ "1:\t" \
+ "movl %0,%%" #seg "\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3:\t" \
+ "pushl $0\n\t" \
+ "popl %%" #seg "\n\t" \
+ "jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n\t" \
+ ".align 4\n\t" \
+ ".long 1b,3b\n" \
+ ".previous" \
+ : :"m" (*(unsigned int *)&(value)))
+
+/*
+ * Clear and set 'TS' bit respectively
+ */
+#define clts() __asm__ __volatile__ ("clts")
+#define read_cr0() ({ \
+ unsigned int __dummy; \
+ __asm__( \
+ "movl %%cr0,%0\n\t" \
+ :"=r" (__dummy)); \
+ __dummy; \
+})
+#define write_cr0(x) \
+ __asm__("movl %0,%%cr0": :"r" (x));
+
+#define read_cr4() ({ \
+ unsigned int __dummy; \
+ __asm__( \
+ "movl %%cr4,%0\n\t" \
+ :"=r" (__dummy)); \
+ __dummy; \
+})
+#define write_cr4(x) \
+ __asm__("movl %0,%%cr4": :"r" (x));
+#define stts() write_cr0(8 | read_cr0())
+
+#define wbinvd() \
+ __asm__ __volatile__ ("wbinvd": : :"memory");
+
+static inline unsigned long get_limit(unsigned long segment)
+{
+ unsigned long __limit;
+ __asm__("lsll %1,%0"
+ :"=r" (__limit):"r" (segment));
+ return __limit+1;
+}
+
+#define nop() __asm__ __volatile__ ("nop")
+
+#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+
+#define tas(ptr) (xchg((ptr),1))
+
+struct __xchg_dummy { unsigned long a[100]; };
+#define __xg(x) ((struct __xchg_dummy *)(x))
+
+
+/*
+ * The semantics of CMPXCHG8B are a bit strange, which is why
+ * there is a loop and the loading of %%eax and %%edx has to
+ * be inside. This inlines well in most cases; the cached
+ * cost is around ~38 cycles. (In the future we might want
+ * to do a SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
+ * might have an implicit FPU save as a cost, so it's not
+ * clear which path to go.)
+ */
+static inline void __set_64bit (unsigned long long * ptr,
+ unsigned int low, unsigned int high)
+{
+ __asm__ __volatile__ (
+ "\n1:\t"
+ "movl (%0), %%eax\n\t"
+ "movl 4(%0), %%edx\n\t"
+ "cmpxchg8b (%0)\n\t"
+ "jnz 1b"
+ : /* no outputs */
+ : "D"(ptr),
+ "b"(low),
+ "c"(high)
+ : "ax","dx","memory");
+}
+
+static inline void __set_64bit_constant (unsigned long long *ptr,
+ unsigned long long value)
+{
+ __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL));
+}
+#define ll_low(x) *(((unsigned int*)&(x))+0)
+#define ll_high(x) *(((unsigned int*)&(x))+1)
+
+static inline void __set_64bit_var (unsigned long long *ptr,
+ unsigned long long value)
+{
+ __set_64bit(ptr,ll_low(value), ll_high(value));
+}
+
+#define set_64bit(ptr,value) \
+(__builtin_constant_p(value) ? \
+ __set_64bit_constant(ptr, value) : \
+ __set_64bit_var(ptr, value) )
+
+#define _set_64bit(ptr,value) \
+(__builtin_constant_p(value) ? \
+ __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
+ __set_64bit(ptr, ll_low(value), ll_high(value)) )
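+
+#if 0
+/* Illustrative sketch (not in the original source): atomically publish a
+ * full 64-bit value so that concurrent readers never observe a torn
+ * half-write: */
+static unsigned long long example_stamp;
+static void example_publish(unsigned long long v)
+{
+    set_64bit(&example_stamp, v);
+}
+#endif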
+
+/*
+ * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
+ * Note 2: xchg has side effect, so that attribute volatile is necessary,
+ * but generally the primitive is invalid, *ptr is output argument. --ANK
+ */
+static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+{
+ switch (size) {
+ case 1:
+ __asm__ __volatile__("xchgb %b0,%1"
+ :"=q" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ case 2:
+ __asm__ __volatile__("xchgw %w0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ case 4:
+ __asm__ __volatile__("xchgl %0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ }
+ return x;
+}
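+
+#if 0
+/* Illustrative sketch (not in the original source): xchg() atomically
+ * stores the new value and returns the old one, e.g. to claim a flag: */
+static volatile unsigned long example_flag;
+static int example_try_claim(void)
+{
+    return xchg(&example_flag, 1) == 0; /* non-zero iff we set it first */
+}
+#endif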
+
+/*
+ * Atomic compare and exchange. Compare OLD with MEM, if identical,
+ * store NEW in MEM. Return the initial value in MEM. Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+#ifdef CONFIG_X86_CMPXCHG
+#define __HAVE_ARCH_CMPXCHG 1
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+ unsigned long new, int size)
+{
+ unsigned long prev;
+ switch (size) {
+ case 1:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ }
+ return old;
+}
+
+#define cmpxchg(ptr,o,n)\
+ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+ (unsigned long)(n),sizeof(*(ptr))))
+
+#else
+/* Compiling for a 386 proper. Is it worth implementing via cli/sti? */
+#endif
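+
+#if 0
+/* Illustrative sketch (not in the original source), assuming
+ * CONFIG_X86_CMPXCHG: a lock-free increment built on cmpxchg(): */
+static inline void example_atomic_inc(volatile unsigned long *v)
+{
+    unsigned long old;
+    do {
+        old = *v;
+    } while (cmpxchg(v, old, old + 1) != old);
+}
+#endif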
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ *
+ * For now, "wmb()" doesn't actually do anything, as all
+ * Intel CPU's follow what Intel calls a *Processor Order*,
+ * in which all writes are seen in the program order even
+ * outside the CPU.
+ *
+ * I expect future Intel CPU's to have a weaker ordering,
+ * but I'd also expect them to finally get their act together
+ * and add some real memory barriers if so.
+ *
+ * Some non intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+
+#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#define rmb() mb()
+
+#ifdef CONFIG_X86_OOSTORE
+#define wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#else
+#define wmb() __asm__ __volatile__ ("": : :"memory")
+#endif
+
+#ifdef CONFIG_SMP
+#define smp_mb() mb()
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
+#else
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#endif
+
+#define set_mb(var, value) do { xchg(&var, value); } while (0)
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
+
+/* interrupt control.. */
+#define __save_flags(x) __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */)
+#define __restore_flags(x) __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc")
+#define __cli() __asm__ __volatile__("cli": : :"memory")
+#define __sti() __asm__ __volatile__("sti": : :"memory")
+/* used in the idle loop; sti takes one instruction cycle to complete */
+#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
+
+/* For spinlocks etc */
+#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
+#define local_irq_restore(x) __restore_flags(x)
+#define local_irq_disable() __cli()
+#define local_irq_enable() __sti()
+
+#ifdef CONFIG_SMP
+
+extern void __global_cli(void);
+extern void __global_sti(void);
+extern unsigned long __global_save_flags(void);
+extern void __global_restore_flags(unsigned long);
+#define cli() __global_cli()
+#define sti() __global_sti()
+#define save_flags(x) ((x)=__global_save_flags())
+#define restore_flags(x) __global_restore_flags(x)
+
+#else
+
+#define cli() __cli()
+#define sti() __sti()
+#define save_flags(x) __save_flags(x)
+#define restore_flags(x) __restore_flags(x)
+
+#endif
+
+/*
+ * disable hlt during certain critical i/o operations
+ */
+#define HAVE_DISABLE_HLT
+void disable_hlt(void);
+void enable_hlt(void);
+
+#define BROKEN_ACPI_Sx 0x0001
+#define BROKEN_INIT_AFTER_S1 0x0002
+
+#endif
diff --git a/xen/include/asm-i386/time.h b/xen/include/asm-i386/time.h
new file mode 100644
index 0000000000..2f834908a7
--- /dev/null
+++ b/xen/include/asm-i386/time.h
@@ -0,0 +1,49 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: time.h
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: Architecture dependent definition of time variables
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ */
+
+#ifndef _ASM_TIME_H_
+#define _ASM_TIME_H_
+
+#include <asm/types.h>
+#include <asm/msr.h>
+
+/*
+ * Cycle Counter Time
+ */
+typedef u64 cc_time_t;
+static inline cc_time_t get_cc_time(void)
+{
+ u64 ret;
+ rdtscll(ret);
+ return ret;
+}
+
+/*
+ * System Time
+ */
+typedef s64 s_time_t; /* System time */
+extern u32 stime_pcc; /* cycle counter value at last timer irq */
+extern s_time_t stime_now; /* time in ns at last timer IRQ */
+
+/*
+ * Domain Virtual Time
+ */
+typedef u64 dv_time_t;
+
+#endif /* _ASM_TIME_H_ */
diff --git a/xen/include/asm-i386/timex.h b/xen/include/asm-i386/timex.h
new file mode 100644
index 0000000000..3eeb5d2b70
--- /dev/null
+++ b/xen/include/asm-i386/timex.h
@@ -0,0 +1,58 @@
+/*
+ * linux/include/asm-i386/timex.h
+ *
+ * i386 architecture timex specifications
+ */
+#ifndef _ASMi386_TIMEX_H
+#define _ASMi386_TIMEX_H
+
+#include <linux/config.h>
+#include <asm/msr.h>
+
+#ifdef CONFIG_MELAN
+# define CLOCK_TICK_RATE 1189200 /* AMD Elan has different frequency! */
+#else
+# define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
+#endif
+
+#define CLOCK_TICK_FACTOR 20 /* Factor of both 1000000 and CLOCK_TICK_RATE */
+#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \
+ (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \
+ << (SHIFT_SCALE-SHIFT_HZ)) / HZ)
+
+/*
+ * Standard way to access the cycle counter on i586+ CPUs.
+ * Currently only used on SMP.
+ *
+ * If you really have a SMP machine with i486 chips or older,
+ * compile for that, and this will just always return zero.
+ * That's ok, it just means that the nicer scheduling heuristics
+ * won't work for you.
+ *
+ * We only use the low 32 bits, and we'd simply better make sure
+ * that we reschedule before that wraps. Scheduling at least every
+ * four billion cycles just basically sounds like a good idea,
+ * regardless of how fast the machine is.
+ */
+typedef unsigned long long cycles_t;
+
+extern cycles_t cacheflush_time;
+
+static inline cycles_t get_cycles (void)
+{
+#ifndef CONFIG_X86_TSC
+ return 0;
+#else
+ unsigned long long ret;
+
+ rdtscll(ret);
+ return ret;
+#endif
+}
+
+extern unsigned long cpu_khz;
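+
+/*
+ * Conversion sketch: cpu_khz is cycles per millisecond, so an elapsed
+ * cycle count converts to microseconds as (assuming the TSC is present):
+ *
+ *     cycles_t c0 = get_cycles();
+ *     ...
+ *     unsigned long us = (unsigned long)((get_cycles() - c0) * 1000 / cpu_khz);
+ */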
+
+#define vxtime_lock() do {} while (0)
+#define vxtime_unlock() do {} while (0)
+
+#endif
diff --git a/xen/include/asm-i386/types.h b/xen/include/asm-i386/types.h
new file mode 100644
index 0000000000..2bd0f258b9
--- /dev/null
+++ b/xen/include/asm-i386/types.h
@@ -0,0 +1,50 @@
+#ifndef _I386_TYPES_H
+#define _I386_TYPES_H
+
+typedef unsigned short umode_t;
+
+/*
+ * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
+ * header files exported to user space
+ */
+
+typedef __signed__ char __s8;
+typedef unsigned char __u8;
+
+typedef __signed__ short __s16;
+typedef unsigned short __u16;
+
+typedef __signed__ int __s32;
+typedef unsigned int __u32;
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+typedef __signed__ long long __s64;
+typedef unsigned long long __u64;
+#endif
+
+#include <xeno/config.h>
+
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+typedef signed long long s64;
+typedef unsigned long long u64;
+
+#define BITS_PER_LONG 32
+
+/* DMA addresses come in generic and 64-bit flavours. */
+
+#ifdef CONFIG_HIGHMEM
+typedef u64 dma_addr_t;
+#else
+typedef u32 dma_addr_t;
+#endif
+typedef u64 dma64_addr_t;
+
+#endif
diff --git a/xen/include/asm-i386/uaccess.h b/xen/include/asm-i386/uaccess.h
new file mode 100644
index 0000000000..ba19cfb2b3
--- /dev/null
+++ b/xen/include/asm-i386/uaccess.h
@@ -0,0 +1,600 @@
+#ifndef __i386_UACCESS_H
+#define __i386_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/prefetch.h>
+#include <asm/page.h>
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not. If get_fs() == USER_DS, checking is performed; with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
+
+
+#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF)
+#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds() (KERNEL_DS)
+#define get_fs() (current->addr_limit)
+#define set_fs(x) (current->addr_limit = (x))
+
+#define segment_eq(a,b) ((a).seg == (b).seg)
+
+extern int __verify_write(const void *, unsigned long);
+
+#define __addr_ok(addr) ((unsigned long)(addr) < (current->addr_limit.seg))
+
+/*
+ * Uhhuh, this needs 33-bit arithmetic. We have a carry..
+ */
+#define __range_ok(addr,size) ({ \
+ unsigned long flag,sum; \
+ asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \
+ :"=&r" (flag), "=r" (sum) \
+ :"1" (addr),"g" ((int)(size)),"g" (current->addr_limit.seg)); \
+ flag; })
+
+#define access_ok(type,addr,size) (__range_ok(addr,size) == 0)
+
+static inline int verify_area(int type, const void * addr, unsigned long size)
+{
+ return access_ok(type,addr,size) ? 0 : -EFAULT;
+}
+
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue. No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+ * we don't even have to jump over them. Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+ unsigned long insn, fixup;
+};
+
+/* Returns 0 if exception not found and fixup otherwise. */
+extern unsigned long search_exception_table(unsigned long);
+
+
+/*
+ * These are the main single-value transfer routines. They automatically
+ * use the right size if we just have the right pointer type.
+ *
+ * This gets kind of ugly. We want to return _two_ values in "get_user()"
+ * and yet we don't want to do any pointers, because that is too much
+ * of a performance impact. Thus we have a few rather ugly macros here,
+ * and hide all the ugliness from the user.
+ *
+ * The "__xxx" versions of the user access functions are versions that
+ * do not verify the address space, that must have been done previously
+ * with a separate "access_ok()" call (this is used when we do multiple
+ * accesses to the same area of user memory).
+ */
+
+extern void __get_user_1(void);
+extern void __get_user_2(void);
+extern void __get_user_4(void);
+
+#define __get_user_x(size,ret,x,ptr) \
+ __asm__ __volatile__("call __get_user_" #size \
+ :"=a" (ret),"=d" (x) \
+ :"0" (ptr))
+
+/* Careful: we have to cast the result to the type of the pointer for sign reasons */
+#define get_user(x,ptr) \
+({ int __ret_gu=1; long long __val_gu=0;				\
+ switch(sizeof (*(ptr))) { \
+ case 1: __ret_gu=copy_from_user(&__val_gu,ptr,1); break; \
+ case 2: __ret_gu=copy_from_user(&__val_gu,ptr,2); break; \
+ case 4: __ret_gu=copy_from_user(&__val_gu,ptr,4); break; \
+ default: __ret_gu=copy_from_user(&__val_gu,ptr,8); break; \
+ /*case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break;*/ \
+ /*case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break;*/ \
+ /*case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break;*/ \
+ /*default: __get_user_x(X,__ret_gu,__val_gu,ptr); break;*/ \
+ } \
+ (x) = (__typeof__(*(ptr)))__val_gu; \
+ __ret_gu; \
+})
+
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+extern void __put_user_bad(void);
+
+#define put_user(x,ptr) \
+ __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+#define __get_user(x,ptr) \
+ __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
+#define __put_user(x,ptr) \
+ __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
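+
+/*
+ * Usage sketch: get_user/put_user return zero on success and nonzero on
+ * fault, so callers test the result, not the fetched value ('uptr' is a
+ * hypothetical user/guest virtual address):
+ *
+ *     int v;
+ *     if (get_user(v, (int *)uptr))
+ *         return -EFAULT;
+ *     if (put_user(v + 1, (int *)uptr))
+ *         return -EFAULT;
+ */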
+
+#define __put_user_nocheck(x,ptr,size) \
+({ \
+ long __pu_err; \
+ __put_user_size((x),(ptr),(size),__pu_err); \
+ __pu_err; \
+})
+
+
+#define __put_user_check(x,ptr,size) \
+({ \
+ long __pu_err = -EFAULT; \
+ __typeof__(*(ptr)) *__pu_addr = (ptr); \
+ if (access_ok(VERIFY_WRITE,__pu_addr,size)) \
+ __put_user_size((x),__pu_addr,(size),__pu_err); \
+ __pu_err; \
+})
+
+#define __put_user_u64(x, addr, err) \
+ __asm__ __volatile__( \
+ "1: movl %%eax,0(%2)\n" \
+ "2: movl %%edx,4(%2)\n" \
+ "3:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "4: movl %3,%0\n" \
+ " jmp 3b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,4b\n" \
+ " .long 2b,4b\n" \
+ ".previous" \
+ : "=r"(err) \
+ : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err))
+
+#define __put_user_size(x,ptr,size,retval) \
+do { \
+ retval = 0; \
+ switch (size) { \
+ case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break; \
+ case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break; \
+ case 4: __put_user_asm(x,ptr,retval,"l","","ir"); break; \
+ case 8: __put_user_u64(x,ptr,retval); break; \
+ default: __put_user_bad(); \
+ } \
+} while (0)
+
+struct __large_struct { unsigned long buf[100]; };
+#define __m(x) (*(struct __large_struct *)(x))
+
+/*
+ * Tell gcc we read from memory instead of writing: this is because
+ * we do not write to any memory gcc knows about, so there are no
+ * aliasing issues.
+ */
+#define __put_user_asm(x, addr, err, itype, rtype, ltype) \
+ __asm__ __volatile__( \
+ "1: mov"itype" %"rtype"1,%2\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl %3,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,3b\n" \
+ ".previous" \
+ : "=r"(err) \
+ : ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err))
+
+
+#define __get_user_nocheck(x,ptr,size) \
+({ \
+ long __gu_err, __gu_val; \
+ __get_user_size(__gu_val,(ptr),(size),__gu_err); \
+ (x) = (__typeof__(*(ptr)))__gu_val; \
+ __gu_err; \
+})
+
+extern long __get_user_bad(void);
+
+#define __get_user_size(x,ptr,size,retval) \
+do { \
+ retval = 0; \
+ switch (size) { \
+ case 1: __get_user_asm(x,ptr,retval,"b","b","=q"); break; \
+ case 2: __get_user_asm(x,ptr,retval,"w","w","=r"); break; \
+ case 4: __get_user_asm(x,ptr,retval,"l","","=r"); break; \
+ default: (x) = __get_user_bad(); \
+ } \
+} while (0)
+
+#define __get_user_asm(x, addr, err, itype, rtype, ltype) \
+ __asm__ __volatile__( \
+ "1: mov"itype" %2,%"rtype"1\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl %3,%0\n" \
+ " xor"itype" %"rtype"1,%"rtype"1\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,3b\n" \
+ ".previous" \
+ : "=r"(err), ltype (x) \
+ : "m"(__m(addr)), "i"(-EFAULT), "0"(err))
+
+
+/*
+ * Copy To/From Userspace
+ */
+
+/* Generic arbitrary sized copy. */
+#define __copy_user(to,from,size) \
+do { \
+ int __d0, __d1; \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ " movl %3,%0\n" \
+ "1: rep; movsb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: lea 0(%3,%0,4),%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,2b\n" \
+ ".previous" \
+ : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \
+ : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \
+ : "memory"); \
+} while (0)
+
+#define __copy_user_zeroing(to,from,size) \
+do { \
+ int __d0, __d1; \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ " movl %3,%0\n" \
+ "1: rep; movsb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: lea 0(%3,%0,4),%0\n" \
+ "4: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosb\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \
+ : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \
+ : "memory"); \
+} while (0)
+
+/* We make the __ versions of copy_from/to_user inline, because they're often
+ * used in fast paths and have only a small space overhead.
+ */
+static inline unsigned long
+__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
+{
+ __copy_user_zeroing(to,from,n);
+ return n;
+}
+
+static inline unsigned long
+__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
+{
+ __copy_user(to,from,n);
+ return n;
+}
+
+
+/* Optimize just a little bit when we know the size of the move. */
+#define __constant_copy_user(to, from, size) \
+do { \
+ int __d0, __d1; \
+ switch (size & 3) { \
+ default: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "2: shl $2,%0\n" \
+ " jmp 1b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,2b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 1: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: shl $2,%0\n" \
+ "4: incl %0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 2: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsw\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: shl $2,%0\n" \
+ "4: addl $2,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 3: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsw\n" \
+ "2: movsb\n" \
+ "3:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "4: shl $2,%0\n" \
+ "5: addl $2,%0\n" \
+ "6: incl %0\n" \
+ " jmp 3b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,4b\n" \
+ " .long 1b,5b\n" \
+ " .long 2b,6b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ } \
+} while (0)
+
+/* Optimize just a little bit when we know the size of the move. */
+#define __constant_copy_user_zeroing(to, from, size) \
+do { \
+ int __d0, __d1; \
+ switch (size & 3) { \
+ default: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "2: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosl\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " shl $2,%0\n" \
+ " jmp 1b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,2b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 1: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosl\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " shl $2,%0\n" \
+ " incl %0\n" \
+ " jmp 2b\n" \
+ "4: pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " incl %0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 2: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsw\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosl\n" \
+ " stosw\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " shl $2,%0\n" \
+ " addl $2,%0\n" \
+ " jmp 2b\n" \
+ "4: pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " stosw\n" \
+ " popl %%eax\n" \
+ " addl $2,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 3: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsw\n" \
+ "2: movsb\n" \
+ "3:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "4: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosl\n" \
+ " stosw\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " shl $2,%0\n" \
+ " addl $3,%0\n" \
+ " jmp 2b\n" \
+ "5: pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " stosw\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " addl $3,%0\n" \
+ " jmp 2b\n" \
+ "6: pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " incl %0\n" \
+ " jmp 3b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,4b\n" \
+ " .long 1b,5b\n" \
+ " .long 2b,6b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ } \
+} while (0)
+
+unsigned long __generic_copy_to_user(void *, const void *, unsigned long);
+unsigned long __generic_copy_from_user(void *, const void *, unsigned long);
+
+static inline unsigned long
+__constant_copy_to_user(void *to, const void *from, unsigned long n)
+{
+ prefetch(from);
+ if (access_ok(VERIFY_WRITE, to, n))
+ __constant_copy_user(to,from,n);
+ return n;
+}
+
+static inline unsigned long
+__constant_copy_from_user(void *to, const void *from, unsigned long n)
+{
+ if (access_ok(VERIFY_READ, from, n))
+ __constant_copy_user_zeroing(to,from,n);
+ else
+ memset(to, 0, n);
+ return n;
+}
+
+static inline unsigned long
+__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
+{
+ __constant_copy_user(to,from,n);
+ return n;
+}
+
+static inline unsigned long
+__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
+{
+ __constant_copy_user_zeroing(to,from,n);
+ return n;
+}
+
+#define copy_to_user(to,from,n) \
+ (__builtin_constant_p(n) ? \
+ __constant_copy_to_user((to),(from),(n)) : \
+ __generic_copy_to_user((to),(from),(n)))
+
+#define copy_from_user(to,from,n) \
+ (__builtin_constant_p(n) ? \
+ __constant_copy_from_user((to),(from),(n)) : \
+ __generic_copy_from_user((to),(from),(n)))
+
+#define __copy_to_user(to,from,n) \
+ (__builtin_constant_p(n) ? \
+ __constant_copy_to_user_nocheck((to),(from),(n)) : \
+ __generic_copy_to_user_nocheck((to),(from),(n)))
+
+#define __copy_from_user(to,from,n) \
+ (__builtin_constant_p(n) ? \
+ __constant_copy_from_user_nocheck((to),(from),(n)) : \
+ __generic_copy_from_user_nocheck((to),(from),(n)))
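+
+/*
+ * Usage sketch: the copy routines return the number of bytes that could
+ * NOT be copied, so zero means complete success ('foo' and 'uptr' are
+ * hypothetical):
+ *
+ *     struct foo k;
+ *     if (copy_from_user(&k, uptr, sizeof(k)) != 0)
+ *         return -EFAULT;       // partial copy: reject the request
+ */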
+
+long strncpy_from_user(char *dst, const char *src, long count);
+long __strncpy_from_user(char *dst, const char *src, long count);
+#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
+long strnlen_user(const char *str, long n);
+unsigned long clear_user(void *mem, unsigned long len);
+unsigned long __clear_user(void *mem, unsigned long len);
+
+#endif /* __i386_UACCESS_H */
diff --git a/xen/include/asm-i386/unaligned.h b/xen/include/asm-i386/unaligned.h
new file mode 100644
index 0000000000..7acd795762
--- /dev/null
+++ b/xen/include/asm-i386/unaligned.h
@@ -0,0 +1,37 @@
+#ifndef __I386_UNALIGNED_H
+#define __I386_UNALIGNED_H
+
+/*
+ * The i386 can do unaligned accesses itself.
+ *
+ * The strange macros are there to make sure these can't
+ * be misused in a way that makes them not work on other
+ * architectures where unaligned accesses aren't as simple.
+ */
+
+/**
+ * get_unaligned - get value from possibly mis-aligned location
+ * @ptr: pointer to value
+ *
+ * This macro should be used for accessing values larger in size than
+ * single bytes at locations that are expected to be improperly aligned,
+ * e.g. retrieving a u16 value from a location not u16-aligned.
+ *
+ * Note that unaligned accesses can be very expensive on some architectures.
+ */
+#define get_unaligned(ptr) (*(ptr))
+
+/**
+ * put_unaligned - put value to a possibly mis-aligned location
+ * @val: value to place
+ * @ptr: pointer to location
+ *
+ * This macro should be used for placing values larger in size than
+ * single bytes at locations that are expected to be improperly aligned,
+ * e.g. writing a u16 value to a location not u16-aligned.
+ *
+ * Note that unaligned accesses can be very expensive on some architectures.
+ */
+#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) ))
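+
+/*
+ * Usage sketch: pulling a u16 out of a byte stream at an odd offset
+ * ('pkt' and 'out' are hypothetical buffers):
+ *
+ *     unsigned short proto = get_unaligned((unsigned short *)(pkt + 12));
+ *     put_unaligned(proto, (unsigned short *)(out + 1));
+ */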
+
+#endif
diff --git a/xen/include/hypervisor-ifs/block.h b/xen/include/hypervisor-ifs/block.h
new file mode 100644
index 0000000000..627055bf0b
--- /dev/null
+++ b/xen/include/hypervisor-ifs/block.h
@@ -0,0 +1,78 @@
+/******************************************************************************
+ * block.h
+ *
+ * Block IO communication rings.
+ *
+ * These are the ring data structures for buffering messages between
+ * the hypervisor and guest OSes.
+ *
+ */
+
+#ifndef __BLOCK_H__
+#define __BLOCK_H__
+
+#include <linux/kdev_t.h>
+
+/* the first four definitions match fs.h */
+#define XEN_BLOCK_READ 0
+#define XEN_BLOCK_WRITE 1
+#define XEN_BLOCK_READA 2 /* currently unused */
+#define XEN_BLOCK_SPECIAL 4 /* currently unused */
+#define XEN_BLOCK_PROBE 8 /* determine io configuration from hypervisor */
+#define XEN_BLOCK_DEBUG 16 /* debug */
+
+#define BLK_RING_SIZE 128
+#define BLK_RING_MAX_ENTRIES (BLK_RING_SIZE - 2)
+#define BLK_RING_INC(_i) (((_i)+1) & (BLK_RING_SIZE-1))
+#define BLK_RING_ADD(_i,_j) (((_i)+(_j)) & (BLK_RING_SIZE-1))
+
+typedef struct blk_ring_req_entry
+{
+ void * id; /* for guest os use */
+ int operation; /* XEN_BLOCK_READ or XEN_BLOCK_WRITE */
+ char * buffer;
+ unsigned long block_number; /* block number */
+ unsigned short block_size; /* block size */
+ kdev_t device;
+ unsigned long sector_number; /* real buffer location on disk */
+} blk_ring_req_entry_t;
+
+typedef struct blk_ring_resp_entry
+{
+ void *id;
+ unsigned long status;
+} blk_ring_resp_entry_t;
+
+typedef struct blk_ring_st
+{
+ unsigned int req_prod; /* Request producer. Updated by guest OS. */
+ unsigned int resp_prod; /* Response producer. Updated by Xen. */
+ union {
+ blk_ring_req_entry_t req;
+ blk_ring_resp_entry_t resp;
+ } ring[BLK_RING_SIZE];
+} blk_ring_t;
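+
+/*
+ * Producer sketch: a guest OS queues a request by filling the slot at
+ * req_prod and then advancing it with BLK_RING_INC ('ring' and 'req' are
+ * hypothetical; a real driver must also bound outstanding requests by
+ * BLK_RING_MAX_ENTRIES):
+ *
+ *     ring->ring[ring->req_prod].req = req;           // copy request in
+ *     ring->req_prod = BLK_RING_INC(ring->req_prod);  // publish to Xen
+ */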
+
+#define MAX_XEN_DISK_COUNT 100
+
+#define XEN_DISK_IDE 1
+#define XEN_DISK_SCSI 2
+
+typedef struct xen_disk /* physical disk */
+{
+ int type; /* disk type */
+ unsigned long capacity;
+ unsigned char heads; /* hdreg.h::hd_geometry */
+ unsigned char sectors; /* hdreg.h::hd_geometry */
+ unsigned int cylinders; /* hdreg.h::hd_big_geometry */
+ unsigned long start; /* hdreg.h::hd_geometry */
+ void * gendisk; /* struct gendisk ptr */
+} xen_disk_t;
+
+typedef struct xen_disk_info
+{
+ int count; /* number of subsequent xen_disk_t structures to follow */
+  xen_disk_t disks[MAX_XEN_DISK_COUNT];
+} xen_disk_info_t;
+
+#endif
diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h
new file mode 100644
index 0000000000..6ecac5848e
--- /dev/null
+++ b/xen/include/hypervisor-ifs/hypervisor-if.h
@@ -0,0 +1,209 @@
+/******************************************************************************
+ * hypervisor-if.h
+ *
+ * Interface to Xeno hypervisor.
+ */
+
+#include "network.h"
+#include "block.h"
+
+#ifndef __HYPERVISOR_IF_H__
+#define __HYPERVISOR_IF_H__
+
+/*
+ * Virtual addresses beyond this are not modifiable by guest OSes.
+ * The machine->physical mapping table starts at this address, read-only
+ * to all domains except DOM0.
+ */
+#define HYPERVISOR_VIRT_START (0xFC000000UL)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
+typedef struct trap_info_st
+{
+ unsigned char vector; /* exception/interrupt vector */
+ unsigned char dpl; /* privilege level */
+ unsigned short cs; /* code selector */
+ unsigned long address; /* code address */
+} trap_info_t;
+
+
+typedef struct
+{
+/*
+ * PGREQ_XXX: specified in least-significant bits of 'ptr' field.
+ * All requests specify the relevant PTE or PT address in 'ptr'.
+ * Normal requests specify the update value in 'val'.
+ * Extended requests specify the command in the least 8 bits of 'val'.
+ */
+/* A normal page-table update request. */
+#define PGREQ_NORMAL 0
+/* Update an entry in the machine->physical mapping table. */
+#define PGREQ_MPT_UPDATE 1
+/* An extended command. */
+#define PGREQ_EXTENDED_COMMAND 2
+/* DOM0 can make entirely unchecked updates which do not affect refcnts. */
+#define PGREQ_UNCHECKED_UPDATE 3
+ unsigned long ptr, val; /* *ptr = val */
+/* Announce a new top-level page table. */
+#define PGEXT_PIN_L1_TABLE 0
+#define PGEXT_PIN_L2_TABLE 1
+#define PGEXT_PIN_L3_TABLE 2
+#define PGEXT_PIN_L4_TABLE 3
+#define PGEXT_UNPIN_TABLE 4
+#define PGEXT_NEW_BASEPTR 5
+#define PGEXT_TLB_FLUSH 6
+#define PGEXT_INVLPG 7
+#define PGEXT_CMD_MASK 255
+#define PGEXT_CMD_SHIFT 8
+} page_update_request_t;
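+
+/*
+ * Encoding sketch: a normal PTE update followed by a TLB flush, expressed
+ * as two requests ('pte_ma' and 'new_pte' are hypothetical):
+ *
+ *     req[0].ptr = pte_ma | PGREQ_NORMAL;     // *pte_ma = new_pte
+ *     req[0].val = new_pte;
+ *     req[1].ptr = PGREQ_EXTENDED_COMMAND;    // command lives in the
+ *     req[1].val = PGEXT_TLB_FLUSH;           //  least 8 bits of 'val'
+ */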
+
+
+/*
+ * Segment descriptor tables.
+ */
+/* 8 entries, plus a TSS entry for each CPU (up to 32 CPUs). */
+#define FIRST_DOMAIN_GDT_ENTRY 40
+/* These are flat segments for domain bootstrap and fallback. */
+#define FLAT_RING1_CS 0x11
+#define FLAT_RING1_DS 0x19
+#define FLAT_RING3_CS 0x23
+#define FLAT_RING3_DS 0x2b
+
+
+/* EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. */
+
+#define __HYPERVISOR_set_trap_table 0
+#define __HYPERVISOR_pt_update 1
+#define __HYPERVISOR_console_write 2
+#define __HYPERVISOR_set_gdt 3
+#define __HYPERVISOR_stack_and_ldt_switch 4
+#define __HYPERVISOR_net_update 5
+#define __HYPERVISOR_fpu_taskswitch 6
+#define __HYPERVISOR_sched_op 7
+#define __HYPERVISOR_exit 8
+#define __HYPERVISOR_dom0_op 9
+#define __HYPERVISOR_network_op 10
+#define __HYPERVISOR_block_io_op 11
+#define __HYPERVISOR_set_debugreg 12
+#define __HYPERVISOR_get_debugreg 13
+#define __HYPERVISOR_update_descriptor 14
+#define __HYPERVISOR_set_fast_trap 15
+
+#define TRAP_INSTR "int $0x82"
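+
+/*
+ * Invocation sketch: with the register convention above, a guest could
+ * issue a console write roughly as follows ('buf' and 'len' are
+ * hypothetical; the exact stubs live in the guest OS):
+ *
+ *     int ret;
+ *     __asm__ __volatile__ ( TRAP_INSTR
+ *         : "=a" (ret)
+ *         : "0" (__HYPERVISOR_console_write), "b" (buf), "c" (len)
+ *         : "memory" );
+ */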
+
+
+/* Event message note:
+ *
+ * Here, as with interrupts to the guest OS, additional network interfaces
+ * are defined. These definitions serve as placeholders for the event bits;
+ * however, in the code these events will always be referred to as shifted
+ * offsets from the base NET events.
+ */
+
+/* Events that a guest OS may receive from the hypervisor. */
+#define EVENT_BLK_RESP 0x01 /* A block device response has been queued. */
+#define EVENT_TIMER 0x02 /* A timeout has been updated. */
+#define EVENT_DIE 0x04 /* OS is about to be killed. Clean up please! */
+#define EVENT_DEBUG 0x08 /* Request guest to dump debug info (gross!) */
+#define EVENT_NET_TX 0x10 /* There are packets for transmission. */
+#define EVENT_NET_RX 0x20 /* There are empty buffers for receive. */
+
+/* Bit offsets, as opposed to the above masks. */
+#define _EVENT_BLK_RESP 0
+#define _EVENT_TIMER 1
+#define _EVENT_DIE 2
+#define _EVENT_NET_TX 3
+#define _EVENT_NET_RX 4
+#define _EVENT_DEBUG 5
+
+
+/*
+ * NB. We expect that this struct is smaller than a page.
+ */
+typedef struct shared_info_st {
+
+ /* Bitmask of outstanding event notifications hypervisor -> guest OS. */
+ unsigned long events;
+ /*
+ * Hypervisor will only signal event delivery via the "callback
+ * exception" when this value is non-zero. Hypervisor clears this when
+     * notifying the guest OS -- this prevents unbounded reentrancy and
+ * stack overflow (in this way, acts as an interrupt-enable flag).
+ */
+ unsigned long events_enable;
+
+ /*
+ * Address for callbacks hypervisor -> guest OS.
+ * Stack frame looks like that of an interrupt.
+ * Code segment is the default flat selector.
+ * This handler will only be called when events_enable is non-zero.
+ */
+ unsigned long event_address;
+
+ /*
+ * Hypervisor uses this callback when it takes a fault on behalf of
+ * an application. This can happen when returning from interrupts for
+ * example: various faults can occur when reloading the segment
+ * registers, and executing 'iret'.
+ * This callback is provided with an extended stack frame, augmented
+ * with saved values for segment registers %ds and %es:
+ * %ds, %es, %eip, %cs, %eflags [, %oldesp, %oldss]
+ * Code segment is the default flat selector.
+ * FAULTS WHEN CALLING THIS HANDLER WILL TERMINATE THE DOMAIN!!!
+ */
+ unsigned long failsafe_address;
+
+ /*
+ * Time:
+ * The following abstractions are exposed: System Time, Wall Clock
+ * Time, Domain Virtual Time. Domains can access Cycle counter time
+ * directly.
+ * XXX RN: Need something to pass NTP scaling to GuestOS.
+ */
+
+ u64 cpu_freq; /* to calculate ticks -> real time */
+
+ /* System Time */
+ long long system_time; /* in ns */
+ unsigned long st_timestamp; /* cyclecounter at last update */
+
+ /* Wall Clock Time */
+ u32 wc_version; /* a version number for info below */
+ long tv_sec; /* essentially a struct timeval */
+ long tv_usec;
+ long long wc_timestamp; /* system time at last update */
+
+ /* Domain Virtual Time */
+ unsigned long long domain_time;
+
+ /*
+ * Timeout values:
+ * Allow a domain to specify a timeout value in system time and
+ * domain virtual time.
+ */
+ unsigned long long wall_timeout;
+ unsigned long long domain_timeout;
+
+} shared_info_t;
+
+/*
+ * NB. We expect that this struct is smaller than a page.
+ */
+typedef struct start_info_st {
+ unsigned long nr_pages; /* total pages allocated to this domain */
+ shared_info_t *shared_info; /* VIRTUAL address of shared info struct */
+ unsigned long pt_base; /* VIRTUAL address of page directory */
+ unsigned long mod_start; /* VIRTUAL address of pre-loaded module */
+ unsigned long mod_len; /* size (bytes) of pre-loaded module */
+ net_ring_t *net_rings; /* network rings (VIRTUAL ADDRESS) */
+ int num_net_rings;
+ unsigned long blk_ring; /* block io ring (MACHINE ADDRESS) */
+ unsigned char cmd_line[1]; /* variable-length */
+} start_info_t;
+
+/* For use in guest OSes. */
+extern shared_info_t *HYPERVISOR_shared_info;
+
+#endif /* __HYPERVISOR_IF_H__ */
diff --git a/xen/include/hypervisor-ifs/network.h b/xen/include/hypervisor-ifs/network.h
new file mode 100644
index 0000000000..1e4e7e1c53
--- /dev/null
+++ b/xen/include/hypervisor-ifs/network.h
@@ -0,0 +1,131 @@
+/******************************************************************************
+ * network.h
+ *
+ * ring data structures for buffering messages between hypervisor and
+ * guest OSes. As it stands this is only used for network buffer exchange.
+ *
+ * This file also contains structures and interfaces for the per-domain
+ * routing/filtering tables in the hypervisor.
+ *
+ */
+
+#ifndef __RING_H__
+#define __RING_H__
+
+#include <linux/types.h>
+
+typedef struct tx_entry_st {
+ unsigned long addr; /* machine address of packet */
+ unsigned short size; /* in bytes */
+ unsigned short status; /* per descriptor status. */
+} tx_entry_t;
+
+typedef struct rx_entry_st {
+ unsigned long addr; /* machine address of PTE to swizzle */
+ unsigned short size; /* in bytes */
+ unsigned short status; /* per descriptor status. */
+} rx_entry_t;
+
+#define TX_RING_SIZE 256
+#define RX_RING_SIZE 256
+typedef struct net_ring_st {
+ /*
+ * Guest OS places packets into ring at tx_prod.
+ * Hypervisor removes at tx_cons.
+ * Ring is empty when tx_prod == tx_cons.
+ * Guest OS receives a DOMAIN_EVENT_NET_TX when tx_cons passes tx_event.
+ * Hypervisor may be prodded whenever tx_prod is updated, but this is
+ * only necessary when tx_cons == old_tx_prod (ie. transmitter stalled).
+ */
+ tx_entry_t *tx_ring;
+ unsigned int tx_prod, tx_cons, tx_event;
+
+ /*
+ * Guest OS places empty buffers into ring at rx_prod.
+ * Hypervisor fills buffers as rx_cons.
+ * Ring is empty when rx_prod == rx_cons.
+ * Guest OS receives a DOMAIN_EVENT_NET_RX when rx_cons passes rx_event.
+ * Hypervisor may be prodded whenever rx_prod is updated, but this is
+ * only necessary when rx_cons == old_rx_prod (ie. receiver stalled).
+ */
+ rx_entry_t *rx_ring;
+ unsigned int rx_prod, rx_cons, rx_event;
+} net_ring_t;
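+
+/*
+ * Transmit sketch, following the tx_prod/tx_cons protocol documented
+ * above ('ring', 'pkt_ma', 'pkt_len' and 'notify_hypervisor' are
+ * hypothetical):
+ *
+ *     unsigned int i = ring->tx_prod;
+ *     ring->tx_ring[i].addr = pkt_ma;     // machine address of packet
+ *     ring->tx_ring[i].size = pkt_len;
+ *     ring->tx_prod = (i + 1) % TX_RING_SIZE;
+ *     if (ring->tx_cons == i)             // transmitter had stalled,
+ *         notify_hypervisor();            //  so a prod is required
+ */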
+
+/* Specify base of per-domain array. Get returned free slot in the array. */
+/*net_ring_t *create_net_vif(int domain);*/
+
+/* Packet routing/filtering code follows:
+ */
+
+#define NETWORK_ACTION_ACCEPT 0
+#define NETWORK_ACTION_COUNT 1
+
+#define NETWORK_PROTO_ANY 0
+#define NETWORK_PROTO_IP 1
+#define NETWORK_PROTO_TCP 2
+#define NETWORK_PROTO_UDP 3
+#define NETWORK_PROTO_ARP 4
+
+typedef struct net_rule_st
+{
+ u32 src_addr;
+ u32 dst_addr;
+ u16 src_port;
+ u16 dst_port;
+ u32 src_addr_mask;
+ u32 dst_addr_mask;
+ u16 src_port_mask;
+ u16 dst_port_mask;
+ u16 proto;
+
+ int src_interface;
+ int dst_interface;
+ u16 action;
+} net_rule_t;
+
+typedef struct vif_query_st
+{
+ unsigned int domain;
+ char *buf; // where to put the reply -- guest virtual address
+} vif_query_t;
+
+/* Network trap operations and associated structure.
+ * This presently just handles rule insertion and deletion, but will
+ * eventually have code to add and remove interfaces.
+ */
+
+#define NETWORK_OP_ADDRULE 0
+#define NETWORK_OP_DELETERULE 1
+#define NETWORK_OP_GETRULELIST 2
+#define NETWORK_OP_VIFQUERY 3
+
+typedef struct network_op_st
+{
+ unsigned long cmd;
+ union
+ {
+ net_rule_t net_rule;
+ vif_query_t vif_query;
+    } u;
+} network_op_t;
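+
+/*
+ * Rule sketch: a network_op_t asking the hypervisor to accept TCP
+ * traffic to port 80 (zeroed mask fields act as wildcards):
+ *
+ *     network_op_t op;
+ *     memset(&op, 0, sizeof(op));
+ *     op.cmd = NETWORK_OP_ADDRULE;
+ *     op.u.net_rule.proto         = NETWORK_PROTO_TCP;
+ *     op.u.net_rule.dst_port      = 80;
+ *     op.u.net_rule.dst_port_mask = 0xffff;   // match the port exactly
+ *     op.u.net_rule.action        = NETWORK_ACTION_ACCEPT;
+ */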
+
+typedef struct net_rule_ent_st
+{
+ net_rule_t r;
+ struct net_rule_ent_st *next;
+} net_rule_ent_t;
+
+/* Drop a new rule down to the network tables. */
+int add_net_rule(net_rule_t *rule);
+
+
+/* Descriptor status values:
+ */
+
+#define RING_STATUS_OK 0 // Everything is gravy.
+#define RING_STATUS_ERR_CFU -1 // Copy from user problems.
+#define RING_STATUS_BAD_PAGE -2 // What they gave us was pure evil.
+
+#endif
diff --git a/xen/include/scsi/scsi.h b/xen/include/scsi/scsi.h
new file mode 100644
index 0000000000..ffcb419482
--- /dev/null
+++ b/xen/include/scsi/scsi.h
@@ -0,0 +1,237 @@
+#ifndef _LINUX_SCSI_H
+#define _LINUX_SCSI_H
+
+/*
+ * This header file contains public constants and structures used by
+ * the scsi code for linux.
+ */
+
+/*
+ $Header: /usr/src/linux/include/linux/RCS/scsi.h,v 1.3 1993/09/24 12:20:33 drew Exp $
+
+ For documentation on the OPCODES, MESSAGES, and SENSE values,
+ please consult the SCSI standard.
+
+*/
+
+/*
+ * SCSI opcodes
+ */
+
+#define TEST_UNIT_READY 0x00
+#define REZERO_UNIT 0x01
+#define REQUEST_SENSE 0x03
+#define FORMAT_UNIT 0x04
+#define READ_BLOCK_LIMITS 0x05
+#define REASSIGN_BLOCKS 0x07
+#define READ_6 0x08
+#define WRITE_6 0x0a
+#define SEEK_6 0x0b
+#define READ_REVERSE 0x0f
+#define WRITE_FILEMARKS 0x10
+#define SPACE 0x11
+#define INQUIRY 0x12
+#define RECOVER_BUFFERED_DATA 0x14
+#define MODE_SELECT 0x15
+#define RESERVE 0x16
+#define RELEASE 0x17
+#define COPY 0x18
+#define ERASE 0x19
+#define MODE_SENSE 0x1a
+#define START_STOP 0x1b
+#define RECEIVE_DIAGNOSTIC 0x1c
+#define SEND_DIAGNOSTIC 0x1d
+#define ALLOW_MEDIUM_REMOVAL 0x1e
+
+#define SET_WINDOW 0x24
+#define READ_CAPACITY 0x25
+#define READ_10 0x28
+#define WRITE_10 0x2a
+#define SEEK_10 0x2b
+#define WRITE_VERIFY 0x2e
+#define VERIFY 0x2f
+#define SEARCH_HIGH 0x30
+#define SEARCH_EQUAL 0x31
+#define SEARCH_LOW 0x32
+#define SET_LIMITS 0x33
+#define PRE_FETCH 0x34
+#define READ_POSITION 0x34
+#define SYNCHRONIZE_CACHE 0x35
+#define LOCK_UNLOCK_CACHE 0x36
+#define READ_DEFECT_DATA 0x37
+#define MEDIUM_SCAN 0x38
+#define COMPARE 0x39
+#define COPY_VERIFY 0x3a
+#define WRITE_BUFFER 0x3b
+#define READ_BUFFER 0x3c
+#define UPDATE_BLOCK 0x3d
+#define READ_LONG 0x3e
+#define WRITE_LONG 0x3f
+#define CHANGE_DEFINITION 0x40
+#define WRITE_SAME 0x41
+#define READ_TOC 0x43
+#define LOG_SELECT 0x4c
+#define LOG_SENSE 0x4d
+#define MODE_SELECT_10 0x55
+#define RESERVE_10 0x56
+#define RELEASE_10 0x57
+#define MODE_SENSE_10 0x5a
+#define PERSISTENT_RESERVE_IN 0x5e
+#define PERSISTENT_RESERVE_OUT 0x5f
+#define MOVE_MEDIUM 0xa5
+#define READ_12 0xa8
+#define WRITE_12 0xaa
+#define WRITE_VERIFY_12 0xae
+#define SEARCH_HIGH_12 0xb0
+#define SEARCH_EQUAL_12 0xb1
+#define SEARCH_LOW_12 0xb2
+#define READ_ELEMENT_STATUS 0xb8
+#define SEND_VOLUME_TAG 0xb6
+#define WRITE_LONG_2 0xea
+
+/*
+ * Status codes
+ */
+
+#define GOOD 0x00
+#define CHECK_CONDITION 0x01
+#define CONDITION_GOOD 0x02
+#define BUSY 0x04
+#define INTERMEDIATE_GOOD 0x08
+#define INTERMEDIATE_C_GOOD 0x0a
+#define RESERVATION_CONFLICT 0x0c
+#define COMMAND_TERMINATED 0x11
+#define QUEUE_FULL 0x14
+
+#define STATUS_MASK 0x3e
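+
+/*
+ * Decoding sketch: the status values above are already right-shifted by
+ * one, so a raw SCSI status byte is checked like this ('request_sense'
+ * is a hypothetical recovery path):
+ *
+ *     if (((status & STATUS_MASK) >> 1) == CHECK_CONDITION)
+ *         request_sense();
+ */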
+
+/*
+ * SENSE KEYS
+ */
+
+#define NO_SENSE 0x00
+#define RECOVERED_ERROR 0x01
+#define NOT_READY 0x02
+#define MEDIUM_ERROR 0x03
+#define HARDWARE_ERROR 0x04
+#define ILLEGAL_REQUEST 0x05
+#define UNIT_ATTENTION 0x06
+#define DATA_PROTECT 0x07
+#define BLANK_CHECK 0x08
+#define COPY_ABORTED 0x0a
+#define ABORTED_COMMAND 0x0b
+#define VOLUME_OVERFLOW 0x0d
+#define MISCOMPARE 0x0e
+
+
+/*
+ * DEVICE TYPES
+ */
+
+#define TYPE_DISK 0x00
+#define TYPE_TAPE 0x01
+#define TYPE_PRINTER 0x02
+#define TYPE_PROCESSOR 0x03 /* HP scanners use this */
+#define TYPE_WORM 0x04 /* Treated as ROM by our system */
+#define TYPE_ROM 0x05
+#define TYPE_SCANNER 0x06
+#define TYPE_MOD 0x07 /* Magneto-optical disk -
+ * - treated as TYPE_DISK */
+#define TYPE_MEDIUM_CHANGER 0x08
+#define TYPE_COMM 0x09 /* Communications device */
+#define TYPE_ENCLOSURE 0x0d /* Enclosure Services Device */
+#define TYPE_NO_LUN 0x7f
+
+/*
+ * standard mode-select header prepended to all mode-select commands
+ *
+ * moved here from cdrom.h -- kraxel
+ */
+
+struct ccs_modesel_head
+{
+ u_char _r1; /* reserved */
+ u_char medium; /* device-specific medium type */
+ u_char _r2; /* reserved */
+ u_char block_desc_length; /* block descriptor length */
+ u_char density; /* device-specific density code */
+ u_char number_blocks_hi; /* number of blocks in this block desc */
+ u_char number_blocks_med;
+ u_char number_blocks_lo;
+ u_char _r3;
+ u_char block_length_hi; /* block length for blocks in this desc */
+ u_char block_length_med;
+ u_char block_length_lo;
+};
+
+/*
+ * MESSAGE CODES
+ */
+
+#define COMMAND_COMPLETE 0x00
+#define EXTENDED_MESSAGE 0x01
+#define EXTENDED_MODIFY_DATA_POINTER 0x00
+#define EXTENDED_SDTR 0x01
+#define EXTENDED_EXTENDED_IDENTIFY 0x02 /* SCSI-I only */
+#define EXTENDED_WDTR 0x03
+#define SAVE_POINTERS 0x02
+#define RESTORE_POINTERS 0x03
+#define DISCONNECT 0x04
+#define INITIATOR_ERROR 0x05
+#define ABORT 0x06
+#define MESSAGE_REJECT 0x07
+#define NOP 0x08
+#define MSG_PARITY_ERROR 0x09
+#define LINKED_CMD_COMPLETE 0x0a
+#define LINKED_FLG_CMD_COMPLETE 0x0b
+#define BUS_DEVICE_RESET 0x0c
+
+#define INITIATE_RECOVERY 0x0f /* SCSI-II only */
+#define RELEASE_RECOVERY 0x10 /* SCSI-II only */
+
+#define SIMPLE_QUEUE_TAG 0x20
+#define HEAD_OF_QUEUE_TAG 0x21
+#define ORDERED_QUEUE_TAG 0x22
+
+/*
+ * Here are some scsi specific ioctl commands which are sometimes useful.
+ */
+/* These are a few other constants only used by scsi devices */
+/* Note that include/linux/cdrom.h also defines IOCTL 0x5300 - 0x5395 */
+
+#define SCSI_IOCTL_GET_IDLUN 0x5382 /* conflicts with CDROMAUDIOBUFSIZ */
+
+/* Used to turn on and off tagged queuing for scsi devices */
+
+#define SCSI_IOCTL_TAGGED_ENABLE 0x5383
+#define SCSI_IOCTL_TAGGED_DISABLE 0x5384
+
+/* Used to obtain the host number of a device. */
+#define SCSI_IOCTL_PROBE_HOST 0x5385
+
+/* Used to get the bus number for a device */
+#define SCSI_IOCTL_GET_BUS_NUMBER 0x5386
+
+/* Used to get the PCI location of a device */
+#define SCSI_IOCTL_GET_PCI 0x5387
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
+
+#endif
diff --git a/xen/include/scsi/scsi_ioctl.h b/xen/include/scsi/scsi_ioctl.h
new file mode 100644
index 0000000000..937cadfb91
--- /dev/null
+++ b/xen/include/scsi/scsi_ioctl.h
@@ -0,0 +1,51 @@
+#ifndef _SCSI_IOCTL_H
+#define _SCSI_IOCTL_H
+
+#define SCSI_IOCTL_SEND_COMMAND 1
+#define SCSI_IOCTL_TEST_UNIT_READY 2
+#define SCSI_IOCTL_BENCHMARK_COMMAND 3
+#define SCSI_IOCTL_SYNC 4 /* Request synchronous parameters */
+#define SCSI_IOCTL_START_UNIT 5
+#define SCSI_IOCTL_STOP_UNIT 6
+/* The door lock/unlock constants are compatible with Sun constants for
+ the cdrom */
+#define SCSI_IOCTL_DOORLOCK 0x5380 /* lock the eject mechanism */
+#define SCSI_IOCTL_DOORUNLOCK 0x5381 /* unlock the mechanism */
+
+#define SCSI_REMOVAL_PREVENT 1
+#define SCSI_REMOVAL_ALLOW 0
+
+#ifdef __KERNEL__
+
+/*
+ * Structures used for scsi_ioctl et al.
+ */
+
+typedef struct scsi_ioctl_command {
+ unsigned int inlen;
+ unsigned int outlen;
+ unsigned char data[0];
+} Scsi_Ioctl_Command;
+
+typedef struct scsi_idlun {
+ __u32 dev_id;
+ __u32 host_unique_id;
+} Scsi_Idlun;
+
+/* Fibre Channel WWN, port_id struct */
+typedef struct scsi_fctargaddress
+{
+ __u32 host_port_id;
+ unsigned char host_wwn[8]; // include NULL term.
+} Scsi_FCTargAddress;
+
+extern int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg);
+extern int kernel_scsi_ioctl (Scsi_Device *dev, int cmd, void *arg);
+extern int scsi_ioctl_send_command(Scsi_Device *dev,
+ Scsi_Ioctl_Command *arg);
+
+#endif
+
+#endif
+
+
diff --git a/xen/include/scsi/scsicam.h b/xen/include/scsi/scsicam.h
new file mode 100644
index 0000000000..13e9378f55
--- /dev/null
+++ b/xen/include/scsi/scsicam.h
@@ -0,0 +1,19 @@
+/*
+ * scsicam.h - SCSI CAM support functions, use for HDIO_GETGEO, etc.
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@Colorado.EDU
+ * +1 (303) 786-7975
+ *
+ * For more information, please consult the SCSI-CAM draft.
+ */
+
+#ifndef SCSICAM_H
+#define SCSICAM_H
+#include <xeno/kdev_t.h>
+extern int scsicam_bios_param (Disk *disk, kdev_t dev, int *ip);
+extern int scsi_partsize(struct buffer_head *bh, unsigned long capacity,
+ unsigned int *cyls, unsigned int *hds, unsigned int *secs);
+#endif /* def SCSICAM_H */
diff --git a/xen/include/scsi/sg.h b/xen/include/scsi/sg.h
new file mode 100644
index 0000000000..ccb47c88bb
--- /dev/null
+++ b/xen/include/scsi/sg.h
@@ -0,0 +1,330 @@
+#ifndef _SCSI_GENERIC_H
+#define _SCSI_GENERIC_H
+
+/*
+ History:
+ Started: Aug 9 by Lawrence Foard (entropy@world.std.com), to allow user
+ process control of SCSI devices.
+ Development Sponsored by Killy Corp. NY NY
+Original driver (sg.h):
+* Copyright (C) 1992 Lawrence Foard
+Version 2 and 3 extensions to driver:
+* Copyright (C) 1998 - 2002 Douglas Gilbert
+
+ Version: 3.1.23 (20020318)
+ This version is for 2.4 series kernels.
+
+ Changes since 3.1.22 (20011208)
+ - change EACCES to EPERM when O_RDONLY is insufficient
+ - suppress newlines in host string ( /proc/scsi/sg/host_strs output)
+ - fix xfer direction, old interface, short reply_len [Travers Carter]
+ Changes since 3.1.21 (20011029)
+ - add support for SG_FLAG_MMAP_IO [permit mmap() on sg devices]
+ - update documentation pointers in this header
+ - put KERNEL_VERSION macros around code that breaks early 2.4 series
+ - fix use count for multiple queued requests on closed fd
+ - switch back to alloc_kiovec()
+ Changes since 3.1.20 (20010814)
+ - use alloc_kiovec_sz() to speed dio [set num_buffer_heads==0]
+ - changes to cope with larger scatter gather element sizes
+ - clean up some printk()s
+ - add MODULE_LICENSE("GPL") [in a 3.1.20 subversion]
+ - fix race around generic_unplug_device() [in a 3.1.20 subversion]
+ Changes since 3.1.19 (20010623)
+ - add SG_GET_ACCESS_COUNT ioctl
+ - make open() increment and close() decrement access_count
+ - only register first 256 devices, reject subsequent devices
+ Changes since 3.1.18 (20010505)
+ - fix bug that caused long wait when large buffer requested
+ - fix leak in error case of sg_new_read() [report: Eric Barton]
+ - add 'online' column to /proc/scsi/sg/devices
+ Changes since 3.1.17 (20000921)
+ - add CAP_SYS_RAWIO capability for sensitive stuff
+ - compile in dio stuff, procfs 'allow_dio' defaulted off (0)
+ - make premature close and detach more robust
+ - lun masked into commands <= SCSI_2
+ - poll() and async notification now yield POLL_HUP on detach
+ - various 3rd party tweaks tracking lk 2.4 internal changes
+
+Map of SG versions to the Linux kernels in which they appear:
+ ---------- ----------------------------------
+ original all kernels < 2.2.6
+ 2.1.40 2.2.20
+ 3.0.x optional version 3 sg driver for 2.2 series
+ 3.1.17++ 2.4.0++
+
+Major new features in SG 3.x driver (cf SG 2.x drivers)
+ - SG_IO ioctl() combines the function of write() and read()
+ - new interface (sg_io_hdr_t) but still supports old interface
+ - scatter/gather in user space, direct IO, and mmap supported
+
+ The normal action of this driver is to use the adapter (HBA) driver to DMA
+ data into kernel buffers and then use the CPU to copy the data into the
+ user space (vice versa for writes). That is called "indirect" IO due to
+ the double handling of data. There are two methods offered to remove the
+ redundant copy: 1) direct IO which uses the kernel kiobuf mechanism and
+ 2) using the mmap() system call to map the reserve buffer (this driver has
+ one reserve buffer per fd) into the user space. Both have their advantages.
+ In terms of absolute speed mmap() is faster. If speed is not a concern,
+ indirect IO should be fine. Read the documentation for more information.
+
+ ** N.B. To use direct IO 'echo 1 > /proc/scsi/sg/allow_dio' may be
+ needed. That pseudo file's content is defaulted to 0. **
+
+ Historical note: this SCSI pass-through driver has been known as "sg" for
+ a decade. In broader kernel discussions "sg" is used to refer to scatter
+ gather techniques. The context should clarify which "sg" is referred to.
+
+ Documentation
+ =============
+ A web site for the SG device driver can be found at:
+ http://www.torque.net/sg [alternatively check the MAINTAINERS file]
+ The documentation for the sg version 3 driver can be found at:
+ http://www.torque.net/sg/p/sg_v3_ho.html
+ This is a rendering from DocBook source [change the extension to "sgml"
+ or "xml"]. There are renderings in "ps", "pdf", "rtf" and "txt" (soon).
+
+ The older, version 2 documents discuss the original sg interface in detail:
+ http://www.torque.net/sg/p/scsi-generic.txt
+ http://www.torque.net/sg/p/scsi-generic_long.txt
+ A version of this document (potentially out of date) may also be found in
+ the kernel source tree, probably at:
+ /usr/src/linux/Documentation/scsi-generic.txt .
+
+ Utility and test programs are available at the sg web site. They are
+ bundled as sg_utils (for the lk 2.2 series) and sg3_utils (for the
+ lk 2.4 series).
+
+ There is a HOWTO on the Linux SCSI subsystem in the lk 2.4 series at:
+ http://www.linuxdoc.org/HOWTO/SCSI-2.4-HOWTO
+*/
+
+
+/* New interface introduced in the 3.x SG drivers follows */
+
+typedef struct sg_iovec /* same structure as used by readv() Linux system */
+{ /* call. It defines one scatter-gather element. */
+ void * iov_base; /* Starting address */
+ size_t iov_len; /* Length in bytes */
+} sg_iovec_t;
+
+
+typedef struct sg_io_hdr
+{
+ int interface_id; /* [i] 'S' for SCSI generic (required) */
+ int dxfer_direction; /* [i] data transfer direction */
+ unsigned char cmd_len; /* [i] SCSI command length ( <= 16 bytes) */
+ unsigned char mx_sb_len; /* [i] max length to write to sbp */
+ unsigned short iovec_count; /* [i] 0 implies no scatter gather */
+ unsigned int dxfer_len; /* [i] byte count of data transfer */
+ void * dxferp; /* [i], [*io] points to data transfer memory
+ or scatter gather list */
+ unsigned char * cmdp; /* [i], [*i] points to command to perform */
+ unsigned char * sbp; /* [i], [*o] points to sense_buffer memory */
+ unsigned int timeout; /* [i] MAX_UINT->no timeout (unit: millisec) */
+ unsigned int flags; /* [i] 0 -> default, see SG_FLAG... */
+ int pack_id; /* [i->o] unused internally (normally) */
+ void * usr_ptr; /* [i->o] unused internally */
+ unsigned char status; /* [o] scsi status */
+ unsigned char masked_status;/* [o] shifted, masked scsi status */
+ unsigned char msg_status; /* [o] messaging level data (optional) */
+ unsigned char sb_len_wr; /* [o] byte count actually written to sbp */
+ unsigned short host_status; /* [o] errors from host adapter */
+ unsigned short driver_status;/* [o] errors from software driver */
+ int resid; /* [o] dxfer_len - actual_transferred */
+ unsigned int duration; /* [o] time taken by cmd (unit: millisec) */
+ unsigned int info; /* [o] auxiliary information */
+} sg_io_hdr_t; /* 64 bytes long (on i386) */
+
+/* Use negative values to flag difference from original sg_header structure */
+#define SG_DXFER_NONE (-1) /* e.g. a SCSI Test Unit Ready command */
+#define SG_DXFER_TO_DEV (-2) /* e.g. a SCSI WRITE command */
+#define SG_DXFER_FROM_DEV (-3) /* e.g. a SCSI READ command */
+#define SG_DXFER_TO_FROM_DEV (-4) /* treated like SG_DXFER_FROM_DEV with the
+                                   additional property that during indirect
+ IO the user buffer is copied into the
+ kernel buffers before the transfer */
+#define SG_DXFER_UNKNOWN (-5) /* Unknown data direction */
+
+/* following flag values can be "or"-ed together */
+#define SG_FLAG_DIRECT_IO 1 /* default is indirect IO */
+#define SG_FLAG_LUN_INHIBIT 2 /* default is overwrite lun in SCSI */
+ /* command block (when <= SCSI_2) */
+#define SG_FLAG_MMAP_IO 4 /* request memory mapped IO */
+#define SG_FLAG_NO_DXFER 0x10000 /* no transfer of kernel buffers to/from */
+ /* user space (debug indirect IO) */
+
+/* following 'info' values are "or"-ed together */
+#define SG_INFO_OK_MASK 0x1
+#define SG_INFO_OK 0x0 /* no sense, host nor driver "noise" */
+#define SG_INFO_CHECK 0x1 /* something abnormal happened */
+
+#define SG_INFO_DIRECT_IO_MASK 0x6
+#define SG_INFO_INDIRECT_IO 0x0 /* data xfer via kernel buffers (or no xfer) */
+#define SG_INFO_DIRECT_IO 0x2 /* direct IO requested and performed */
+#define SG_INFO_MIXED_IO 0x4 /* part direct, part indirect IO */
+
+
+typedef struct sg_scsi_id { /* used by SG_GET_SCSI_ID ioctl() */
+ int host_no; /* as in "scsi<n>" where 'n' is one of 0, 1, 2 etc */
+ int channel;
+ int scsi_id; /* scsi id of target device */
+ int lun;
+ int scsi_type; /* TYPE_... defined in scsi/scsi.h */
+ short h_cmd_per_lun;/* host (adapter) maximum commands per lun */
+ short d_queue_depth;/* device (or adapter) maximum queue length */
+ int unused[2]; /* probably find a good use, set 0 for now */
+} sg_scsi_id_t; /* 32 bytes long on i386 */
+
+typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
+ char req_state; /* 0 -> not used, 1 -> written, 2 -> ready to read */
+    char orphan;    /* 0 -> normal request, 1 -> from interrupted SG_IO */
+ char sg_io_owned; /* 0 -> complete with read(), 1 -> owned by SG_IO */
+ char problem; /* 0 -> no problem detected, 1 -> error to report */
+ int pack_id; /* pack_id associated with request */
+ void * usr_ptr; /* user provided pointer (in new interface) */
+ unsigned int duration; /* millisecs elapsed since written (req_state==1)
+ or request duration (req_state==2) */
+ int unused;
+} sg_req_info_t; /* 20 bytes long on i386 */
+
+
+/* IOCTLs: Those ioctls that are relevant to the SG 3.x drivers follow.
+ [Those that only apply to the SG 2.x drivers are at the end of the file.]
+ (_GET_s yield result via 'int *' 3rd argument unless otherwise indicated) */
+
+#define SG_EMULATED_HOST 0x2203 /* true for emulated host adapter (ATAPI) */
+
+/* Used to configure SCSI command transformation layer for ATAPI devices */
+/* Only supported by the ide-scsi driver */
+#define SG_SET_TRANSFORM 0x2204 /* N.B. 3rd arg is not pointer but value: */
+ /* 3rd arg = 0 to disable transform, 1 to enable it */
+#define SG_GET_TRANSFORM 0x2205
+
+#define SG_SET_RESERVED_SIZE 0x2275 /* request a new reserved buffer size */
+#define SG_GET_RESERVED_SIZE 0x2272 /* actual size of reserved buffer */
+
+/* The following ioctl has a 'sg_scsi_id_t *' object as its 3rd argument. */
+#define SG_GET_SCSI_ID 0x2276 /* Yields fd's bus, chan, dev, lun + type */
+/* SCSI id information can also be obtained from SCSI_IOCTL_GET_IDLUN */
+
+/* Override host setting and always DMA using low memory ( <16MB on i386) */
+#define SG_SET_FORCE_LOW_DMA 0x2279 /* 0-> use adapter setting, 1-> force */
+#define SG_GET_LOW_DMA 0x227a /* 0-> use all ram for dma; 1-> low dma ram */
+
+/* When SG_SET_FORCE_PACK_ID set to 1, pack_id is input to read() which
+ tries to fetch a packet with a matching pack_id, waits, or returns EAGAIN.
+ If pack_id is -1 then read oldest waiting. When ...FORCE_PACK_ID set to 0
+ then pack_id ignored by read() and oldest readable fetched. */
+#define SG_SET_FORCE_PACK_ID 0x227b
+#define SG_GET_PACK_ID 0x227c /* Yields oldest readable pack_id (or -1) */
+
+#define SG_GET_NUM_WAITING 0x227d /* Number of commands awaiting read() */
+
+/* Yields max scatter gather tablesize allowed by current host adapter */
+#define SG_GET_SG_TABLESIZE 0x227F /* 0 implies can't do scatter gather */
+
+#define SG_GET_VERSION_NUM 0x2282 /* Example: version 2.1.34 yields 20134 */
+
+/* Returns -EBUSY if occupied. 3rd argument pointer to int (see next) */
+#define SG_SCSI_RESET 0x2284
+/* Associated values that can be given to SG_SCSI_RESET follow */
+#define SG_SCSI_RESET_NOTHING 0
+#define SG_SCSI_RESET_DEVICE 1
+#define SG_SCSI_RESET_BUS 2
+#define SG_SCSI_RESET_HOST 3
+
+/* synchronous SCSI command ioctl, (only in version 3 interface) */
+#define SG_IO 0x2285 /* similar effect as write() followed by read() */
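+
+/*
+ * SG_IO sketch: issuing a 6-byte INQUIRY through the v3 interface
+ * ('fd' is an open sg device; buffer sizes are arbitrary):
+ *
+ *     unsigned char cdb[6] = { INQUIRY, 0, 0, 0, 96, 0 };
+ *     unsigned char inq[96], sense[32];
+ *     sg_io_hdr_t io;
+ *     memset(&io, 0, sizeof(io));
+ *     io.interface_id    = 'S';
+ *     io.dxfer_direction = SG_DXFER_FROM_DEV;
+ *     io.cmd_len   = sizeof(cdb);    io.cmdp   = cdb;
+ *     io.dxfer_len = sizeof(inq);    io.dxferp = inq;
+ *     io.mx_sb_len = sizeof(sense);  io.sbp    = sense;
+ *     io.timeout   = 20000;          // milliseconds
+ *     if (ioctl(fd, SG_IO, &io) < 0 ||
+ *         (io.info & SG_INFO_OK_MASK) != SG_INFO_OK)
+ *         ...                        // inspect status/sense
+ */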
+
+#define SG_GET_REQUEST_TABLE 0x2286 /* yields table of active requests */
+
+/* How to treat EINTR during SG_IO ioctl(), only in SG 3.x series */
+#define SG_SET_KEEP_ORPHAN 0x2287 /* 1 -> hold for read(), 0 -> drop (def) */
+#define SG_GET_KEEP_ORPHAN 0x2288
+
+/* yields scsi midlevel's access_count for this SCSI device */
+#define SG_GET_ACCESS_COUNT 0x2289
+
+
+#define SG_SCATTER_SZ (8 * 4096) /* PAGE_SIZE not available to user */
+/* Largest size (in bytes) a single scatter-gather list element can have.
+ The value must be a power of 2 and <= (PAGE_SIZE * 32) [131072 bytes on
+ i386]. The minimum value is PAGE_SIZE. If scatter-gather not supported
+ by adapter then this value is the largest data block that can be
+ read/written by a single scsi command. The user can find the value of
+ PAGE_SIZE by calling getpagesize() defined in unistd.h . */
+
+#define SG_DEFAULT_RETRIES 1
+
+/* Defaults, commented if they differ from original sg driver */
+#define SG_DEF_FORCE_LOW_DMA 0 /* was 1 -> memory below 16MB on i386 */
+#define SG_DEF_FORCE_PACK_ID 0
+#define SG_DEF_KEEP_ORPHAN 0
+#define SG_DEF_RESERVED_SIZE SG_SCATTER_SZ /* load time option */
+
+/* maximum outstanding requests, write() yields EDOM if exceeded */
+#define SG_MAX_QUEUE 16
+
+#define SG_BIG_BUFF SG_DEF_RESERVED_SIZE /* for backward compatibility */
+
+/* Alternate style type names, "..._t" variants preferred */
+typedef struct sg_io_hdr Sg_io_hdr;
+typedef struct sg_iovec Sg_io_vec;
+typedef struct sg_scsi_id Sg_scsi_id;
+typedef struct sg_req_info Sg_req_info;
+
+
+/* vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv */
+/* The older SG interface based on the 'sg_header' structure follows. */
+/* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
+
+#define SG_MAX_SENSE 16 /* this only applies to the sg_header interface */
+
+struct sg_header
+{
+ int pack_len; /* [o] reply_len (ie useless), ignored as input */
+ int reply_len; /* [i] max length of expected reply (inc. sg_header) */
+ int pack_id; /* [io] id number of packet (use ints >= 0) */
+ int result; /* [o] 0==ok, else (+ve) Unix errno (best ignored) */
+ unsigned int twelve_byte:1;
+ /* [i] Force 12 byte command length for group 6 & 7 commands */
+ unsigned int target_status:5; /* [o] scsi status from target */
+ unsigned int host_status:8; /* [o] host status (see "DID" codes) */
+ unsigned int driver_status:8; /* [o] driver status+suggestion */
+ unsigned int other_flags:10; /* unused */
+ unsigned char sense_buffer[SG_MAX_SENSE]; /* [o] Output in 3 cases:
+ when target_status is CHECK_CONDITION or
+ when target_status is COMMAND_TERMINATED or
+ when (driver_status & DRIVER_SENSE) is true. */
+}; /* This structure is 36 bytes long on i386 */
+
+
+/* IOCTLs: The following are not required (or ignored) when the sg_io_hdr_t
+ interface is used. They are kept for backward compatibility with
+ the original and version 2 drivers. */
+
+#define SG_SET_TIMEOUT 0x2201 /* unit: jiffies (10ms on i386) */
+#define SG_GET_TIMEOUT 0x2202 /* yield timeout as _return_ value */
+
+/* Get/set command queuing state per fd (default is SG_DEF_COMMAND_Q.
+ Each time a sg_io_hdr_t object is seen on this file descriptor, this
+ command queuing flag is set on (overriding the previous setting). */
+#define SG_GET_COMMAND_Q 0x2270 /* Yields 0 (queuing off) or 1 (on) */
+#define SG_SET_COMMAND_Q 0x2271 /* Change queuing state with 0 or 1 */
+
+/* Turn on/off error sense trace (1 and 0 respectively, default is off).
+ Try using: "# cat /proc/scsi/sg/debug" instead in the v3 driver */
+#define SG_SET_DEBUG 0x227e /* 0 -> turn off debug */
+
+#define SG_NEXT_CMD_LEN 0x2283 /* override SCSI command length with given
+ number on the next write() on this file descriptor */
+
+
+/* Defaults, commented if they differ from original sg driver */
+#define SG_DEFAULT_TIMEOUT (60*HZ) /* HZ == 'jiffies in 1 second' */
+#define SG_DEF_COMMAND_Q 0 /* command queuing is always on when
+ the new interface is used */
+#define SG_DEF_UNDERRUN_FLAG 0
+
+#endif
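For illustration, the per-fd knobs above are driven with plain ioctl() calls.
This sketch assumes an already-open /dev/sg* descriptor; the 20-second value
relies on the HZ == 100 (10 ms jiffy) i386 note next to SG_SET_TIMEOUT:

    #include <sys/ioctl.h>

    int sg_configure(int fd)
    {
        int timeout = 20 * 100;     /* 20 s in jiffies, assuming HZ == 100 */
        int queuing = 1;

        if (ioctl(fd, SG_SET_TIMEOUT, &timeout) < 0)
            return -1;
        if (ioctl(fd, SG_SET_COMMAND_Q, &queuing) < 0)
            return -1;
        return ioctl(fd, SG_GET_TIMEOUT);  /* timeout comes back as the
                                              ioctl return value */
    }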
diff --git a/xen/include/stdarg.h b/xen/include/stdarg.h
new file mode 100644
index 0000000000..9f6215d31f
--- /dev/null
+++ b/xen/include/stdarg.h
@@ -0,0 +1,138 @@
+/* Copyright (C) 1989, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/*
+ * ISO C Standard: 7.15 Variable arguments <stdarg.h>
+ */
+
+#ifndef _STDARG_H
+#ifndef _ANSI_STDARG_H_
+#ifndef __need___va_list
+#define _STDARG_H
+#define _ANSI_STDARG_H_
+#endif /* not __need___va_list */
+#undef __need___va_list
+
+/* Define __gnuc_va_list. */
+
+#ifndef __GNUC_VA_LIST
+#define __GNUC_VA_LIST
+typedef __builtin_va_list __gnuc_va_list;
+#endif
+
+/* Define the standard macros for the user,
+ if this invocation was from the user program. */
+#ifdef _STDARG_H
+
+/* Note that the type used in va_arg is supposed to match the
+ actual type **after default promotions**.
+ Thus, va_arg (..., short) is not valid. */
+
+#define va_start(v,l) __builtin_stdarg_start((v),l)
+#define va_end __builtin_va_end
+#define va_arg __builtin_va_arg
+#if !defined(__STRICT_ANSI__) || __STDC_VERSION__ + 0 >= 199900L
+#define va_copy(d,s) __builtin_va_copy((d),(s))
+#endif
+#define __va_copy(d,s) __builtin_va_copy((d),(s))
+
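A small sketch of the promotion rule the note above warns about: the second
argument to va_arg must be the type *after* default promotions, so narrow
integers are always fetched as int. The helper name is invented for
illustration.

    static int sum_ints(int count, ...)
    {
        va_list ap;
        int i, total = 0;

        va_start(ap, count);
        for (i = 0; i < count; i++)
            total += va_arg(ap, int);  /* va_arg(ap, short) would be invalid */
        va_end(ap);
        return total;                  /* sum_ints(3, 1, 2, 3) == 6 */
    }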
+
+/* Define va_list, if desired, from __gnuc_va_list. */
+/* We deliberately do not define va_list when called from
+ stdio.h, because ANSI C says that stdio.h is not supposed to define
+ va_list. stdio.h needs to have access to that data type,
+ but must not use that name. It should use the name __gnuc_va_list,
+ which is safe because it is reserved for the implementation. */
+
+#ifdef _HIDDEN_VA_LIST /* On OSF1, this means varargs.h is "half-loaded". */
+#undef _VA_LIST
+#endif
+
+#ifdef _BSD_VA_LIST
+#undef _BSD_VA_LIST
+#endif
+
+#if defined(__svr4__) || (defined(_SCO_DS) && !defined(__VA_LIST))
+/* SVR4.2 uses _VA_LIST for an internal alias for va_list,
+ so we must avoid testing it and setting it here.
+ SVR4 uses _VA_LIST as a flag in stdarg.h, but we should
+ have no conflict with that. */
+#ifndef _VA_LIST_
+#define _VA_LIST_
+#ifdef __i860__
+#ifndef _VA_LIST
+#define _VA_LIST va_list
+#endif
+#endif /* __i860__ */
+typedef __gnuc_va_list va_list;
+#ifdef _SCO_DS
+#define __VA_LIST
+#endif
+#endif /* _VA_LIST_ */
+#else /* not __svr4__ || _SCO_DS */
+
+/* The macro _VA_LIST_ is the same thing used by this file in Ultrix.
+ But on BSD NET2 we must not test or define or undef it.
+ (Note that the comments in NET 2's ansi.h
+ are incorrect for _VA_LIST_--see stdio.h!) */
+#if !defined (_VA_LIST_) || defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__) || defined(WINNT)
+/* The macro _VA_LIST_DEFINED is used in Windows NT 3.5 */
+#ifndef _VA_LIST_DEFINED
+/* The macro _VA_LIST is used in SCO Unix 3.2. */
+#ifndef _VA_LIST
+/* The macro _VA_LIST_T_H is used in the Bull dpx2 */
+#ifndef _VA_LIST_T_H
+/* The macro __va_list__ is used by BeOS. */
+#ifndef __va_list__
+typedef __gnuc_va_list va_list;
+#endif /* not __va_list__ */
+#endif /* not _VA_LIST_T_H */
+#endif /* not _VA_LIST */
+#endif /* not _VA_LIST_DEFINED */
+#if !(defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__))
+#define _VA_LIST_
+#endif
+#ifndef _VA_LIST
+#define _VA_LIST
+#endif
+#ifndef _VA_LIST_DEFINED
+#define _VA_LIST_DEFINED
+#endif
+#ifndef _VA_LIST_T_H
+#define _VA_LIST_T_H
+#endif
+#ifndef __va_list__
+#define __va_list__
+#endif
+
+#endif /* not _VA_LIST_, except on certain systems */
+
+#endif /* not __svr4__ */
+
+#endif /* _STDARG_H */
+
+#endif /* not _ANSI_STDARG_H_ */
+#endif /* not _STDARG_H */
diff --git a/xen/include/xeno/ac_timer.h b/xen/include/xeno/ac_timer.h
new file mode 100644
index 0000000000..7cf568d2fc
--- /dev/null
+++ b/xen/include/xeno/ac_timer.h
@@ -0,0 +1,65 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: ac_timer.h
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: Accurate timer for the Hypervisor
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ */
+
+#ifndef _AC_TIMER_H_
+#define _AC_TIMER_H_
+
+#include <xeno/time.h> /* include notion of time */
+
+/*
+ * The Xen Hypervisor provides two types of timers:
+ *
+ * - Linux-style, jiffy-based timers for legacy code and coarse-grain
+ *   timeouts. These are defined in ./include/xeno/timer.h and implemented
+ *   in ./common/timer.c. Unlike in Linux, they are not driven by a periodic
+ *   timer interrupt but are executed "occasionally", with correspondingly
+ *   lower accuracy.
+ *
+ * - Accurate timers, defined in this file and implemented in
+ *   ./common/ac_timer.c. These are driven by a programmable timer
+ *   interrupt and are thus as accurate as the hardware allows. Where
+ *   possible we use the local APIC for this purpose; however, that fact is
+ *   hidden behind an architecture-independent layer. Accurate timers are
+ *   programmed using system time.
+ *
+ * The interface to accurate timers is very similar to that of Linux timers,
+ * except that the expires value is expressed not in jiffies but in ns from
+ * boot time. Its implementation, however, is entirely different.
+ */
+
+struct ac_timer {
+ struct list_head timer_list;
+    s_time_t         expires;       /* system-time timeout value */
+ unsigned long data;
+ void (*function)(unsigned long);
+};
+
+/* interface for "clients" */
+extern int add_ac_timer(struct ac_timer *timer);
+extern int rem_ac_timer(struct ac_timer *timer);
+extern int mod_ac_timer(struct ac_timer *timer, s_time_t new_time);
+static inline void init_ac_timer(struct ac_timer *timer)
+{
+    /* Nothing to initialise per-timer at present. */
+    //timer->next = NULL;
+}
+
+/* interface used by programmable timer, implemented hardware dependent */
+extern int reprogram_ac_timer(s_time_t timeout);
+extern void do_ac_timer(void);
+
+#endif /* _AC_TIMER_H_ */
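A sketch of the client interface, under the assumption stated in the comment
above that expiry times are nanoseconds since boot. The callback, the data
value, and the one-second delay are illustrative only.

    static void my_timeout(unsigned long data)
    {
        /* runs in timer-interrupt context when the timer fires */
    }

    static struct ac_timer my_timer;

    static void arm_timer(s_time_t now)   /* now: current system time, ns */
    {
        init_ac_timer(&my_timer);
        my_timer.expires  = now + 1000000000ULL;   /* fire in one second */
        my_timer.data     = 0;
        my_timer.function = my_timeout;
        add_ac_timer(&my_timer);
    }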
diff --git a/xen/include/xeno/blk.h b/xen/include/xeno/blk.h
new file mode 100644
index 0000000000..bc3f5548c6
--- /dev/null
+++ b/xen/include/xeno/blk.h
@@ -0,0 +1,409 @@
+#ifndef _BLK_H
+#define _BLK_H
+
+#include <xeno/blkdev.h>
+/*#include <xeno/locks.h>*/
+#include <xeno/config.h>
+#include <xeno/spinlock.h>
+
+/*
+ * Spinlock for protecting the request queue which
+ * is mucked around with in interrupts on potentially
+ * multiple CPU's..
+ */
+extern spinlock_t io_request_lock;
+
+/*
+ * Initialization functions.
+ */
+extern int isp16_init(void);
+extern int cdu31a_init(void);
+extern int acsi_init(void);
+extern int mcd_init(void);
+extern int mcdx_init(void);
+extern int sbpcd_init(void);
+extern int aztcd_init(void);
+extern int sony535_init(void);
+extern int gscd_init(void);
+extern int cm206_init(void);
+extern int optcd_init(void);
+extern int sjcd_init(void);
+extern int cdi_init(void);
+extern int hd_init(void);
+extern int ide_init(void);
+extern int xd_init(void);
+extern int mfm_init(void);
+extern int loop_init(void);
+extern int md_init(void);
+extern int ap_init(void);
+extern int ddv_init(void);
+extern int z2_init(void);
+extern int swim3_init(void);
+extern int swimiop_init(void);
+extern int amiga_floppy_init(void);
+extern int atari_floppy_init(void);
+extern int ez_init(void);
+extern int bpcd_init(void);
+extern int ps2esdi_init(void);
+extern int jsfd_init(void);
+extern int viodasd_init(void);
+extern int viocd_init(void);
+
+#if defined(CONFIG_ARCH_S390)
+extern int dasd_init(void);
+extern int xpram_init(void);
+extern int tapeblock_init(void);
+#endif /* CONFIG_ARCH_S390 */
+
+extern void set_device_ro(kdev_t dev,int flag);
+#if 0
+void add_blkdev_randomness(int major);
+#else
+#define add_blkdev_randomness(_major) ((void)0)
+#endif
+
+extern int floppy_init(void);
+extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */
+extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */
+extern int rd_image_start; /* starting block # of image */
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */
+
+extern unsigned long initrd_start,initrd_end;
+extern int initrd_below_start_ok; /* 1 if it is not an error if initrd_start < memory_start */
+void initrd_init(void);
+
+#endif
+
+
+/*
+ * end_request() and friends. Must be called with the request queue spinlock
+ * acquired. All functions called within end_request() _must_be_ atomic.
+ *
+ * Several drivers define their own end_request and call
+ * end_that_request_first() and end_that_request_last()
+ * for parts of the original function. This prevents
+ * code duplication in drivers.
+ */
+
+static inline void blkdev_dequeue_request(struct request * req)
+{
+ list_del(&req->queue);
+}
+
+int end_that_request_first(struct request *req, int uptodate, char *name);
+void end_that_request_last(struct request *req);
+
+#if defined(MAJOR_NR) || defined(IDE_DRIVER)
+
+#undef DEVICE_ON
+#undef DEVICE_OFF
+
+/*
+ * Add entries as needed.
+ */
+
+#ifdef IDE_DRIVER
+
+#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS)
+#define DEVICE_NAME "ide"
+
+#elif (MAJOR_NR == RAMDISK_MAJOR)
+
+/* ram disk */
+#define DEVICE_NAME "ramdisk"
+#define DEVICE_NR(device) (MINOR(device))
+#define DEVICE_NO_RANDOM
+
+#elif (MAJOR_NR == Z2RAM_MAJOR)
+
+/* Zorro II Ram */
+#define DEVICE_NAME "Z2RAM"
+#define DEVICE_REQUEST do_z2_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == FLOPPY_MAJOR)
+
+static void floppy_off(unsigned int nr);
+
+#define DEVICE_NAME "floppy"
+#define DEVICE_INTR do_floppy
+#define DEVICE_REQUEST do_fd_request
+#define DEVICE_NR(device) ( (MINOR(device) & 3) | ((MINOR(device) & 0x80 ) >> 5 ))
+#define DEVICE_OFF(device) floppy_off(DEVICE_NR(device))
+
+#elif (MAJOR_NR == HD_MAJOR)
+
+/* Hard disk: timeout is 6 seconds. */
+#define DEVICE_NAME "hard disk"
+#define DEVICE_INTR do_hd
+#define TIMEOUT_VALUE (6*HZ)
+#define DEVICE_REQUEST do_hd_request
+#define DEVICE_NR(device) (MINOR(device)>>6)
+
+#elif (SCSI_DISK_MAJOR(MAJOR_NR))
+
+#define DEVICE_NAME "scsidisk"
+#define TIMEOUT_VALUE (2*HZ)
+#define DEVICE_NR(device) (((MAJOR(device) & SD_MAJOR_MASK) << (8 - 4)) + (MINOR(device) >> 4))
+
+/* Kludge to use the same number for both char and block major numbers */
+#elif (MAJOR_NR == MD_MAJOR) && defined(MD_DRIVER)
+
+#define DEVICE_NAME "Multiple devices driver"
+#define DEVICE_REQUEST do_md_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == SCSI_TAPE_MAJOR)
+
+#define DEVICE_NAME "scsitape"
+#define DEVICE_INTR do_st
+#define DEVICE_NR(device) (MINOR(device) & 0x7f)
+
+#elif (MAJOR_NR == OSST_MAJOR)
+
+#define DEVICE_NAME "onstream"
+#define DEVICE_INTR do_osst
+#define DEVICE_NR(device) (MINOR(device) & 0x7f)
+#define DEVICE_ON(device)
+#define DEVICE_OFF(device)
+
+#elif (MAJOR_NR == SCSI_CDROM_MAJOR)
+
+#define DEVICE_NAME "CD-ROM"
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == XT_DISK_MAJOR)
+
+#define DEVICE_NAME "xt disk"
+#define DEVICE_REQUEST do_xd_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == PS2ESDI_MAJOR)
+
+#define DEVICE_NAME "PS/2 ESDI"
+#define DEVICE_REQUEST do_ps2esdi_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == CDU31A_CDROM_MAJOR)
+
+#define DEVICE_NAME "CDU31A"
+#define DEVICE_REQUEST do_cdu31a_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == ACSI_MAJOR) && (defined(CONFIG_ATARI_ACSI) || defined(CONFIG_ATARI_ACSI_MODULE))
+
+#define DEVICE_NAME "ACSI"
+#define DEVICE_INTR do_acsi
+#define DEVICE_REQUEST do_acsi_request
+#define DEVICE_NR(device) (MINOR(device) >> 4)
+
+#elif (MAJOR_NR == MITSUMI_CDROM_MAJOR)
+
+#define DEVICE_NAME "Mitsumi CD-ROM"
+/* #define DEVICE_INTR do_mcd */
+#define DEVICE_REQUEST do_mcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MITSUMI_X_CDROM_MAJOR)
+
+#define DEVICE_NAME "Mitsumi CD-ROM"
+/* #define DEVICE_INTR do_mcdx */
+#define DEVICE_REQUEST do_mcdx_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #1"
+#define DEVICE_REQUEST do_sbpcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM2_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #2"
+#define DEVICE_REQUEST do_sbpcd2_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM3_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #3"
+#define DEVICE_REQUEST do_sbpcd3_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM4_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #4"
+#define DEVICE_REQUEST do_sbpcd4_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == AZTECH_CDROM_MAJOR)
+
+#define DEVICE_NAME "Aztech CD-ROM"
+#define DEVICE_REQUEST do_aztcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == CDU535_CDROM_MAJOR)
+
+#define DEVICE_NAME "SONY-CDU535"
+#define DEVICE_INTR do_cdu535
+#define DEVICE_REQUEST do_cdu535_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == GOLDSTAR_CDROM_MAJOR)
+
+#define DEVICE_NAME "Goldstar R420"
+#define DEVICE_REQUEST do_gscd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == CM206_CDROM_MAJOR)
+#define DEVICE_NAME "Philips/LMS CD-ROM cm206"
+#define DEVICE_REQUEST do_cm206_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == OPTICS_CDROM_MAJOR)
+
+#define DEVICE_NAME "DOLPHIN 8000AT CD-ROM"
+#define DEVICE_REQUEST do_optcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == SANYO_CDROM_MAJOR)
+
+#define DEVICE_NAME "Sanyo H94A CD-ROM"
+#define DEVICE_REQUEST do_sjcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == APBLOCK_MAJOR)
+
+#define DEVICE_NAME "apblock"
+#define DEVICE_REQUEST ap_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == DDV_MAJOR)
+
+#define DEVICE_NAME "ddv"
+#define DEVICE_REQUEST ddv_request
+#define DEVICE_NR(device) (MINOR(device)>>PARTN_BITS)
+
+#elif (MAJOR_NR == MFM_ACORN_MAJOR)
+
+#define DEVICE_NAME "mfm disk"
+#define DEVICE_INTR do_mfm
+#define DEVICE_REQUEST do_mfm_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == NBD_MAJOR)
+
+#define DEVICE_NAME "nbd"
+#define DEVICE_REQUEST do_nbd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MDISK_MAJOR)
+
+#define DEVICE_NAME "mdisk"
+#define DEVICE_REQUEST mdisk_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == DASD_MAJOR)
+
+#define DEVICE_NAME "dasd"
+#define DEVICE_REQUEST do_dasd_request
+#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS)
+
+#elif (MAJOR_NR == I2O_MAJOR)
+
+#define DEVICE_NAME "I2O block"
+#define DEVICE_REQUEST i2ob_request
+#define DEVICE_NR(device) (MINOR(device)>>4)
+
+#elif (MAJOR_NR == COMPAQ_SMART2_MAJOR)
+
+#define DEVICE_NAME "ida"
+#define TIMEOUT_VALUE (25*HZ)
+#define DEVICE_REQUEST do_ida_request
+#define DEVICE_NR(device) (MINOR(device) >> 4)
+
+#endif /* MAJOR_NR == whatever */
+
+/* provide DEVICE_xxx defaults, if not explicitly defined
+ * above in the MAJOR_NR==xxx if-elif tree */
+#ifndef DEVICE_ON
+#define DEVICE_ON(device) do {} while (0)
+#endif
+#ifndef DEVICE_OFF
+#define DEVICE_OFF(device) do {} while (0)
+#endif
+
+#if (MAJOR_NR != SCSI_TAPE_MAJOR) && (MAJOR_NR != OSST_MAJOR)
+#if !defined(IDE_DRIVER)
+
+#ifndef CURRENT
+#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head)
+#endif
+#ifndef QUEUE_EMPTY
+#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head)
+#endif
+
+#ifndef DEVICE_NAME
+#define DEVICE_NAME "unknown"
+#endif
+
+#define CURRENT_DEV DEVICE_NR(CURRENT->rq_dev)
+
+#ifdef DEVICE_INTR
+static void (*DEVICE_INTR)(void) = NULL;
+#endif
+
+#define SET_INTR(x) (DEVICE_INTR = (x))
+
+#ifdef DEVICE_REQUEST
+static void (DEVICE_REQUEST)(request_queue_t *);
+#endif
+
+#ifdef DEVICE_INTR
+#define CLEAR_INTR SET_INTR(NULL)
+#else
+#define CLEAR_INTR
+#endif
+
+#define INIT_REQUEST \
+ if (QUEUE_EMPTY) {\
+ CLEAR_INTR; \
+ return; \
+ } \
+ if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \
+ panic(DEVICE_NAME ": request list destroyed"); \
+ if (CURRENT->bh) { \
+ if (!buffer_locked(CURRENT->bh)) \
+ panic(DEVICE_NAME ": block not locked"); \
+ }
+
+#endif /* !defined(IDE_DRIVER) */
+
+
+#ifndef LOCAL_END_REQUEST /* If we have our own end_request, we do not want to include this mess */
+
+#if ! SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR)
+
+static inline void end_request(int uptodate) {
+ struct request *req = CURRENT;
+
+ if (end_that_request_first(req, uptodate, DEVICE_NAME))
+ return;
+
+#ifndef DEVICE_NO_RANDOM
+ add_blkdev_randomness(MAJOR(req->rq_dev));
+#endif
+ DEVICE_OFF(req->rq_dev);
+ blkdev_dequeue_request(req);
+ end_that_request_last(req);
+}
+
+#endif /* ! SCSI_BLK_MAJOR(MAJOR_NR) */
+#endif /* LOCAL_END_REQUEST */
+
+#endif /* (MAJOR_NR != SCSI_TAPE_MAJOR) */
+#endif /* defined(MAJOR_NR) || defined(IDE_DRIVER) */
+
+#endif /* _BLK_H */
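This header is designed to be included after the driver has defined MAJOR_NR
and its DEVICE_* macros, which is what makes CURRENT, INIT_REQUEST and
end_request() expand correctly. A hypothetical driver skeleton (the major
number and names are invented for illustration):

    #define MAJOR_NR        XYZ_MAJOR       /* hypothetical major number */
    #define DEVICE_NAME     "xyz"
    #define DEVICE_REQUEST  do_xyz_request
    #define DEVICE_NR(dev)  (MINOR(dev))
    #include <xeno/blk.h>

    static void do_xyz_request(request_queue_t *q)
    {
        while (1) {
            INIT_REQUEST;               /* returns when the queue is empty */
            /* ... transfer CURRENT->nr_sectors from/to CURRENT->buffer ... */
            end_request(1);             /* 1 == success */
        }
    }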
diff --git a/xen/include/xeno/blkdev.h b/xen/include/xeno/blkdev.h
new file mode 100644
index 0000000000..a2cd390517
--- /dev/null
+++ b/xen/include/xeno/blkdev.h
@@ -0,0 +1,371 @@
+#ifndef _LINUX_BLKDEV_H
+#define _LINUX_BLKDEV_H
+
+#include <xeno/lib.h>
+#include <asm/atomic.h>
+#include <asm/bitops.h>
+#include <xeno/list.h>
+#include <xeno/kdev_t.h>
+#include <xeno/sched.h>
+
+/* Some defines from fs.h that may actually be useful to the blkdev layer. */
+#define READ 0
+#define WRITE 1
+#define READA 2
+#define BLOCK_SIZE_BITS 10
+#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+
+extern void init_blkdev_info(struct task_struct *);
+extern void destroy_blkdev_info(struct task_struct *);
+
+extern int unregister_blkdev(unsigned int, const char *);
+extern int invalidate_device(kdev_t, int);
+extern int check_disk_change(kdev_t);
+struct block_device;
+extern void invalidate_bdev(struct block_device *, int);
+
+/*
+ * Metainformation regarding block devices is kept in inode and file
+ * structures. We don't actually want those so we define just as much
+ * as we need right here.
+ */
+struct file {
+};
+struct inode {
+ kdev_t i_rdev; /* for _open and _release, specifies the blkdev */
+ struct block_device *i_bdev;
+};
+
+struct block_device_operations {
+ int (*open) (struct inode *, struct file *);
+ int (*release) (struct inode *, struct file *);
+ int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long);
+ int (*check_media_change) (kdev_t);
+ int (*revalidate) (kdev_t);
+};
+
+
+enum bh_state_bits {
+ BH_Uptodate, /* 1 if the buffer contains valid data */
+ BH_Dirty, /* 1 if the buffer is dirty */
+ BH_Lock, /* 1 if the buffer is locked */
+ BH_Req, /* 0 if the buffer has been invalidated */
+ BH_Mapped, /* 1 if the buffer has a disk mapping */
+ BH_New, /* 1 if the buffer is new and not yet written out */
+ BH_Async, /* 1 if the buffer is under end_buffer_io_async I/O */
+ BH_Wait_IO, /* 1 if we should write out this buffer */
+ BH_Launder, /* 1 if we can throttle on this buffer */
+ BH_JBD, /* 1 if it has an attached journal_head */
+ BH_Read, /* 1 if request is a read from disc */
+ BH_Write /* 1 if request is a write to disc */
+};
+
+struct buffer_head {
+ unsigned long b_blocknr; /* block number */
+ unsigned short b_size; /* block size */
+    unsigned short b_list;          /* List this buffer appears on */
+ kdev_t b_dev; /* device (B_FREE = free) */
+
+ atomic_t b_count; /* users using this block */
+ kdev_t b_rdev; /* Real device */
+ unsigned long b_state; /* buffer state bitmap (see above) */
+
+ struct buffer_head *b_reqnext; /* request queue */
+
+ char * b_data; /* pointer to data block */
+ struct pfn_info *b_page; /* the page this bh is mapped to */
+ void (*b_end_io)(struct buffer_head *bh, int uptodate);
+
+ unsigned long b_rsector; /* Real buffer location on disk */
+
+ /* Both used by b_end_io function in xen_block.c */
+ void *b_xen_domain;
+ void *b_xen_id;
+};
+
+typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
+void init_buffer(struct buffer_head *, bh_end_io_t *, void *);
+
+#define __buffer_state(bh, state) (((bh)->b_state & (1UL << BH_##state)) != 0)
+
+#define buffer_uptodate(bh) __buffer_state(bh,Uptodate)
+#define buffer_dirty(bh) __buffer_state(bh,Dirty)
+#define buffer_locked(bh) __buffer_state(bh,Lock)
+#define buffer_req(bh) __buffer_state(bh,Req)
+#define buffer_mapped(bh) __buffer_state(bh,Mapped)
+#define buffer_new(bh) __buffer_state(bh,New)
+#define buffer_async(bh) __buffer_state(bh,Async)
+#define buffer_launder(bh) __buffer_state(bh,Launder)
+
+#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK)
+
+extern void set_bh_page(struct buffer_head *bh, struct pfn_info *page, unsigned long offset);
+
+#define touch_buffer(bh) mark_page_accessed(bh->b_page)
+
+#define atomic_set_buffer_clean(bh) test_and_clear_bit(BH_Dirty, &(bh)->b_state)
+
+static inline void __mark_buffer_clean(struct buffer_head *bh)
+{
+ panic("__mark_buffer_clean");
+}
+
+static inline void mark_buffer_clean(struct buffer_head * bh)
+{
+ if (atomic_set_buffer_clean(bh))
+ __mark_buffer_clean(bh);
+}
+
+static inline void buffer_IO_error(struct buffer_head * bh)
+{
+ mark_buffer_clean(bh);
+ /* b_end_io has to clear the BH_Uptodate bitflag in the error case! */
+ bh->b_end_io(bh, 0);
+}
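A sketch of a completion callback of the bh_end_io_t shape declared below;
per the comment above, the error path must leave BH_Uptodate clear. The
driver name is invented, and the bit operations come from asm/bitops.h,
which this header already includes.

    static void xyz_end_io(struct buffer_head *bh, int uptodate)
    {
        if (uptodate)
            set_bit(BH_Uptodate, &bh->b_state);
        else
            clear_bit(BH_Uptodate, &bh->b_state);  /* required on error */
        clear_bit(BH_Lock, &bh->b_state);          /* buffer no longer busy */
        /* ... wake any waiter or recycle the buffer_head ... */
    }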
+
+/**** XXX END OF BUFFER_HEAD STUFF XXXX ****/
+
+#include <xeno/major.h>
+#include <xeno/sched.h>
+#include <xeno/genhd.h>
+#include <xeno/tqueue.h>
+#include <xeno/list.h>
+
+struct request_queue;
+typedef struct request_queue request_queue_t;
+struct elevator_s;
+typedef struct elevator_s elevator_t;
+
+/*
+ * Ok, this is an expanded form so that we can use the same
+ * request for paging requests.
+ */
+struct request {
+ struct list_head queue;
+ int elevator_sequence;
+
+ volatile int rq_status; /* should split this into a few status bits */
+#define RQ_INACTIVE (-1)
+#define RQ_ACTIVE 1
+#define RQ_SCSI_BUSY 0xffff
+#define RQ_SCSI_DONE 0xfffe
+#define RQ_SCSI_DISCONNECTING 0xffe0
+
+ kdev_t rq_dev;
+ int cmd; /* READ or WRITE */
+ int errors;
+ unsigned long start_time;
+ unsigned long sector;
+ unsigned long nr_sectors;
+ unsigned long hard_sector, hard_nr_sectors;
+ unsigned int nr_segments;
+ unsigned int nr_hw_segments;
+ unsigned long current_nr_sectors;
+ void * special;
+ char * buffer;
+ struct completion * waiting;
+ struct buffer_head * bh;
+ struct buffer_head * bhtail;
+ request_queue_t *q;
+};
+
+#include <xeno/elevator.h>
+
+typedef int (merge_request_fn) (request_queue_t *q,
+ struct request *req,
+ struct buffer_head *bh,
+ int);
+typedef int (merge_requests_fn) (request_queue_t *q,
+ struct request *req,
+ struct request *req2,
+ int);
+typedef void (request_fn_proc) (request_queue_t *q);
+typedef request_queue_t * (queue_proc) (kdev_t dev);
+typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh);
+typedef void (plug_device_fn) (request_queue_t *q, kdev_t device);
+typedef void (unplug_device_fn) (void *q);
+
+/*
+ * Default nr free requests per queue, ll_rw_blk will scale it down
+ * according to available RAM at init time
+ */
+#define QUEUE_NR_REQUESTS 8192
+
+struct request_list {
+ unsigned int count;
+ struct list_head free;
+};
+
+struct request_queue
+{
+ /*
+ * the queue request freelist, one for reads and one for writes
+ */
+ struct request_list rq[2];
+
+ /*
+ * The total number of requests on each queue
+ */
+ int nr_requests;
+
+ /*
+ * Batching threshold for sleep/wakeup decisions
+ */
+ int batch_requests;
+
+ /*
+ * Together with queue_head for cacheline sharing
+ */
+ struct list_head queue_head;
+ elevator_t elevator;
+
+ request_fn_proc * request_fn;
+ merge_request_fn * back_merge_fn;
+ merge_request_fn * front_merge_fn;
+ merge_requests_fn * merge_requests_fn;
+ make_request_fn * make_request_fn;
+ plug_device_fn * plug_device_fn;
+ /*
+ * The queue owner gets to use this for whatever they like.
+ * ll_rw_blk doesn't touch it.
+ */
+ void * queuedata;
+
+ /*
+ * This is used to remove the plug when tq_disk runs.
+ */
+ struct tq_struct plug_tq;
+
+ /*
+ * Boolean that indicates whether this queue is plugged or not.
+ */
+ char plugged;
+
+ /*
+ * Boolean that indicates whether current_request is active or
+ * not.
+ */
+ char head_active;
+
+ /*
+ * Is meant to protect the queue in the future instead of
+ * io_request_lock
+ */
+ spinlock_t queue_lock;
+
+#if 0
+ /*
+ * Tasks wait here for free read and write requests
+ */
+ wait_queue_head_t wait_for_requests[2];
+#endif
+};
+
+#define bh_phys(bh) (page_to_phys((bh)->b_page) + bh_offset((bh)))
+
+struct blk_dev_struct {
+ /*
+ * queue_proc has to be atomic
+ */
+ request_queue_t request_queue;
+ queue_proc *queue;
+ void *data;
+};
+
+struct sec_size {
+ unsigned block_size;
+ unsigned block_size_bits;
+};
+
+/*
+ * Used to indicate the default queue for drivers that don't bother
+ * to implement multiple queues. We have this access macro here
+ * so as to eliminate the need for each and every block device
+ * driver to know about the internal structure of blk_dev[].
+ */
+#define BLK_DEFAULT_QUEUE(_MAJOR) &blk_dev[_MAJOR].request_queue
+
+extern struct sec_size * blk_sec[MAX_BLKDEV];
+extern struct blk_dev_struct blk_dev[MAX_BLKDEV];
+extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size);
+extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size);
+extern void generic_make_request(int rw, struct buffer_head * bh);
+extern inline request_queue_t *blk_get_queue(kdev_t dev);
+extern void blkdev_release_request(struct request *);
+
+/*
+ * Access functions for manipulating queue properties
+ */
+extern int blk_grow_request_list(request_queue_t *q, int nr_requests);
+extern void blk_init_queue(request_queue_t *, request_fn_proc *);
+extern void blk_cleanup_queue(request_queue_t *);
+extern void blk_queue_headactive(request_queue_t *, int);
+extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
+extern void generic_unplug_device(void *);
+
+extern int * blk_size[MAX_BLKDEV];
+
+extern int * blksize_size[MAX_BLKDEV];
+
+extern int * hardsect_size[MAX_BLKDEV];
+
+/*extern int * max_readahead[MAX_BLKDEV];*/
+
+extern int * max_sectors[MAX_BLKDEV];
+
+extern int * max_segments[MAX_BLKDEV];
+
+#define MAX_SEGMENTS 128
+#define MAX_SECTORS 255
+
+#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK)
+
+#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queue)
+#define blkdev_entry_next_request(entry) blkdev_entry_to_request((entry)->next)
+#define blkdev_entry_prev_request(entry) blkdev_entry_to_request((entry)->prev)
+#define blkdev_next_request(req) blkdev_entry_to_request((req)->queue.next)
+#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queue.prev)
+
+extern void drive_stat_acct (kdev_t dev, int rw,
+ unsigned long nr_sectors, int new_io);
+
+static inline int get_hardsect_size(kdev_t dev)
+{
+ int retval = 512;
+ int major = MAJOR(dev);
+
+ if (hardsect_size[major]) {
+ int minor = MINOR(dev);
+ if (hardsect_size[major][minor])
+ retval = hardsect_size[major][minor];
+ }
+ return retval;
+}
+
+#define blk_finished_io(nsects) do { } while (0)
+#define blk_started_io(nsects) do { } while (0)
+
+static inline unsigned int blksize_bits(unsigned int size)
+{
+ unsigned int bits = 8;
+ do {
+ bits++;
+ size >>= 1;
+ } while (size > 256);
+ return bits;
+}
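Worked values, since the loop is easy to misread: the function returns log2
of the soft block size.

    unsigned int b9  = blksize_bits(512);    /* == 9  */
    unsigned int b10 = blksize_bits(1024);   /* == 10 */
    unsigned int b12 = blksize_bits(4096);   /* == 12 */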
+
+static inline unsigned int block_size(kdev_t dev)
+{
+ int retval = BLOCK_SIZE;
+ int major = MAJOR(dev);
+
+ if (blksize_size[major]) {
+ int minor = MINOR(dev);
+ if (blksize_size[major][minor])
+ retval = blksize_size[major][minor];
+ }
+ return retval;
+}
+
+#endif
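A sketch of how a driver ties into these declarations at init time.
XYZ_MAJOR and the request function are invented for illustration, and the
request-completion helpers used inside it come from blk.h above.

    static void do_xyz_request(request_queue_t *q)
    {
        while (!list_empty(&q->queue_head)) {
            struct request *req = blkdev_entry_next_request(&q->queue_head);
            /* ... service req->nr_sectors starting at req->sector ... */
            blkdev_dequeue_request(req);
            end_that_request_last(req);
        }
    }

    static int xyz_init(void)
    {
        blk_init_queue(BLK_DEFAULT_QUEUE(XYZ_MAJOR), do_xyz_request);
        blk_queue_headactive(BLK_DEFAULT_QUEUE(XYZ_MAJOR), 0);
        return 0;
    }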
diff --git a/xen/include/xeno/blkpg.h b/xen/include/xeno/blkpg.h
new file mode 100644
index 0000000000..f4240abaf1
--- /dev/null
+++ b/xen/include/xeno/blkpg.h
@@ -0,0 +1,64 @@
+#ifndef _LINUX_BLKPG_H
+#define _LINUX_BLKPG_H
+
+/*
+ * Partition table and disk geometry handling
+ *
+ * A single ioctl with lots of subfunctions:
+ *
+ * Device number stuff:
+ * get_whole_disk() (given the device number of a partition,
+ * find the device number of the encompassing disk)
+ * get_all_partitions() (given the device number of a disk, return the
+ * device numbers of all its known partitions)
+ *
+ * Partition stuff:
+ * add_partition()
+ * delete_partition()
+ * test_partition_in_use() (also for test_disk_in_use)
+ *
+ * Geometry stuff:
+ * get_geometry()
+ * set_geometry()
+ * get_bios_drivedata()
+ *
+ * For today, only the partition stuff - aeb, 990515
+ */
+#include <xeno/ioctl.h>
+
+#define BLKPG _IO(0x12,105)
+
+/* The argument structure */
+struct blkpg_ioctl_arg {
+ int op;
+ int flags;
+ int datalen;
+ void *data;
+};
+
+/* The subfunctions (for the op field) */
+#define BLKPG_ADD_PARTITION 1
+#define BLKPG_DEL_PARTITION 2
+
+/* Sizes of name fields. Unused at present. */
+#define BLKPG_DEVNAMELTH 64
+#define BLKPG_VOLNAMELTH 64
+
+/* The data structure for ADD_PARTITION and DEL_PARTITION */
+struct blkpg_partition {
+ long long start; /* starting offset in bytes */
+ long long length; /* length in bytes */
+ int pno; /* partition number */
+ char devname[BLKPG_DEVNAMELTH]; /* partition name, like sda5 or c0d1p2,
+ to be used in kernel messages */
+ char volname[BLKPG_VOLNAMELTH]; /* volume label */
+};
+
+#ifdef __KERNEL__
+
+extern char * partition_name(kdev_t dev);
+extern int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_BLKPG_H */
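Driving BLKPG from user space is a matter of filling the two structures
above. In this sketch the byte offsets and partition number are invented,
and fd is assumed to refer to the whole-disk device node.

    #include <string.h>
    #include <sys/ioctl.h>

    int add_partition(int fd)
    {
        struct blkpg_partition part;
        struct blkpg_ioctl_arg arg;

        memset(&part, 0, sizeof(part));
        part.start  = 1024LL * 1024;        /* byte offset: skip first MiB */
        part.length = 64LL * 1024 * 1024;   /* 64 MiB partition */
        part.pno    = 1;

        arg.op      = BLKPG_ADD_PARTITION;
        arg.flags   = 0;
        arg.datalen = sizeof(part);
        arg.data    = &part;
        return ioctl(fd, BLKPG, &arg);
    }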
diff --git a/xen/include/xeno/block.h b/xen/include/xeno/block.h
new file mode 100644
index 0000000000..bb80d0987b
--- /dev/null
+++ b/xen/include/xeno/block.h
@@ -0,0 +1,11 @@
+/* block.h
+ *
+ * this is the hypervisor end of the block io code.
+ */
+
+#include <hypervisor-ifs/block.h>
+
+/* vif prototypes */
+blk_ring_t *create_block_ring(int domain);
+void destroy_block_ring(struct task_struct *p);
+
diff --git a/xen/include/xeno/brlock.h b/xen/include/xeno/brlock.h
new file mode 100644
index 0000000000..208c457338
--- /dev/null
+++ b/xen/include/xeno/brlock.h
@@ -0,0 +1,220 @@
+#ifndef __LINUX_BRLOCK_H
+#define __LINUX_BRLOCK_H
+
+/*
+ * 'Big Reader' read-write spinlocks.
+ *
+ * super-fast read/write locks, with write-side penalty. The point
+ * is to have a per-CPU read/write lock. Readers lock their CPU-local
+ * readlock, writers must lock all locks to get write access. These
+ * CPU-read-write locks are semantically identical to normal rwlocks.
+ * Memory usage is higher as well. (NR_CPUS*L1_CACHE_BYTES bytes)
+ *
+ * The most important feature is that these spinlocks do not cause
+ * cacheline ping-pong in the 'most readonly data' case.
+ *
+ * Copyright 2000, Ingo Molnar <mingo@redhat.com>
+ *
+ * Registry idea and naming [ crucial! :-) ] by:
+ *
+ * David S. Miller <davem@redhat.com>
+ *
+ * David has an implementation that doesn't use atomic operations in
+ * the read branch via memory ordering tricks - I guess we need to
+ * split this up into a per-arch thing? The atomicity issue is a
+ * secondary item in profiles, at least on x86 platforms.
+ *
+ * The atomic op version overhead is indeed a big deal on
+ * load-locked/store-conditional cpus (ALPHA/MIPS/PPC) and
+ * compare-and-swap cpus (Sparc64). So we control which
+ * implementation to use with a __BRLOCK_USE_ATOMICS define. -DaveM
+ */
+
+/* Register bigreader lock indices here. */
+enum brlock_indices {
+ BR_GLOBALIRQ_LOCK,
+ BR_NETPROTO_LOCK,
+
+ __BR_END
+};
+
+#include <linux/config.h>
+
+#ifdef CONFIG_SMP
+
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+
+#if defined(__i386__) || defined(__ia64__) || defined(__x86_64__)
+#define __BRLOCK_USE_ATOMICS
+#else
+#undef __BRLOCK_USE_ATOMICS
+#endif
+
+#ifdef __BRLOCK_USE_ATOMICS
+typedef rwlock_t brlock_read_lock_t;
+#else
+typedef unsigned int brlock_read_lock_t;
+#endif
+
+/*
+ * align last allocated index to the next cacheline:
+ */
+#define __BR_IDX_MAX \
+ (((sizeof(brlock_read_lock_t)*__BR_END + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) / sizeof(brlock_read_lock_t))
+
+extern brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX];
+
+#ifndef __BRLOCK_USE_ATOMICS
+struct br_wrlock {
+ spinlock_t lock;
+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
+
+extern struct br_wrlock __br_write_locks[__BR_IDX_MAX];
+#endif
+
+extern void __br_lock_usage_bug (void);
+
+#ifdef __BRLOCK_USE_ATOMICS
+
+static inline void br_read_lock (enum brlock_indices idx)
+{
+ /*
+ * This causes a link-time bug message if an
+ * invalid index is used:
+ */
+ if (idx >= __BR_END)
+ __br_lock_usage_bug();
+
+ read_lock(&__brlock_array[smp_processor_id()][idx]);
+}
+
+static inline void br_read_unlock (enum brlock_indices idx)
+{
+ if (idx >= __BR_END)
+ __br_lock_usage_bug();
+
+ read_unlock(&__brlock_array[smp_processor_id()][idx]);
+}
+
+#else /* ! __BRLOCK_USE_ATOMICS */
+static inline void br_read_lock (enum brlock_indices idx)
+{
+ unsigned int *ctr;
+ spinlock_t *lock;
+
+ /*
+ * This causes a link-time bug message if an
+ * invalid index is used:
+ */
+ if (idx >= __BR_END)
+ __br_lock_usage_bug();
+
+ ctr = &__brlock_array[smp_processor_id()][idx];
+ lock = &__br_write_locks[idx].lock;
+again:
+ (*ctr)++;
+ mb();
+ if (spin_is_locked(lock)) {
+ (*ctr)--;
+ wmb(); /*
+ * The release of the ctr must become visible
+ * to the other cpus eventually thus wmb(),
+ * we don't care if spin_is_locked is reordered
+ * before the releasing of the ctr.
+                * However IMHO this wmb() is superfluous even in theory.
+                * It would not be superfluous only if on the
+ * other CPUs doing a ldl_l instead of an ldl
+ * would make a difference and I don't think this is
+ * the case.
+ * I'd like to clarify this issue further
+ * but for now this is a slow path so adding the
+ * wmb() will keep us on the safe side.
+ */
+ while (spin_is_locked(lock))
+ barrier();
+ goto again;
+ }
+}
+
+static inline void br_read_unlock (enum brlock_indices idx)
+{
+ unsigned int *ctr;
+
+ if (idx >= __BR_END)
+ __br_lock_usage_bug();
+
+ ctr = &__brlock_array[smp_processor_id()][idx];
+
+ wmb();
+ (*ctr)--;
+}
+#endif /* __BRLOCK_USE_ATOMICS */
+
+/* write path not inlined - it's rare and larger */
+
+extern void FASTCALL(__br_write_lock (enum brlock_indices idx));
+extern void FASTCALL(__br_write_unlock (enum brlock_indices idx));
+
+static inline void br_write_lock (enum brlock_indices idx)
+{
+ if (idx >= __BR_END)
+ __br_lock_usage_bug();
+ __br_write_lock(idx);
+}
+
+static inline void br_write_unlock (enum brlock_indices idx)
+{
+ if (idx >= __BR_END)
+ __br_lock_usage_bug();
+ __br_write_unlock(idx);
+}
+
+#else
+# define br_read_lock(idx) ((void)(idx))
+# define br_read_unlock(idx) ((void)(idx))
+# define br_write_lock(idx) ((void)(idx))
+# define br_write_unlock(idx) ((void)(idx))
+#endif
+
+/*
+ * Now enumerate all of the possible sw/hw IRQ protected
+ * versions of the interfaces.
+ */
+#define br_read_lock_irqsave(idx, flags) \
+ do { local_irq_save(flags); br_read_lock(idx); } while (0)
+
+#define br_read_lock_irq(idx) \
+ do { local_irq_disable(); br_read_lock(idx); } while (0)
+
+#define br_read_lock_bh(idx) \
+ do { local_bh_disable(); br_read_lock(idx); } while (0)
+
+#define br_write_lock_irqsave(idx, flags) \
+ do { local_irq_save(flags); br_write_lock(idx); } while (0)
+
+#define br_write_lock_irq(idx) \
+ do { local_irq_disable(); br_write_lock(idx); } while (0)
+
+#define br_write_lock_bh(idx) \
+ do { local_bh_disable(); br_write_lock(idx); } while (0)
+
+#define br_read_unlock_irqrestore(idx, flags) \
+	do { br_read_unlock(idx); local_irq_restore(flags); } while (0)
+
+#define br_read_unlock_irq(idx) \
+ do { br_read_unlock(idx); local_irq_enable(); } while (0)
+
+#define br_read_unlock_bh(idx) \
+ do { br_read_unlock(idx); local_bh_enable(); } while (0)
+
+#define br_write_unlock_irqrestore(idx, flags) \
+	do { br_write_unlock(idx); local_irq_restore(flags); } while (0)
+
+#define br_write_unlock_irq(idx) \
+ do { br_write_unlock(idx); local_irq_enable(); } while (0)
+
+#define br_write_unlock_bh(idx) \
+ do { br_write_unlock(idx); local_bh_enable(); } while (0)
+
+#endif /* __LINUX_BRLOCK_H */
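A sketch of the intended usage pattern: readers take only their own CPU's
slot, so the common read path stays cache-local, while the rare writer pays
for every CPU. BR_NETPROTO_LOCK is one of the indices registered above; the
function names are illustrative.

    static void reader_path(void)
    {
        br_read_lock(BR_NETPROTO_LOCK);
        /* ... look at mostly-read-only protocol tables ... */
        br_read_unlock(BR_NETPROTO_LOCK);
    }

    static void writer_path(void)
    {
        br_write_lock_bh(BR_NETPROTO_LOCK);    /* also masks bottom halves */
        /* ... modify the tables ... */
        br_write_unlock_bh(BR_NETPROTO_LOCK);
    }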
diff --git a/xen/include/xeno/byteorder/big_endian.h b/xen/include/xeno/byteorder/big_endian.h
new file mode 100644
index 0000000000..b84efd74c9
--- /dev/null
+++ b/xen/include/xeno/byteorder/big_endian.h
@@ -0,0 +1,68 @@
+#ifndef _LINUX_BYTEORDER_BIG_ENDIAN_H
+#define _LINUX_BYTEORDER_BIG_ENDIAN_H
+
+#ifndef __BIG_ENDIAN
+#define __BIG_ENDIAN 4321
+#endif
+#ifndef __BIG_ENDIAN_BITFIELD
+#define __BIG_ENDIAN_BITFIELD
+#endif
+
+#include <linux/byteorder/swab.h>
+
+#define __constant_htonl(x) ((__u32)(x))
+#define __constant_ntohl(x) ((__u32)(x))
+#define __constant_htons(x) ((__u16)(x))
+#define __constant_ntohs(x) ((__u16)(x))
+#define __constant_cpu_to_le64(x) ___constant_swab64((x))
+#define __constant_le64_to_cpu(x) ___constant_swab64((x))
+#define __constant_cpu_to_le32(x) ___constant_swab32((x))
+#define __constant_le32_to_cpu(x) ___constant_swab32((x))
+#define __constant_cpu_to_le16(x) ___constant_swab16((x))
+#define __constant_le16_to_cpu(x) ___constant_swab16((x))
+#define __constant_cpu_to_be64(x) ((__u64)(x))
+#define __constant_be64_to_cpu(x) ((__u64)(x))
+#define __constant_cpu_to_be32(x) ((__u32)(x))
+#define __constant_be32_to_cpu(x) ((__u32)(x))
+#define __constant_cpu_to_be16(x) ((__u16)(x))
+#define __constant_be16_to_cpu(x) ((__u16)(x))
+#define __cpu_to_le64(x) __swab64((x))
+#define __le64_to_cpu(x) __swab64((x))
+#define __cpu_to_le32(x) __swab32((x))
+#define __le32_to_cpu(x) __swab32((x))
+#define __cpu_to_le16(x) __swab16((x))
+#define __le16_to_cpu(x) __swab16((x))
+#define __cpu_to_be64(x) ((__u64)(x))
+#define __be64_to_cpu(x) ((__u64)(x))
+#define __cpu_to_be32(x) ((__u32)(x))
+#define __be32_to_cpu(x) ((__u32)(x))
+#define __cpu_to_be16(x) ((__u16)(x))
+#define __be16_to_cpu(x) ((__u16)(x))
+#define __cpu_to_le64p(x) __swab64p((x))
+#define __le64_to_cpup(x) __swab64p((x))
+#define __cpu_to_le32p(x) __swab32p((x))
+#define __le32_to_cpup(x) __swab32p((x))
+#define __cpu_to_le16p(x) __swab16p((x))
+#define __le16_to_cpup(x) __swab16p((x))
+#define __cpu_to_be64p(x) (*(__u64*)(x))
+#define __be64_to_cpup(x) (*(__u64*)(x))
+#define __cpu_to_be32p(x) (*(__u32*)(x))
+#define __be32_to_cpup(x) (*(__u32*)(x))
+#define __cpu_to_be16p(x) (*(__u16*)(x))
+#define __be16_to_cpup(x) (*(__u16*)(x))
+#define __cpu_to_le64s(x) __swab64s((x))
+#define __le64_to_cpus(x) __swab64s((x))
+#define __cpu_to_le32s(x) __swab32s((x))
+#define __le32_to_cpus(x) __swab32s((x))
+#define __cpu_to_le16s(x) __swab16s((x))
+#define __le16_to_cpus(x) __swab16s((x))
+#define __cpu_to_be64s(x) do {} while (0)
+#define __be64_to_cpus(x) do {} while (0)
+#define __cpu_to_be32s(x) do {} while (0)
+#define __be32_to_cpus(x) do {} while (0)
+#define __cpu_to_be16s(x) do {} while (0)
+#define __be16_to_cpus(x) do {} while (0)
+
+#include <linux/byteorder/generic.h>
+
+#endif /* _LINUX_BYTEORDER_BIG_ENDIAN_H */
diff --git a/xen/include/xeno/byteorder/generic.h b/xen/include/xeno/byteorder/generic.h
new file mode 100644
index 0000000000..d3d63a5a1e
--- /dev/null
+++ b/xen/include/xeno/byteorder/generic.h
@@ -0,0 +1,180 @@
+#ifndef _LINUX_BYTEORDER_GENERIC_H
+#define _LINUX_BYTEORDER_GENERIC_H
+
+/*
+ * linux/byteorder_generic.h
+ * Generic Byte-reordering support
+ *
+ * Francois-Rene Rideau <fare@tunes.org> 19970707
+ * gathered all the good ideas from all asm-foo/byteorder.h into one file,
+ * cleaned them up.
+ * I hope it is compliant with non-GCC compilers.
+ * I decided to put __BYTEORDER_HAS_U64__ in byteorder.h,
+ * because I wasn't sure it would be ok to put it in types.h
+ * Upgraded it to 2.1.43
+ * Francois-Rene Rideau <fare@tunes.org> 19971012
+ * Upgraded it to 2.1.57
+ * to please Linus T., replaced huge #ifdef's between little/big endian
+ * by nestedly #include'd files.
+ * Francois-Rene Rideau <fare@tunes.org> 19971205
+ * Made it to 2.1.71; now a facelift:
+ * Put files under include/linux/byteorder/
+ * Split swab from generic support.
+ *
+ * TODO:
+ * = Regular kernel maintainers could also replace all these manual
+ * byteswap macros that remain, disseminated among drivers,
+ * after some grep or the sources...
+ * = Linus might want to rename all these macros and files to fit his taste,
+ * to fit his personal naming scheme.
+ * = it seems that a few drivers would also appreciate
+ * nybble swapping support...
+ * = every architecture could add their byteswap macro in asm/byteorder.h
+ * see how some architectures already do (i386, alpha, ppc, etc)
+ * = cpu_to_beXX and beXX_to_cpu might some day need to be well
+ * distinguished throughout the kernel. This is not the case currently,
+ *   since little-endian, big-endian, and pdp-endian machines don't need it.
+ * But this might be the case for, say, a port of Linux to 20/21 bit
+ * architectures (and F21 Linux addict around?).
+ */
+
+/*
+ * The following macros are to be defined by <asm/byteorder.h>:
+ *
+ * Conversion of long and short int between network and host format
+ * ntohl(__u32 x)
+ * ntohs(__u16 x)
+ * htonl(__u32 x)
+ * htons(__u16 x)
+ * It seems that some programs (which? where? or perhaps a standard? POSIX?)
+ * might like the above to be functions, not macros (why?).
+ * If that's true, then detect them, and take measures.
+ * Anyway, the measure is: define only ___ntohl as a macro instead,
+ * and in a separate file, have
+ * unsigned long inline ntohl(x){return ___ntohl(x);}
+ *
+ * The same for constant arguments
+ * __constant_ntohl(__u32 x)
+ * __constant_ntohs(__u16 x)
+ * __constant_htonl(__u32 x)
+ * __constant_htons(__u16 x)
+ *
+ * Conversion of XX-bit integers (16- 32- or 64-)
+ * between native CPU format and little/big endian format
+ * 64-bit stuff only defined for proper architectures
+ * cpu_to_[bl]eXX(__uXX x)
+ * [bl]eXX_to_cpu(__uXX x)
+ *
+ * The same, but takes a pointer to the value to convert
+ * cpu_to_[bl]eXXp(__uXX x)
+ * [bl]eXX_to_cpup(__uXX x)
+ *
+ * The same, but change in situ
+ * cpu_to_[bl]eXXs(__uXX x)
+ * [bl]eXX_to_cpus(__uXX x)
+ *
+ * See asm-foo/byteorder.h for examples of how to provide
+ * architecture-optimized versions
+ *
+ */
+
+
+#if defined(__KERNEL__)
+/*
+ * inside the kernel, we can use nicknames;
+ * outside of it, we must avoid POSIX namespace pollution...
+ */
+#define cpu_to_le64 __cpu_to_le64
+#define le64_to_cpu __le64_to_cpu
+#define cpu_to_le32 __cpu_to_le32
+#define le32_to_cpu __le32_to_cpu
+#define cpu_to_le16 __cpu_to_le16
+#define le16_to_cpu __le16_to_cpu
+#define cpu_to_be64 __cpu_to_be64
+#define be64_to_cpu __be64_to_cpu
+#define cpu_to_be32 __cpu_to_be32
+#define be32_to_cpu __be32_to_cpu
+#define cpu_to_be16 __cpu_to_be16
+#define be16_to_cpu __be16_to_cpu
+#define cpu_to_le64p __cpu_to_le64p
+#define le64_to_cpup __le64_to_cpup
+#define cpu_to_le32p __cpu_to_le32p
+#define le32_to_cpup __le32_to_cpup
+#define cpu_to_le16p __cpu_to_le16p
+#define le16_to_cpup __le16_to_cpup
+#define cpu_to_be64p __cpu_to_be64p
+#define be64_to_cpup __be64_to_cpup
+#define cpu_to_be32p __cpu_to_be32p
+#define be32_to_cpup __be32_to_cpup
+#define cpu_to_be16p __cpu_to_be16p
+#define be16_to_cpup __be16_to_cpup
+#define cpu_to_le64s __cpu_to_le64s
+#define le64_to_cpus __le64_to_cpus
+#define cpu_to_le32s __cpu_to_le32s
+#define le32_to_cpus __le32_to_cpus
+#define cpu_to_le16s __cpu_to_le16s
+#define le16_to_cpus __le16_to_cpus
+#define cpu_to_be64s __cpu_to_be64s
+#define be64_to_cpus __be64_to_cpus
+#define cpu_to_be32s __cpu_to_be32s
+#define be32_to_cpus __be32_to_cpus
+#define cpu_to_be16s __cpu_to_be16s
+#define be16_to_cpus __be16_to_cpus
+#endif
+
+
+/*
+ * Handle ntohl and such. These have various compatibility
+ * issues - like we want to give the prototype even though we
+ * also have a macro for them in case some strange program
+ * wants to take the address of the thing or something..
+ *
+ * Note that these used to return a "long" in libc5, even though
+ * long is often 64-bit these days.. Thus the casts.
+ *
+ * They have to be macros in order to do the constant folding
+ * correctly - if the argument is passed into an inline function
+ * it is no longer constant according to gcc.
+ */
+
+#undef ntohl
+#undef ntohs
+#undef htonl
+#undef htons
+
+/*
+ * Do the prototypes. Somebody might want to take the
+ * address or some such sick thing..
+ */
+#if defined(__KERNEL__) || (defined (__GLIBC__) && __GLIBC__ >= 2)
+extern __u32 ntohl(__u32);
+extern __u32 htonl(__u32);
+#else
+extern unsigned long int ntohl(unsigned long int);
+extern unsigned long int htonl(unsigned long int);
+#endif
+extern unsigned short int ntohs(unsigned short int);
+extern unsigned short int htons(unsigned short int);
+
+
+#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__)
+
+#define ___htonl(x) __cpu_to_be32(x)
+#define ___htons(x) __cpu_to_be16(x)
+#define ___ntohl(x) __be32_to_cpu(x)
+#define ___ntohs(x) __be16_to_cpu(x)
+
+#if defined(__KERNEL__) || (defined (__GLIBC__) && __GLIBC__ >= 2)
+#define htonl(x) ___htonl(x)
+#define ntohl(x) ___ntohl(x)
+#else
+#define htonl(x) ((unsigned long)___htonl(x))
+#define ntohl(x) ((unsigned long)___ntohl(x))
+#endif
+#define htons(x) ___htons(x)
+#define ntohs(x) ___ntohs(x)
+
+#endif /* OPTIMIZE */
+
+
+#endif /* _LINUX_BYTEORDER_GENERIC_H */
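The conversion macros are meant to be applied exactly at the boundary where
a fixed-byte-order value enters or leaves the CPU. The descriptor layout
below is invented for illustration (a device expecting little-endian
fields):

    struct xyz_desc {
        __u32 addr;
        __u16 len;
    };

    static void fill_desc(struct xyz_desc *d, __u32 addr, __u16 len)
    {
        d->addr = cpu_to_le32(addr);   /* no-op on i386, byteswap on BE */
        d->len  = cpu_to_le16(len);
    }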
diff --git a/xen/include/xeno/byteorder/little_endian.h b/xen/include/xeno/byteorder/little_endian.h
new file mode 100644
index 0000000000..1431663621
--- /dev/null
+++ b/xen/include/xeno/byteorder/little_endian.h
@@ -0,0 +1,68 @@
+#ifndef _LINUX_BYTEORDER_LITTLE_ENDIAN_H
+#define _LINUX_BYTEORDER_LITTLE_ENDIAN_H
+
+#ifndef __LITTLE_ENDIAN
+#define __LITTLE_ENDIAN 1234
+#endif
+#ifndef __LITTLE_ENDIAN_BITFIELD
+#define __LITTLE_ENDIAN_BITFIELD
+#endif
+
+#include <linux/byteorder/swab.h>
+
+#define __constant_htonl(x) ___constant_swab32((x))
+#define __constant_ntohl(x) ___constant_swab32((x))
+#define __constant_htons(x) ___constant_swab16((x))
+#define __constant_ntohs(x) ___constant_swab16((x))
+#define __constant_cpu_to_le64(x) ((__u64)(x))
+#define __constant_le64_to_cpu(x) ((__u64)(x))
+#define __constant_cpu_to_le32(x) ((__u32)(x))
+#define __constant_le32_to_cpu(x) ((__u32)(x))
+#define __constant_cpu_to_le16(x) ((__u16)(x))
+#define __constant_le16_to_cpu(x) ((__u16)(x))
+#define __constant_cpu_to_be64(x) ___constant_swab64((x))
+#define __constant_be64_to_cpu(x) ___constant_swab64((x))
+#define __constant_cpu_to_be32(x) ___constant_swab32((x))
+#define __constant_be32_to_cpu(x) ___constant_swab32((x))
+#define __constant_cpu_to_be16(x) ___constant_swab16((x))
+#define __constant_be16_to_cpu(x) ___constant_swab16((x))
+#define __cpu_to_le64(x) ((__u64)(x))
+#define __le64_to_cpu(x) ((__u64)(x))
+#define __cpu_to_le32(x) ((__u32)(x))
+#define __le32_to_cpu(x) ((__u32)(x))
+#define __cpu_to_le16(x) ((__u16)(x))
+#define __le16_to_cpu(x) ((__u16)(x))
+#define __cpu_to_be64(x) __swab64((x))
+#define __be64_to_cpu(x) __swab64((x))
+#define __cpu_to_be32(x) __swab32((x))
+#define __be32_to_cpu(x) __swab32((x))
+#define __cpu_to_be16(x) __swab16((x))
+#define __be16_to_cpu(x) __swab16((x))
+#define __cpu_to_le64p(x) (*(__u64*)(x))
+#define __le64_to_cpup(x) (*(__u64*)(x))
+#define __cpu_to_le32p(x) (*(__u32*)(x))
+#define __le32_to_cpup(x) (*(__u32*)(x))
+#define __cpu_to_le16p(x) (*(__u16*)(x))
+#define __le16_to_cpup(x) (*(__u16*)(x))
+#define __cpu_to_be64p(x) __swab64p((x))
+#define __be64_to_cpup(x) __swab64p((x))
+#define __cpu_to_be32p(x) __swab32p((x))
+#define __be32_to_cpup(x) __swab32p((x))
+#define __cpu_to_be16p(x) __swab16p((x))
+#define __be16_to_cpup(x) __swab16p((x))
+#define __cpu_to_le64s(x) do {} while (0)
+#define __le64_to_cpus(x) do {} while (0)
+#define __cpu_to_le32s(x) do {} while (0)
+#define __le32_to_cpus(x) do {} while (0)
+#define __cpu_to_le16s(x) do {} while (0)
+#define __le16_to_cpus(x) do {} while (0)
+#define __cpu_to_be64s(x) __swab64s((x))
+#define __be64_to_cpus(x) __swab64s((x))
+#define __cpu_to_be32s(x) __swab32s((x))
+#define __be32_to_cpus(x) __swab32s((x))
+#define __cpu_to_be16s(x) __swab16s((x))
+#define __be16_to_cpus(x) __swab16s((x))
+
+#include <linux/byteorder/generic.h>
+
+#endif /* _LINUX_BYTEORDER_LITTLE_ENDIAN_H */
diff --git a/xen/include/xeno/byteorder/pdp_endian.h b/xen/include/xeno/byteorder/pdp_endian.h
new file mode 100644
index 0000000000..618631cbc6
--- /dev/null
+++ b/xen/include/xeno/byteorder/pdp_endian.h
@@ -0,0 +1,88 @@
+#ifndef _LINUX_BYTEORDER_PDP_ENDIAN_H
+#define _LINUX_BYTEORDER_PDP_ENDIAN_H
+
+/*
+ * Could have been named NUXI-endian, but we use the same name as in glibc.
+ * Hopefully only the PDP and its evolutions (old VAXen in compatibility mode)
+ * should ever use this braindead byteorder.
+ * This file *should* work, but has not been tested.
+ *
+ * little-endian is 1234; big-endian is 4321; nuxi/pdp-endian is 3412
+ *
+ * I thought vaxen were NUXI-endian, but was told they were correct-endian
+ * (little-endian), though indeed there existed NUXI-endian machines
+ * (DEC PDP-11 and old VAXen in compatibility mode).
+ * This makes this file a bit useless, except as a proof of concept.
+ *
+ * But what does a __u64 look like: is it 34127856 or 78563412 ???
+ * I don't dare imagine! Hence, no 64-bit byteorder support yet.
+ * Hopefully, 64-bit pdp-endian support shouldn't ever be required.
+ *
+ */
+
+#ifndef __PDP_ENDIAN
+#define __PDP_ENDIAN 3412
+#endif
+#ifndef __PDP_ENDIAN_BITFIELD
+#define __PDP_ENDIAN_BITFIELD
+#endif
+
+#include <linux/byteorder/swab.h>
+#include <linux/byteorder/swabb.h>
+
+#define __constant_htonl(x) ___constant_swahb32((x))
+#define __constant_ntohl(x) ___constant_swahb32((x))
+#define __constant_htons(x) ___constant_swab16((x))
+#define __constant_ntohs(x) ___constant_swab16((x))
+#define __constant_cpu_to_le64(x) I DON'T KNOW
+#define __constant_le64_to_cpu(x) I DON'T KNOW
+#define __constant_cpu_to_le32(x) ___constant_swahw32((x))
+#define __constant_le32_to_cpu(x) ___constant_swahw32((x))
+#define __constant_cpu_to_le16(x) ((__u16)(x))
+#define __constant_le16_to_cpu(x) ((__u16)(x))
+#define __constant_cpu_to_be64(x) I DON'T KNOW
+#define __constant_be64_to_cpu(x) I DON'T KNOW
+#define __constant_cpu_to_be32(x) ___constant_swahb32((x))
+#define __constant_be32_to_cpu(x) ___constant_swahb32((x))
+#define __constant_cpu_to_be16(x) ___constant_swab16((x))
+#define __constant_be16_to_cpu(x) ___constant_swab16((x))
+#define __cpu_to_le64(x) I DON'T KNOW
+#define __le64_to_cpu(x) I DON'T KNOW
+#define __cpu_to_le32(x) ___swahw32((x))
+#define __le32_to_cpu(x) ___swahw32((x))
+#define __cpu_to_le16(x) ((__u16)(x))
+#define __le16_to_cpu(x) ((__u16)(x))
+#define __cpu_to_be64(x) I DON'T KNOW
+#define __be64_to_cpu(x) I DON'T KNOW
+#define __cpu_to_be32(x) __swahb32((x))
+#define __be32_to_cpu(x) __swahb32((x))
+#define __cpu_to_be16(x) __swab16((x))
+#define __be16_to_cpu(x) __swab16((x))
+#define __cpu_to_le64p(x) I DON'T KNOW
+#define __le64_to_cpup(x) I DON'T KNOW
+#define __cpu_to_le32p(x) ___swahw32p((x))
+#define __le32_to_cpup(x) ___swahw32p((x))
+#define __cpu_to_le16p(x) (*(__u16*)(x))
+#define __le16_to_cpup(x) (*(__u16*)(x))
+#define __cpu_to_be64p(x) I DON'T KNOW
+#define __be64_to_cpup(x) I DON'T KNOW
+#define __cpu_to_be32p(x) __swahb32p((x))
+#define __be32_to_cpup(x) __swahb32p((x))
+#define __cpu_to_be16p(x) __swab16p((x))
+#define __be16_to_cpup(x) __swab16p((x))
+#define __cpu_to_le64s(x) I DON'T KNOW
+#define __le64_to_cpus(x) I DON'T KNOW
+#define __cpu_to_le32s(x) ___swahw32s((x))
+#define __le32_to_cpus(x) ___swahw32s((x))
+#define __cpu_to_le16s(x) do {} while (0)
+#define __le16_to_cpus(x) do {} while (0)
+#define __cpu_to_be64s(x) I DON'T KNOW
+#define __be64_to_cpus(x) I DON'T KNOW
+#define __cpu_to_be32s(x) __swahb32s((x))
+#define __be32_to_cpus(x) __swahb32s((x))
+#define __cpu_to_be16s(x) __swab16s((x))
+#define __be16_to_cpus(x) __swab16s((x))
+
+#include <linux/byteorder/generic.h>
+
+#endif /* _LINUX_BYTEORDER_PDP_ENDIAN_H */
diff --git a/xen/include/xeno/byteorder/swab.h b/xen/include/xeno/byteorder/swab.h
new file mode 100644
index 0000000000..814b4519ff
--- /dev/null
+++ b/xen/include/xeno/byteorder/swab.h
@@ -0,0 +1,190 @@
+#ifndef _LINUX_BYTEORDER_SWAB_H
+#define _LINUX_BYTEORDER_SWAB_H
+
+/*
+ * linux/byteorder/swab.h
+ * Byte-swapping, independently from CPU endianness
+ * swabXX[ps]?(foo)
+ *
+ * Francois-Rene Rideau <fare@tunes.org> 19971205
+ * separated swab functions from cpu_to_XX,
+ * to clean up support for bizarre-endian architectures.
+ *
+ * See asm-i386/byteorder.h and such for examples of how to provide
+ * architecture-dependent optimized versions
+ *
+ */
+
+/* casts are necessary for constants, because we never know for sure
+ * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
+ */
+#define ___swab16(x) \
+({ \
+ __u16 __x = (x); \
+ ((__u16)( \
+ (((__u16)(__x) & (__u16)0x00ffU) << 8) | \
+ (((__u16)(__x) & (__u16)0xff00U) >> 8) )); \
+})
+
+#define ___swab32(x) \
+({ \
+ __u32 __x = (x); \
+ ((__u32)( \
+ (((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \
+ (((__u32)(__x) & (__u32)0x0000ff00UL) << 8) | \
+ (((__u32)(__x) & (__u32)0x00ff0000UL) >> 8) | \
+ (((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \
+})
+
+#define ___swab64(x) \
+({ \
+ __u64 __x = (x); \
+ ((__u64)( \
+ (__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \
+ (__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \
+ (__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \
+ (__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) << 8) | \
+ (__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >> 8) | \
+ (__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
+ (__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \
+ (__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \
+})
+
+#define ___constant_swab16(x) \
+ ((__u16)( \
+ (((__u16)(x) & (__u16)0x00ffU) << 8) | \
+ (((__u16)(x) & (__u16)0xff00U) >> 8) ))
+#define ___constant_swab32(x) \
+ ((__u32)( \
+ (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \
+ (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \
+ (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \
+ (((__u32)(x) & (__u32)0xff000000UL) >> 24) ))
+#define ___constant_swab64(x) \
+ ((__u64)( \
+ (__u64)(((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) | \
+ (__u64)(((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) | \
+ (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) | \
+ (__u64)(((__u64)(x) & (__u64)0x00000000ff000000ULL) << 8) | \
+ (__u64)(((__u64)(x) & (__u64)0x000000ff00000000ULL) >> 8) | \
+ (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
+ (__u64)(((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) | \
+ (__u64)(((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56) ))
+
+/*
+ * provide defaults when no architecture-specific optimization is detected
+ */
+#ifndef __arch__swab16
+# define __arch__swab16(x) ({ __u16 __tmp = (x) ; ___swab16(__tmp); })
+#endif
+#ifndef __arch__swab32
+# define __arch__swab32(x) ({ __u32 __tmp = (x) ; ___swab32(__tmp); })
+#endif
+#ifndef __arch__swab64
+# define __arch__swab64(x) ({ __u64 __tmp = (x) ; ___swab64(__tmp); })
+#endif
+
+#ifndef __arch__swab16p
+# define __arch__swab16p(x) __arch__swab16(*(x))
+#endif
+#ifndef __arch__swab32p
+# define __arch__swab32p(x) __arch__swab32(*(x))
+#endif
+#ifndef __arch__swab64p
+# define __arch__swab64p(x) __arch__swab64(*(x))
+#endif
+
+#ifndef __arch__swab16s
+# define __arch__swab16s(x) do { *(x) = __arch__swab16p((x)); } while (0)
+#endif
+#ifndef __arch__swab32s
+# define __arch__swab32s(x) do { *(x) = __arch__swab32p((x)); } while (0)
+#endif
+#ifndef __arch__swab64s
+# define __arch__swab64s(x) do { *(x) = __arch__swab64p((x)); } while (0)
+#endif
+
+
+/*
+ * Allow constant folding
+ */
+#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__)
+# define __swab16(x) \
+(__builtin_constant_p((__u16)(x)) ? \
+ ___swab16((x)) : \
+ __fswab16((x)))
+# define __swab32(x) \
+(__builtin_constant_p((__u32)(x)) ? \
+ ___swab32((x)) : \
+ __fswab32((x)))
+# define __swab64(x) \
+(__builtin_constant_p((__u64)(x)) ? \
+ ___swab64((x)) : \
+ __fswab64((x)))
+#else
+# define __swab16(x) __fswab16(x)
+# define __swab32(x) __fswab32(x)
+# define __swab64(x) __fswab64(x)
+#endif /* OPTIMIZE */
+
+
+static __inline__ __const__ __u16 __fswab16(__u16 x)
+{
+ return __arch__swab16(x);
+}
+static __inline__ __u16 __swab16p(__u16 *x)
+{
+ return __arch__swab16p(x);
+}
+static __inline__ void __swab16s(__u16 *addr)
+{
+ __arch__swab16s(addr);
+}
+
+static __inline__ __const__ __u32 __fswab32(__u32 x)
+{
+ return __arch__swab32(x);
+}
+static __inline__ __u32 __swab32p(__u32 *x)
+{
+ return __arch__swab32p(x);
+}
+static __inline__ void __swab32s(__u32 *addr)
+{
+ __arch__swab32s(addr);
+}
+
+#ifdef __BYTEORDER_HAS_U64__
+static __inline__ __const__ __u64 __fswab64(__u64 x)
+{
+# ifdef __SWAB_64_THRU_32__
+ __u32 h = x >> 32;
+ __u32 l = x & ((1ULL<<32)-1);
+ return (((__u64)__swab32(l)) << 32) | ((__u64)(__swab32(h)));
+# else
+ return __arch__swab64(x);
+# endif
+}
+static __inline__ __u64 __swab64p(__u64 *x)
+{
+ return __arch__swab64p(x);
+}
+static __inline__ void __swab64s(__u64 *addr)
+{
+ __arch__swab64s(addr);
+}
+#endif /* __BYTEORDER_HAS_U64__ */
+
+#if defined(__KERNEL__)
+#define swab16 __swab16
+#define swab32 __swab32
+#define swab64 __swab64
+#define swab16p __swab16p
+#define swab32p __swab32p
+#define swab64p __swab64p
+#define swab16s __swab16s
+#define swab32s __swab32s
+#define swab64s __swab64s
+#endif
+
+#endif /* _LINUX_BYTEORDER_SWAB_H */
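
The generic fallbacks above reduce to plain mask-and-shift arithmetic. A minimal
user-space sketch of the same semantics (demo_* is a hypothetical name, restated
with <stdint.h> types so it compiles outside the tree):

    #include <stdio.h>
    #include <stdint.h>

    /* Same mask-and-shift logic as ___swab32() above. */
    static uint32_t demo_swab32(uint32_t x)
    {
        return ((x & 0x000000ffUL) << 24) |
               ((x & 0x0000ff00UL) <<  8) |
               ((x & 0x00ff0000UL) >>  8) |
               ((x & 0xff000000UL) >> 24);
    }

    int main(void)
    {
        printf("%#x\n", demo_swab32(0x12345678)); /* prints 0x78563412 */
        return 0;
    }
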
diff --git a/xen/include/xeno/byteorder/swabb.h b/xen/include/xeno/byteorder/swabb.h
new file mode 100644
index 0000000000..d28d9a804d
--- /dev/null
+++ b/xen/include/xeno/byteorder/swabb.h
@@ -0,0 +1,137 @@
+#ifndef _LINUX_BYTEORDER_SWABB_H
+#define _LINUX_BYTEORDER_SWABB_H
+
+/*
+ * linux/byteorder/swabb.h
+ * SWAp Bytes Bizarrely
+ * swaHHXX[ps]?(foo)
+ *
+ * Support for obNUXIous pdp-endian and other bizarre architectures.
+ * Will Linux ever run on such ancient beasts? If not, this file
+ * will be but a programming pearl. Still, it's a reminder that we
+ * shouldn't be making too many assumptions when trying to be portable.
+ *
+ */
+
+/*
+ * Meaning of the names I chose (vaxlinux people feel free to correct them):
+ * swahw32 swap 16-bit half-words in a 32-bit word
+ * swahb32 swap 8-bit halves of each 16-bit half-word in a 32-bit word
+ *
+ * No 64-bit support yet. I don't know NUXI conventions for long longs.
+ * I guarantee it will be a mess when it's there, though :->
+ * It will be even worse if there are conflicting 64-bit conventions.
+ * Hopefully, no one ever used 64-bit objects on NUXI machines.
+ *
+ */
+
+#define ___swahw32(x) \
+({ \
+ __u32 __x = (x); \
+ ((__u32)( \
+ (((__u32)(__x) & (__u32)0x0000ffffUL) << 16) | \
+ (((__u32)(__x) & (__u32)0xffff0000UL) >> 16) )); \
+})
+#define ___swahb32(x) \
+({ \
+ __u32 __x = (x); \
+ ((__u32)( \
+ (((__u32)(__x) & (__u32)0x00ff00ffUL) << 8) | \
+ (((__u32)(__x) & (__u32)0xff00ff00UL) >> 8) )); \
+})
+
+#define ___constant_swahw32(x) \
+ ((__u32)( \
+ (((__u32)(x) & (__u32)0x0000ffffUL) << 16) | \
+ (((__u32)(x) & (__u32)0xffff0000UL) >> 16) ))
+#define ___constant_swahb32(x) \
+ ((__u32)( \
+ (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) | \
+ (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) ))
+
+/*
+ * provide defaults when no architecture-specific optimization is detected
+ */
+#ifndef __arch__swahw32
+# define __arch__swahw32(x) ___swahw32(x)
+#endif
+#ifndef __arch__swahb32
+# define __arch__swahb32(x) ___swahb32(x)
+#endif
+
+#ifndef __arch__swahw32p
+# define __arch__swahw32p(x) __swahw32(*(x))
+#endif
+#ifndef __arch__swahb32p
+# define __arch__swahb32p(x) __swahb32(*(x))
+#endif
+
+#ifndef __arch__swahw32s
+# define __arch__swahw32s(x) do { *(x) = __swahw32p((x)); } while (0)
+#endif
+#ifndef __arch__swahb32s
+# define __arch__swahb32s(x) do { *(x) = __swahb32p((x)); } while (0)
+#endif
+
+
+/*
+ * Allow constant folding
+ */
+#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__)
+# define __swahw32(x) \
+(__builtin_constant_p((__u32)(x)) ? \
+ ___swahw32((x)) : \
+ __fswahw32((x)))
+# define __swahb32(x) \
+(__builtin_constant_p((__u32)(x)) ? \
+ ___swahb32((x)) : \
+ __fswahb32((x)))
+#else
+# define __swahw32(x) __fswahw32(x)
+# define __swahb32(x) __fswahb32(x)
+#endif /* OPTIMIZE */
+
+
+static __inline__ __const__ __u32 __fswahw32(__u32 x)
+{
+ return __arch__swahw32(x);
+}
+static __inline__ __u32 __swahw32p(__u32 *x)
+{
+ return __arch__swahw32p(x);
+}
+static __inline__ void __swahw32s(__u32 *addr)
+{
+ __arch__swahw32s(addr);
+}
+
+
+static __inline__ __const__ __u32 __fswahb32(__u32 x)
+{
+ return __arch__swahb32(x);
+}
+static __inline__ __u32 __swahb32p(__u32 *x)
+{
+ return __arch__swahb32p(x);
+}
+static __inline__ void __swahb32s(__u32 *addr)
+{
+ __arch__swahb32s(addr);
+}
+
+#ifdef __BYTEORDER_HAS_U64__
+/*
+ * Not supported yet
+ */
+#endif /* __BYTEORDER_HAS_U64__ */
+
+#if defined(__KERNEL__)
+#define swahw32 __swahw32
+#define swahb32 __swahb32
+#define swahw32p __swahw32p
+#define swahb32p __swahb32p
+#define swahw32s __swahw32s
+#define swahb32s __swahb32s
+#endif
+
+#endif /* _LINUX_BYTEORDER_SWABB_H */
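
For the mixed-endian case it may help to see the two swaps side by side. A
standalone sketch (hypothetical demo_* names, mirroring ___swahw32/___swahb32):

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t demo_swahw32(uint32_t x) /* swap 16-bit half-words */
    {
        return ((x & 0x0000ffffUL) << 16) | ((x & 0xffff0000UL) >> 16);
    }

    static uint32_t demo_swahb32(uint32_t x) /* swap bytes in each half-word */
    {
        return ((x & 0x00ff00ffUL) << 8) | ((x & 0xff00ff00UL) >> 8);
    }

    int main(void)
    {
        printf("%#x\n", demo_swahw32(0x12345678)); /* 0x56781234 */
        printf("%#x\n", demo_swahb32(0x12345678)); /* 0x34127856 */
        return 0;
    }
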
diff --git a/xen/include/xeno/cache.h b/xen/include/xeno/cache.h
new file mode 100644
index 0000000000..73a3be7f9f
--- /dev/null
+++ b/xen/include/xeno/cache.h
@@ -0,0 +1,37 @@
+#ifndef __LINUX_CACHE_H
+#define __LINUX_CACHE_H
+
+#include <xeno/config.h>
+#include <asm/cache.h>
+
+#ifndef L1_CACHE_ALIGN
+#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
+#endif
+
+#ifndef SMP_CACHE_BYTES
+#define SMP_CACHE_BYTES L1_CACHE_BYTES
+#endif
+
+#ifndef ____cacheline_aligned
+#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
+#endif
+
+#ifndef ____cacheline_aligned_in_smp
+#ifdef CONFIG_SMP
+#define ____cacheline_aligned_in_smp ____cacheline_aligned
+#else
+#define ____cacheline_aligned_in_smp
+#endif /* CONFIG_SMP */
+#endif
+
+#ifndef __cacheline_aligned
+#ifdef MODULE
+#define __cacheline_aligned ____cacheline_aligned
+#else
+#define __cacheline_aligned \
+ __attribute__((__aligned__(SMP_CACHE_BYTES), \
+ __section__(".data.cacheline_aligned")))
+#endif
+#endif /* __cacheline_aligned */
+
+#endif /* __LINUX_CACHE_H */
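
The point of the aligned variants is to keep hot per-CPU data from sharing a
cache line. A sketch of typical usage (hypothetical struct, not from this tree):

    /* Padding each per-CPU slot out to SMP_CACHE_BYTES stops two CPUs
     * updating adjacent slots from bouncing the same cache line. */
    struct cpu_stats {
        unsigned long irq_count;
        unsigned long idle_ticks;
    } ____cacheline_aligned;   /* sizeof is rounded up to the alignment */

    static struct cpu_stats stats[NR_CPUS]; /* each element starts a fresh line */
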
diff --git a/xen/include/xeno/config.h b/xen/include/xeno/config.h
new file mode 100644
index 0000000000..7d7205b69a
--- /dev/null
+++ b/xen/include/xeno/config.h
@@ -0,0 +1,136 @@
+/******************************************************************************
+ * config.h
+ *
+ * A Linux-style configuration list.
+ */
+
+#ifndef __XENO_CONFIG_H__
+#define __XENO_CONFIG_H__
+
+#define CONFIG_X86 1
+
+#define CONFIG_SMP 1
+#define CONFIG_X86_LOCAL_APIC 1
+#define CONFIG_X86_IO_APIC 1
+#define CONFIG_X86_L1_CACHE_SHIFT 5
+
+#define CONFIG_PCI 1
+#define CONFIG_PCI_BIOS 1
+#define CONFIG_PCI_DIRECT 1
+
+#define CONFIG_IDE 1
+#define CONFIG_BLK_DEV_IDE 1
+#define CONFIG_BLK_DEV_IDEDMA 1
+#define CONFIG_BLK_DEV_IDEPCI 1
+#define CONFIG_IDEDISK_MULTI_MODE 1
+#define CONFIG_IDEDISK_STROKE 1
+#define CONFIG_IDEPCI_SHARE_IRQ 1
+#define CONFIG_BLK_DEV_IDEDMA_PCI 1
+#define CONFIG_IDEDMA_PCI_AUTO 1
+#define CONFIG_IDEDMA_AUTO 1
+#define CONFIG_BLK_DEV_IDE_MODES 1
+#define CONFIG_BLK_DEV_PIIX 1
+
+#define CONFIG_SCSI 1
+#define CONFIG_BLK_DEV_SD 1
+#define CONFIG_SD_EXTRA_DEVS 40
+#define CONFIG_SCSI_MULTI_LUN 1
+
+#define HZ 100
+
+/* Just to keep compiler happy. */
+#define SMP_CACHE_BYTES 64
+#define NR_CPUS 16
+#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
+#define ____cacheline_aligned __cacheline_aligned
+
+/*** Hypervisor owns top 64MB of virtual address space. ***/
+#define HYPERVISOR_VIRT_START (0xFC000000UL)
+
+/*
+ * First 4MB are mapped read-only for all. It's for the machine->physical
+ * mapping table (MPT table). The following are virtual addresses.
+ */
+#define READONLY_MPT_VIRT_START (HYPERVISOR_VIRT_START)
+#define READONLY_MPT_VIRT_END (READONLY_MPT_VIRT_START + (4*1024*1024))
+/*
+ * Next 16MB is fixed monitor space, which is part of a 48MB direct-mapped
+ * memory region. The following are machine addresses.
+ */
+#define MAX_MONITOR_ADDRESS (16*1024*1024)
+#define MAX_DMA_ADDRESS (16*1024*1024)
+#define MAX_DIRECTMAP_ADDRESS (48*1024*1024)
+/* And the virtual addresses for the direct-map region... */
+#define DIRECTMAP_VIRT_START (READONLY_MPT_VIRT_END)
+#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS)
+#define MONITOR_VIRT_START (DIRECTMAP_VIRT_START)
+#define MONITOR_VIRT_END (MONITOR_VIRT_START + MAX_MONITOR_ADDRESS)
+#define RDWR_MPT_VIRT_START (MONITOR_VIRT_END)
+#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (4*1024*1024))
+#define FRAMETABLE_VIRT_START (RDWR_MPT_VIRT_END)
+#define FRAMETABLE_VIRT_END (DIRECTMAP_VIRT_END)
+/* Next 4MB of virtual address space used for per-domain mappings (eg. GDT). */
+#define PERDOMAIN_VIRT_START (DIRECTMAP_VIRT_END)
+#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (4*1024*1024))
+/* Penultimate 4MB of virtual address space used for domain page mappings. */
+#define MAPCACHE_VIRT_START (PERDOMAIN_VIRT_END)
+#define MAPCACHE_VIRT_END (MAPCACHE_VIRT_START + (4*1024*1024))
+/* Final 4MB of virtual address space used for ioremap(). */
+#define IOREMAP_VIRT_START (MAPCACHE_VIRT_END)
+#define IOREMAP_VIRT_END (IOREMAP_VIRT_START + (4*1024*1024))
+
+/* Linkage for x86 */
+#define FASTCALL(x) x __attribute__((regparm(3)))
+#define asmlinkage __attribute__((regparm(0)))
+#define __ALIGN .align 16,0x90
+#define __ALIGN_STR ".align 16,0x90"
+#define SYMBOL_NAME_STR(X) #X
+#define SYMBOL_NAME(X) X
+#define SYMBOL_NAME_LABEL(X) X##:
+#ifdef __ASSEMBLY__
+#define ALIGN __ALIGN
+#define ALIGN_STR __ALIGN_STR
+#define ENTRY(name) \
+ .globl SYMBOL_NAME(name); \
+ ALIGN; \
+ SYMBOL_NAME_LABEL(name)
+#endif
+
+/* syslog levels ==> nothing! */
+#define KERN_NOTICE
+#define KERN_WARNING
+#define KERN_DEBUG
+#define KERN_INFO
+#define KERN_ERR
+#define KERN_CRIT
+#define KERN_EMERG
+#define KERN_ALERT
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+#define __HYPERVISOR_CS 0x30
+#define __HYPERVISOR_DS 0x38
+#define __GUEST_CS 0x11
+#define __GUEST_DS 0x19
+
+#define NR_syscalls 255
+
+#define offsetof(_p,_f) ((unsigned long)&(((_p *)0)->_f))
+#define struct_cpy(_x,_y) (memcpy((_x),(_y),sizeof(*(_x))))
+
+#define likely(_x) (_x)
+#define unlikely(_x) (_x)
+
+#define dev_probe_lock() ((void)0)
+#define dev_probe_unlock() ((void)0)
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#define capable(_c) 0
+
+#ifndef __ASSEMBLY__
+extern void __out_of_line_bug(int line) __attribute__((noreturn));
+#define out_of_line_bug() __out_of_line_bug(__LINE__)
+#endif
+
+#endif /* __XENO_CONFIG_H__ */
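
As a sanity check, the regions carved out above tile the hypervisor's 64MB
reservation exactly: 4MB read-only MPT, the 48MB direct map (which itself
contains the monitor, RDWR MPT and frame table), then 4MB each for per-domain
mappings, the map cache and ioremap(). A throwaway user-space check of the
arithmetic:

    #include <stdio.h>

    int main(void)
    {
        unsigned long mb = 1024 * 1024;
        /* RO MPT + direct map + per-domain + map cache + ioremap */
        unsigned long total = (4 + 48 + 4 + 4 + 4) * mb;
        printf("%lu MB\n", total / mb); /* prints 64 */
        return 0;
    }
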
diff --git a/xen/include/xeno/ctype.h b/xen/include/xeno/ctype.h
new file mode 100644
index 0000000000..afa3639229
--- /dev/null
+++ b/xen/include/xeno/ctype.h
@@ -0,0 +1,54 @@
+#ifndef _LINUX_CTYPE_H
+#define _LINUX_CTYPE_H
+
+/*
+ * NOTE! This ctype does not handle EOF like the standard C
+ * library is required to.
+ */
+
+#define _U 0x01 /* upper */
+#define _L 0x02 /* lower */
+#define _D 0x04 /* digit */
+#define _C 0x08 /* cntrl */
+#define _P 0x10 /* punct */
+#define _S 0x20 /* white space (space/lf/tab) */
+#define _X 0x40 /* hex digit */
+#define _SP 0x80 /* hard space (0x20) */
+
+extern unsigned char _ctype[];
+
+#define __ismask(x) (_ctype[(int)(unsigned char)(x)])
+
+#define isalnum(c) ((__ismask(c)&(_U|_L|_D)) != 0)
+#define isalpha(c) ((__ismask(c)&(_U|_L)) != 0)
+#define iscntrl(c) ((__ismask(c)&(_C)) != 0)
+#define isdigit(c) ((__ismask(c)&(_D)) != 0)
+#define isgraph(c) ((__ismask(c)&(_P|_U|_L|_D)) != 0)
+#define islower(c) ((__ismask(c)&(_L)) != 0)
+#define isprint(c) ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0)
+#define ispunct(c) ((__ismask(c)&(_P)) != 0)
+#define isspace(c) ((__ismask(c)&(_S)) != 0)
+#define isupper(c) ((__ismask(c)&(_U)) != 0)
+#define isxdigit(c) ((__ismask(c)&(_D|_X)) != 0)
+
+#define isascii(c) (((unsigned char)(c))<=0x7f)
+#define toascii(c) (((unsigned char)(c))&0x7f)
+
+static inline unsigned char __tolower(unsigned char c)
+{
+ if (isupper(c))
+ c -= 'A'-'a';
+ return c;
+}
+
+static inline unsigned char __toupper(unsigned char c)
+{
+ if (islower(c))
+ c -= 'a'-'A';
+ return c;
+}
+
+#define tolower(c) __tolower(c)
+#define toupper(c) __toupper(c)
+
+#endif
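
Each classification macro is one table lookup plus a mask test. A reduced
user-space sketch of the same idea (toy table covering digits only, not the
kernel's _ctype[]):

    #include <stdio.h>

    #define MY_D 0x04 /* digit */

    static unsigned char my_ctype[128];

    int main(void)
    {
        int c;
        for (c = '0'; c <= '9'; c++)
            my_ctype[c] = MY_D;
        printf("%d %d\n",
               (my_ctype['7'] & MY_D) != 0,  /* 1 */
               (my_ctype[' '] & MY_D) != 0); /* 0 */
        return 0;
    }
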
diff --git a/xen/include/xeno/delay.h b/xen/include/xeno/delay.h
new file mode 100644
index 0000000000..9d70ef035f
--- /dev/null
+++ b/xen/include/xeno/delay.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_DELAY_H
+#define _LINUX_DELAY_H
+
+/* Copyright (C) 1993 Linus Torvalds */
+
+#include <asm/delay.h>
+#define mdelay(n) (\
+ {unsigned long msec=(n); while (msec--) udelay(1000);})
+
+#endif /* defined(_LINUX_DELAY_H) */
diff --git a/xen/include/xeno/dom0_ops.h b/xen/include/xeno/dom0_ops.h
new file mode 100644
index 0000000000..49a5842fab
--- /dev/null
+++ b/xen/include/xeno/dom0_ops.h
@@ -0,0 +1,63 @@
+/******************************************************************************
+ * dom0_ops.h
+ *
+ * Process command requests from domain-0 guest OS.
+ *
+ * Copyright (c) 2002, K A Fraser, B Dragovic
+ */
+
+#ifndef __DOM0_OPS_H__
+#define __DOM0_OPS_H__
+
+#define DOM0_NEWDOMAIN 0
+#define DOM0_KILLDOMAIN 1
+#define DOM0_GETMEMLIST 2
+#define DOM0_STARTDOM 4
+
+#define MAX_CMD_LEN 256
+
+typedef struct dom0_newdomain_st
+{
+ unsigned int domain; // return parameter
+ unsigned int memory_kb;
+ unsigned int num_vifs; // temporary
+ unsigned long pg_head; // return parameter
+} dom0_newdomain_t;
+
+typedef struct dom0_killdomain_st
+{
+ unsigned int domain;
+} dom0_killdomain_t;
+
+typedef struct dom0_getmemlist_st
+{
+ unsigned long start_pfn;
+ unsigned long num_pfns;
+ void *buffer;
+} dom0_getmemlist_t;
+
+typedef struct domain_launch
+{
+ unsigned int domain;
+ unsigned long l2_pgt_addr;
+ unsigned long virt_load_addr;
+ unsigned long virt_shinfo_addr;
+ unsigned long virt_startinfo_addr;
+ unsigned int num_vifs;
+ char cmd_line[MAX_CMD_LEN];
+} dom_meminfo_t;
+
+typedef struct dom0_op_st
+{
+ unsigned long cmd;
+ union
+ {
+ dom0_newdomain_t newdomain;
+ dom0_killdomain_t killdomain;
+ dom0_getmemlist_t getmemlist;
+ dom_meminfo_t meminfo;
+ }
+ u;
+} dom0_op_t;
+
+#endif
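
A control tool fills in cmd and the matching arm of the union. A sketch of a
request (field names are from the structs above; how the structure is actually
handed to the hypervisor is defined elsewhere in the tree and not shown):

    dom0_op_t op;

    op.cmd = DOM0_NEWDOMAIN;
    op.u.newdomain.memory_kb = 64 * 1024; /* 64MB guest */
    op.u.newdomain.num_vifs  = 1;
    /* op.u.newdomain.domain and .pg_head are return parameters,
     * filled in by the hypervisor. */
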
diff --git a/xen/include/xeno/elevator.h b/xen/include/xeno/elevator.h
new file mode 100644
index 0000000000..1a8bb5c39a
--- /dev/null
+++ b/xen/include/xeno/elevator.h
@@ -0,0 +1,104 @@
+#ifndef _LINUX_ELEVATOR_H
+#define _LINUX_ELEVATOR_H
+
+typedef void (elevator_fn) (struct request *, elevator_t *,
+ struct list_head *,
+ struct list_head *, int);
+
+typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *,
+ struct buffer_head *, int, int);
+
+typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int);
+
+typedef void (elevator_merge_req_fn) (struct request *, struct request *);
+
+struct elevator_s
+{
+ int read_latency;
+ int write_latency;
+
+ elevator_merge_fn *elevator_merge_fn;
+ elevator_merge_cleanup_fn *elevator_merge_cleanup_fn;
+ elevator_merge_req_fn *elevator_merge_req_fn;
+
+ unsigned int queue_ID;
+};
+
+int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
+void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int);
+void elevator_noop_merge_req(struct request *, struct request *);
+
+int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
+void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int);
+void elevator_linus_merge_req(struct request *, struct request *);
+
+typedef struct blkelv_ioctl_arg_s {
+ int queue_ID;
+ int read_latency;
+ int write_latency;
+ int max_bomb_segments;
+} blkelv_ioctl_arg_t;
+
+#define BLKELVGET _IOR(0x12,106,sizeof(blkelv_ioctl_arg_t))
+#define BLKELVSET _IOW(0x12,107,sizeof(blkelv_ioctl_arg_t))
+
+extern int blkelvget_ioctl(elevator_t *, blkelv_ioctl_arg_t *);
+extern int blkelvset_ioctl(elevator_t *, const blkelv_ioctl_arg_t *);
+
+extern void elevator_init(elevator_t *, elevator_t);
+
+/*
+ * Return values from elevator merger
+ */
+#define ELEVATOR_NO_MERGE 0
+#define ELEVATOR_FRONT_MERGE 1
+#define ELEVATOR_BACK_MERGE 2
+
+/*
+ * This is used in the elevator algorithm. We don't prioritise reads
+ * over writes any more --- although reads are more time-critical than
+ * writes, by treating them equally we increase filesystem throughput.
+ * This turns out to give better overall performance. -- sct
+ */
+#define IN_ORDER(s1,s2) \
+ ((((s1)->rq_dev == (s2)->rq_dev && \
+ (s1)->sector < (s2)->sector)) || \
+ (s1)->rq_dev < (s2)->rq_dev)
+
+#define BHRQ_IN_ORDER(bh, rq) \
+ ((((bh)->b_rdev == (rq)->rq_dev && \
+ (bh)->b_rsector < (rq)->sector)) || \
+ (bh)->b_rdev < (rq)->rq_dev)
+
+static inline int elevator_request_latency(elevator_t * elevator, int rw)
+{
+ int latency;
+
+ latency = elevator->read_latency;
+ if (rw != READ)
+ latency = elevator->write_latency;
+
+ return latency;
+}
+
+#define ELEVATOR_NOOP \
+((elevator_t) { \
+ 0, /* read_latency */ \
+ 0, /* write_latency */ \
+ \
+ elevator_noop_merge, /* elevator_merge_fn */ \
+ elevator_noop_merge_cleanup, /* elevator_merge_cleanup_fn */ \
+ elevator_noop_merge_req, /* elevator_merge_req_fn */ \
+ })
+
+#define ELEVATOR_LINUS \
+((elevator_t) { \
+ 8192, /* read passovers */ \
+ 16384, /* write passovers */ \
+ \
+ elevator_linus_merge, /* elevator_merge_fn */ \
+ elevator_linus_merge_cleanup, /* elevator_merge_cleanup_fn */ \
+ elevator_linus_merge_req, /* elevator_merge_req_fn */ \
+ })
+
+#endif
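
The IN_ORDER test is the heart of the sorting: requests order by device first,
then by ascending sector. A standalone sketch with mock types (only the two
fields the macro touches; demo only):

    #include <stdio.h>

    struct req { int rq_dev; long sector; }; /* mock request, demo only */

    #define IN_ORDER(s1,s2) \
        ((((s1)->rq_dev == (s2)->rq_dev && \
           (s1)->sector < (s2)->sector)) || \
         (s1)->rq_dev < (s2)->rq_dev)

    int main(void)
    {
        struct req a = { 3, 100 }, b = { 3, 200 }, c = { 2, 900 };
        printf("%d %d\n", IN_ORDER(&a, &b),  /* 1: same dev, ascending */
                          IN_ORDER(&a, &c)); /* 0: 'c' is a lower device */
        return 0;
    }
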
diff --git a/xen/include/xeno/errno.h b/xen/include/xeno/errno.h
new file mode 100644
index 0000000000..7cf599f4de
--- /dev/null
+++ b/xen/include/xeno/errno.h
@@ -0,0 +1,132 @@
+#ifndef _I386_ERRNO_H
+#define _I386_ERRNO_H
+
+#define EPERM 1 /* Operation not permitted */
+#define ENOENT 2 /* No such file or directory */
+#define ESRCH 3 /* No such process */
+#define EINTR 4 /* Interrupted system call */
+#define EIO 5 /* I/O error */
+#define ENXIO 6 /* No such device or address */
+#define E2BIG 7 /* Arg list too long */
+#define ENOEXEC 8 /* Exec format error */
+#define EBADF 9 /* Bad file number */
+#define ECHILD 10 /* No child processes */
+#define EAGAIN 11 /* Try again */
+#define ENOMEM 12 /* Out of memory */
+#define EACCES 13 /* Permission denied */
+#define EFAULT 14 /* Bad address */
+#define ENOTBLK 15 /* Block device required */
+#define EBUSY 16 /* Device or resource busy */
+#define EEXIST 17 /* File exists */
+#define EXDEV 18 /* Cross-device link */
+#define ENODEV 19 /* No such device */
+#define ENOTDIR 20 /* Not a directory */
+#define EISDIR 21 /* Is a directory */
+#define EINVAL 22 /* Invalid argument */
+#define ENFILE 23 /* File table overflow */
+#define EMFILE 24 /* Too many open files */
+#define ENOTTY 25 /* Not a typewriter */
+#define ETXTBSY 26 /* Text file busy */
+#define EFBIG 27 /* File too large */
+#define ENOSPC 28 /* No space left on device */
+#define ESPIPE 29 /* Illegal seek */
+#define EROFS 30 /* Read-only file system */
+#define EMLINK 31 /* Too many links */
+#define EPIPE 32 /* Broken pipe */
+#define EDOM 33 /* Math argument out of domain of func */
+#define ERANGE 34 /* Math result not representable */
+#define EDEADLK 35 /* Resource deadlock would occur */
+#define ENAMETOOLONG 36 /* File name too long */
+#define ENOLCK 37 /* No record locks available */
+#define ENOSYS 38 /* Function not implemented */
+#define ENOTEMPTY 39 /* Directory not empty */
+#define ELOOP 40 /* Too many symbolic links encountered */
+#define EWOULDBLOCK EAGAIN /* Operation would block */
+#define ENOMSG 42 /* No message of desired type */
+#define EIDRM 43 /* Identifier removed */
+#define ECHRNG 44 /* Channel number out of range */
+#define EL2NSYNC 45 /* Level 2 not synchronized */
+#define EL3HLT 46 /* Level 3 halted */
+#define EL3RST 47 /* Level 3 reset */
+#define ELNRNG 48 /* Link number out of range */
+#define EUNATCH 49 /* Protocol driver not attached */
+#define ENOCSI 50 /* No CSI structure available */
+#define EL2HLT 51 /* Level 2 halted */
+#define EBADE 52 /* Invalid exchange */
+#define EBADR 53 /* Invalid request descriptor */
+#define EXFULL 54 /* Exchange full */
+#define ENOANO 55 /* No anode */
+#define EBADRQC 56 /* Invalid request code */
+#define EBADSLT 57 /* Invalid slot */
+
+#define EDEADLOCK EDEADLK
+
+#define EBFONT 59 /* Bad font file format */
+#define ENOSTR 60 /* Device not a stream */
+#define ENODATA 61 /* No data available */
+#define ETIME 62 /* Timer expired */
+#define ENOSR 63 /* Out of streams resources */
+#define ENONET 64 /* Machine is not on the network */
+#define ENOPKG 65 /* Package not installed */
+#define EREMOTE 66 /* Object is remote */
+#define ENOLINK 67 /* Link has been severed */
+#define EADV 68 /* Advertise error */
+#define ESRMNT 69 /* Srmount error */
+#define ECOMM 70 /* Communication error on send */
+#define EPROTO 71 /* Protocol error */
+#define EMULTIHOP 72 /* Multihop attempted */
+#define EDOTDOT 73 /* RFS specific error */
+#define EBADMSG 74 /* Not a data message */
+#define EOVERFLOW 75 /* Value too large for defined data type */
+#define ENOTUNIQ 76 /* Name not unique on network */
+#define EBADFD 77 /* File descriptor in bad state */
+#define EREMCHG 78 /* Remote address changed */
+#define ELIBACC 79 /* Can not access a needed shared library */
+#define ELIBBAD 80 /* Accessing a corrupted shared library */
+#define ELIBSCN 81 /* .lib section in a.out corrupted */
+#define ELIBMAX 82 /* Attempting to link in too many shared libraries */
+#define ELIBEXEC 83 /* Cannot exec a shared library directly */
+#define EILSEQ 84 /* Illegal byte sequence */
+#define ERESTART 85 /* Interrupted system call should be restarted */
+#define ESTRPIPE 86 /* Streams pipe error */
+#define EUSERS 87 /* Too many users */
+#define ENOTSOCK 88 /* Socket operation on non-socket */
+#define EDESTADDRREQ 89 /* Destination address required */
+#define EMSGSIZE 90 /* Message too long */
+#define EPROTOTYPE 91 /* Protocol wrong type for socket */
+#define ENOPROTOOPT 92 /* Protocol not available */
+#define EPROTONOSUPPORT 93 /* Protocol not supported */
+#define ESOCKTNOSUPPORT 94 /* Socket type not supported */
+#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */
+#define EPFNOSUPPORT 96 /* Protocol family not supported */
+#define EAFNOSUPPORT 97 /* Address family not supported by protocol */
+#define EADDRINUSE 98 /* Address already in use */
+#define EADDRNOTAVAIL 99 /* Cannot assign requested address */
+#define ENETDOWN 100 /* Network is down */
+#define ENETUNREACH 101 /* Network is unreachable */
+#define ENETRESET 102 /* Network dropped connection because of reset */
+#define ECONNABORTED 103 /* Software caused connection abort */
+#define ECONNRESET 104 /* Connection reset by peer */
+#define ENOBUFS 105 /* No buffer space available */
+#define EISCONN 106 /* Transport endpoint is already connected */
+#define ENOTCONN 107 /* Transport endpoint is not connected */
+#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
+#define ETOOMANYREFS 109 /* Too many references: cannot splice */
+#define ETIMEDOUT 110 /* Connection timed out */
+#define ECONNREFUSED 111 /* Connection refused */
+#define EHOSTDOWN 112 /* Host is down */
+#define EHOSTUNREACH 113 /* No route to host */
+#define EALREADY 114 /* Operation already in progress */
+#define EINPROGRESS 115 /* Operation now in progress */
+#define ESTALE 116 /* Stale NFS file handle */
+#define EUCLEAN 117 /* Structure needs cleaning */
+#define ENOTNAM 118 /* Not a XENIX named type file */
+#define ENAVAIL 119 /* No XENIX semaphores available */
+#define EISNAM 120 /* Is a named type file */
+#define EREMOTEIO 121 /* Remote I/O error */
+#define EDQUOT 122 /* Quota exceeded */
+
+#define ENOMEDIUM 123 /* No medium found */
+#define EMEDIUMTYPE 124 /* Wrong medium type */
+
+#endif
diff --git a/xen/include/xeno/etherdevice.h b/xen/include/xeno/etherdevice.h
new file mode 100644
index 0000000000..bac9b4d5ad
--- /dev/null
+++ b/xen/include/xeno/etherdevice.h
@@ -0,0 +1,68 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. NET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Definitions for the Ethernet handlers.
+ *
+ * Version: @(#)eth.h 1.0.4 05/13/93
+ *
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * Relocated to include/linux where it belongs by Alan Cox
+ * <gw4pts@gw4pts.ampr.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * WARNING: This move may well be temporary. This file will get merged with others RSN.
+ *
+ */
+#ifndef _LINUX_ETHERDEVICE_H
+#define _LINUX_ETHERDEVICE_H
+
+#include <linux/if_ether.h>
+
+#ifdef __KERNEL__
+extern int eth_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type, void *daddr,
+ void *saddr, unsigned len);
+extern int eth_rebuild_header(struct sk_buff *skb);
+extern unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev);
+extern void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev,
+ unsigned char * haddr);
+extern int eth_header_cache(struct neighbour *neigh,
+ struct hh_cache *hh);
+extern int eth_header_parse(struct sk_buff *skb,
+ unsigned char *haddr);
+extern struct net_device *init_etherdev(struct net_device *dev, int sizeof_priv);
+extern struct net_device *alloc_etherdev(int sizeof_priv);
+
+static inline void eth_copy_and_sum (struct sk_buff *dest, unsigned char *src, int len, int base)
+{
+ memcpy (dest->data, src, len);
+}
+
+/**
+ * is_valid_ether_addr - Determine if the given Ethernet address is valid
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not
+ * a multicast address, and is not FF:FF:FF:FF:FF:FF. The multicast
+ * and FF:FF:... tests are combined into the single test "!(addr[0]&1)".
+ *
+ * Return true if the address is valid.
+ */
+static inline int is_valid_ether_addr( u8 *addr )
+{
+ const char zaddr[6] = {0,};
+
+ return !(addr[0]&1) && memcmp( addr, zaddr, 6);
+}
+
+#endif
+
+#endif /* _LINUX_ETHERDEVICE_H */
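
The validity test above rejects the all-zero address and anything with the
group (multicast/broadcast) bit set in the first octet. A user-space copy of
the same logic (addresses are arbitrary examples):

    #include <stdio.h>
    #include <string.h>

    static int demo_valid_ether(const unsigned char *addr)
    {
        static const unsigned char zaddr[6];
        return !(addr[0] & 1) && memcmp(addr, zaddr, 6) != 0;
    }

    int main(void)
    {
        unsigned char ok[6]    = { 0x00, 0x16, 0x3e, 0x01, 0x02, 0x03 };
        unsigned char bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
        printf("%d %d\n", demo_valid_ether(ok),     /* 1 */
                          demo_valid_ether(bcast)); /* 0 */
        return 0;
    }
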
diff --git a/xen/include/xeno/ethtool.h b/xen/include/xeno/ethtool.h
new file mode 100644
index 0000000000..e672ac5887
--- /dev/null
+++ b/xen/include/xeno/ethtool.h
@@ -0,0 +1,361 @@
+/*
+ * ethtool.h: Defines for Linux ethtool.
+ *
+ * Copyright (C) 1998 David S. Miller (davem@redhat.com)
+ * Copyright 2001 Jeff Garzik <jgarzik@pobox.com>
+ * Portions Copyright 2001 Sun Microsystems (thockin@sun.com)
+ * Portions Copyright 2002 Intel (eli.kupermann@intel.com,
+ * christopher.leech@intel.com,
+ * scott.feldman@intel.com)
+ */
+
+#ifndef _LINUX_ETHTOOL_H
+#define _LINUX_ETHTOOL_H
+
+
+/* This should work for both 32 and 64 bit userland. */
+struct ethtool_cmd {
+ u32 cmd;
+ u32 supported; /* Features this interface supports */
+ u32 advertising; /* Features this interface advertises */
+ u16 speed; /* The forced speed, 10Mb, 100Mb, gigabit */
+ u8 duplex; /* Duplex, half or full */
+ u8 port; /* Which connector port */
+ u8 phy_address;
+ u8 transceiver; /* Which transceiver to use */
+ u8 autoneg; /* Enable or disable autonegotiation */
+ u32 maxtxpkt; /* Tx pkts before generating tx int */
+ u32 maxrxpkt; /* Rx pkts before generating rx int */
+ u32 reserved[4];
+};
+
+#define ETHTOOL_BUSINFO_LEN 32
+/* these strings are set to whatever the driver author decides... */
+struct ethtool_drvinfo {
+ u32 cmd;
+ char driver[32]; /* driver short name, "tulip", "eepro100" */
+ char version[32]; /* driver version string */
+ char fw_version[32]; /* firmware version string, if applicable */
+ char bus_info[ETHTOOL_BUSINFO_LEN]; /* Bus info for this IF. */
+ /* For PCI devices, use pci_dev->slot_name. */
+ char reserved1[32];
+ char reserved2[16];
+ u32 n_stats; /* number of u64's from ETHTOOL_GSTATS */
+ u32 testinfo_len;
+ u32 eedump_len; /* Size of data from ETHTOOL_GEEPROM (bytes) */
+ u32 regdump_len; /* Size of data from ETHTOOL_GREGS (bytes) */
+};
+
+#define SOPASS_MAX 6
+/* wake-on-lan settings */
+struct ethtool_wolinfo {
+ u32 cmd;
+ u32 supported;
+ u32 wolopts;
+ u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */
+};
+
+/* for passing single values */
+struct ethtool_value {
+ u32 cmd;
+ u32 data;
+};
+
+/* for passing big chunks of data */
+struct ethtool_regs {
+ u32 cmd;
+ u32 version; /* driver-specific, indicates different chips/revs */
+ u32 len; /* bytes */
+ u8 data[0];
+};
+
+/* for passing EEPROM chunks */
+struct ethtool_eeprom {
+ u32 cmd;
+ u32 magic;
+ u32 offset; /* in bytes */
+ u32 len; /* in bytes */
+ u8 data[0];
+};
+
+/* for configuring coalescing parameters of chip */
+struct ethtool_coalesce {
+ u32 cmd; /* ETHTOOL_{G,S}COALESCE */
+
+ /* How many usecs to delay an RX interrupt after
+ * a packet arrives. If 0, only rx_max_coalesced_frames
+ * is used.
+ */
+ u32 rx_coalesce_usecs;
+
+ /* How many packets to delay an RX interrupt after
+ * a packet arrives. If 0, only rx_coalesce_usecs is
+ * used. It is illegal to set both usecs and max frames
+ * to zero as this would cause RX interrupts to never be
+ * generated.
+ */
+ u32 rx_max_coalesced_frames;
+
+ /* Same as above two parameters, except that these values
+ * apply while an IRQ is being serviced by the host. Not
+ * all cards support this feature and the values are ignored
+ * in that case.
+ */
+ u32 rx_coalesce_usecs_irq;
+ u32 rx_max_coalesced_frames_irq;
+
+ /* How many usecs to delay a TX interrupt after
+ * a packet is sent. If 0, only tx_max_coalesced_frames
+ * is used.
+ */
+ u32 tx_coalesce_usecs;
+
+ /* How many packets to delay a TX interrupt after
+ * a packet is sent. If 0, only tx_coalesce_usecs is
+ * used. It is illegal to set both usecs and max frames
+ * to zero as this would cause TX interrupts to never be
+ * generated.
+ */
+ u32 tx_max_coalesced_frames;
+
+ /* Same as above two parameters, except that these values
+ * apply while an IRQ is being serviced by the host. Not
+ * all cards support this feature and the values are ignored
+ * in that case.
+ */
+ u32 tx_coalesce_usecs_irq;
+ u32 tx_max_coalesced_frames_irq;
+
+ /* How many usecs to delay in-memory statistics
+ * block updates. Some drivers do not have an in-memory
+ * statistic block, and in such cases this value is ignored.
+ * This value must not be zero.
+ */
+ u32 stats_block_coalesce_usecs;
+
+ /* Adaptive RX/TX coalescing is an algorithm implemented by
+ * some drivers to improve latency under low packet rates and
+ * improve throughput under high packet rates. Some drivers
+ * only implement one of RX or TX adaptive coalescing. Anything
+ * not implemented by the driver causes these values to be
+ * silently ignored.
+ */
+ u32 use_adaptive_rx_coalesce;
+ u32 use_adaptive_tx_coalesce;
+
+ /* When the packet rate (measured in packets per second)
+ * is below pkt_rate_low, the {rx,tx}_*_low parameters are
+ * used.
+ */
+ u32 pkt_rate_low;
+ u32 rx_coalesce_usecs_low;
+ u32 rx_max_coalesced_frames_low;
+ u32 tx_coalesce_usecs_low;
+ u32 tx_max_coalesced_frames_low;
+
+ /* When the packet rate is below pkt_rate_high but above
+ * pkt_rate_low (both measured in packets per second) the
+ * normal {rx,tx}_* coalescing parameters are used.
+ */
+
+ /* When the packet rate (measured in packets per second)
+ * is above pkt_rate_high, the {rx,tx}_*_high parameters are
+ * used.
+ */
+ u32 pkt_rate_high;
+ u32 rx_coalesce_usecs_high;
+ u32 rx_max_coalesced_frames_high;
+ u32 tx_coalesce_usecs_high;
+ u32 tx_max_coalesced_frames_high;
+
+ /* How often to do adaptive coalescing packet rate sampling,
+ * measured in seconds. Must not be zero.
+ */
+ u32 rate_sample_interval;
+};
+
+/* for configuring RX/TX ring parameters */
+struct ethtool_ringparam {
+ u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */
+
+ /* Read only attributes. These indicate the maximum number
+ * of pending RX/TX ring entries the driver will allow the
+ * user to set.
+ */
+ u32 rx_max_pending;
+ u32 rx_mini_max_pending;
+ u32 rx_jumbo_max_pending;
+ u32 tx_max_pending;
+
+ /* Values changeable by the user. The valid values are
+ * in the range 1 to the "*_max_pending" counterpart above.
+ */
+ u32 rx_pending;
+ u32 rx_mini_pending;
+ u32 rx_jumbo_pending;
+ u32 tx_pending;
+};
+
+/* for configuring link flow control parameters */
+struct ethtool_pauseparam {
+ u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */
+
+ /* If the link is being auto-negotiated (via ethtool_cmd.autoneg
+ * being true) the user may set 'autoneg' here non-zero to have the
+ * pause parameters be auto-negotiated too. In such a case, the
+ * {rx,tx}_pause values below determine what capabilities are
+ * advertised.
+ *
+ * If 'autoneg' is zero or the link is not being auto-negotiated,
+ * then {rx,tx}_pause force the driver to use/not-use pause
+ * flow control.
+ */
+ u32 autoneg;
+ u32 rx_pause;
+ u32 tx_pause;
+};
+
+#define ETH_GSTRING_LEN 32
+enum ethtool_stringset {
+ ETH_SS_TEST = 0,
+ ETH_SS_STATS,
+};
+
+/* for passing string sets for data tagging */
+struct ethtool_gstrings {
+ u32 cmd; /* ETHTOOL_GSTRINGS */
+ u32 string_set; /* string set id, e.g. ETH_SS_TEST, etc. */
+ u32 len; /* number of strings in the string set */
+ u8 data[0];
+};
+
+enum ethtool_test_flags {
+ ETH_TEST_FL_OFFLINE = (1 << 0), /* online / offline */
+ ETH_TEST_FL_FAILED = (1 << 1), /* test passed / failed */
+};
+
+/* for requesting NIC test and getting results*/
+struct ethtool_test {
+ u32 cmd; /* ETHTOOL_TEST */
+ u32 flags; /* ETH_TEST_FL_xxx */
+ u32 reserved;
+ u32 len; /* result length, in number of u64 elements */
+ u64 data[0];
+};
+
+/* for dumping NIC-specific statistics */
+struct ethtool_stats {
+ u32 cmd; /* ETHTOOL_GSTATS */
+ u32 n_stats; /* number of u64's being returned */
+ u64 data[0];
+};
+
+/* CMDs currently supported */
+#define ETHTOOL_GSET 0x00000001 /* Get settings. */
+#define ETHTOOL_SSET 0x00000002 /* Set settings, privileged. */
+#define ETHTOOL_GDRVINFO 0x00000003 /* Get driver info. */
+#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers, privileged. */
+#define ETHTOOL_GWOL 0x00000005 /* Get wake-on-lan options. */
+#define ETHTOOL_SWOL 0x00000006 /* Set wake-on-lan options, priv. */
+#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */
+#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */
+#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv. */
+#define ETHTOOL_GLINK 0x0000000a /* Get link status (ethtool_value) */
+#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */
+#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data, priv. */
+#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */
+#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config, priv. */
+#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */
+#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */
+#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */
+#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters, priv. */
+#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */
+#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */
+#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */
+#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */
+#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable
+ * (ethtool_value) */
+#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable
+ * (ethtool_value), priv. */
+#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. */
+#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */
+#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */
+#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */
+
+/* compatibility with older code */
+#define SPARC_ETH_GSET ETHTOOL_GSET
+#define SPARC_ETH_SSET ETHTOOL_SSET
+
+/* Indicates what features are supported by the interface. */
+#define SUPPORTED_10baseT_Half (1 << 0)
+#define SUPPORTED_10baseT_Full (1 << 1)
+#define SUPPORTED_100baseT_Half (1 << 2)
+#define SUPPORTED_100baseT_Full (1 << 3)
+#define SUPPORTED_1000baseT_Half (1 << 4)
+#define SUPPORTED_1000baseT_Full (1 << 5)
+#define SUPPORTED_Autoneg (1 << 6)
+#define SUPPORTED_TP (1 << 7)
+#define SUPPORTED_AUI (1 << 8)
+#define SUPPORTED_MII (1 << 9)
+#define SUPPORTED_FIBRE (1 << 10)
+#define SUPPORTED_BNC (1 << 11)
+
+/* Indicates what features are advertised by the interface. */
+#define ADVERTISED_10baseT_Half (1 << 0)
+#define ADVERTISED_10baseT_Full (1 << 1)
+#define ADVERTISED_100baseT_Half (1 << 2)
+#define ADVERTISED_100baseT_Full (1 << 3)
+#define ADVERTISED_1000baseT_Half (1 << 4)
+#define ADVERTISED_1000baseT_Full (1 << 5)
+#define ADVERTISED_Autoneg (1 << 6)
+#define ADVERTISED_TP (1 << 7)
+#define ADVERTISED_AUI (1 << 8)
+#define ADVERTISED_MII (1 << 9)
+#define ADVERTISED_FIBRE (1 << 10)
+#define ADVERTISED_BNC (1 << 11)
+
+/* The following are all involved in forcing a particular link
+ * mode for the device when setting things. When getting the
+ * device's settings, these indicate the current mode and whether
+ * it was forced up into this mode or autonegotiated.
+ */
+
+/* The forced speed, 10Mb, 100Mb, gigabit. */
+#define SPEED_10 10
+#define SPEED_100 100
+#define SPEED_1000 1000
+
+/* Duplex, half or full. */
+#define DUPLEX_HALF 0x00
+#define DUPLEX_FULL 0x01
+
+/* Which connector port. */
+#define PORT_TP 0x00
+#define PORT_AUI 0x01
+#define PORT_MII 0x02
+#define PORT_FIBRE 0x03
+#define PORT_BNC 0x04
+
+/* Which transceiver to use. */
+#define XCVR_INTERNAL 0x00
+#define XCVR_EXTERNAL 0x01
+#define XCVR_DUMMY1 0x02
+#define XCVR_DUMMY2 0x03
+#define XCVR_DUMMY3 0x04
+
+/* Enable or disable autonegotiation. If this is set to enable,
+ * the forced link modes above are completely ignored.
+ */
+#define AUTONEG_DISABLE 0x00
+#define AUTONEG_ENABLE 0x01
+
+/* Wake-On-Lan options. */
+#define WAKE_PHY (1 << 0)
+#define WAKE_UCAST (1 << 1)
+#define WAKE_MCAST (1 << 2)
+#define WAKE_BCAST (1 << 3)
+#define WAKE_ARP (1 << 4)
+#define WAKE_MAGIC (1 << 5)
+#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */
+
+#endif /* _LINUX_ETHTOOL_H */
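
On Linux these structures are driven from user space through the SIOCETHTOOL
ioctl, with ifr_data pointing at the command block. A sketch of a GSET query
(error handling trimmed; "eth0" is an assumed interface name, and the structs
and constants come from the system's <linux/ethtool.h>):

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <linux/ethtool.h>
    #include <linux/sockios.h> /* SIOCETHTOOL */

    int main(void)
    {
        struct ethtool_cmd ecmd;
        struct ifreq ifr;
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        if (fd < 0)
            return 1;
        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
        memset(&ecmd, 0, sizeof(ecmd));
        ecmd.cmd = ETHTOOL_GSET;
        ifr.ifr_data = (char *)&ecmd;

        if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
            printf("speed %u, %s duplex\n", ecmd.speed,
                   ecmd.duplex == DUPLEX_FULL ? "full" : "half");
        close(fd);
        return 0;
    }
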
diff --git a/xen/include/xeno/event.h b/xen/include/xeno/event.h
new file mode 100644
index 0000000000..fdb9fed24d
--- /dev/null
+++ b/xen/include/xeno/event.h
@@ -0,0 +1,101 @@
+/******************************************************************************
+ * event.h
+ *
+ * A nice interface for passing asynchronous events to guest OSes.
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <xeno/config.h>
+#include <xeno/sched.h>
+#include <asm/bitops.h>
+
+#ifdef CONFIG_SMP
+
+/*
+ * mark_guest_event:
+ * @p: Domain to which event should be passed
+ * @event: Event number
+ * RETURNS: "Bitmask" of CPU on which process is currently running
+ *
+ * Idea is that caller may loop on task_list, looking for domains
+ * to pass events to (using this function). The caller accumulates the
+ * bits returned by this function (ORing them together) then calls
+ * guest_event_notify().
+ *
+ * Guest_events are per-domain events passed directly to the guest OS
+ * in ring 1.
+ */
+static inline unsigned long mark_guest_event(struct task_struct *p, int event)
+{
+ set_bit(event, &p->shared_info->events);
+
+ /*
+ * No need for the runqueue_lock! The check below does not race
+ * with the setting of has_cpu, because that is set with runqueue_lock
+ * held. The lock must be released before hypervisor exit (and so
+ * a write barrier executed). And, just before hypervisor exit,
+ * outstanding events are checked. So bit is certainly set early enough.
+ */
+ smp_mb();
+ if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+ reschedule(p);
+ return p->has_cpu ? (1 << p->processor) : 0;
+}
+
+/* As above, but hyp_events are handled within the hypervisor. */
+static inline unsigned long mark_hyp_event(struct task_struct *p, int event)
+{
+ set_bit(event, &p->hyp_events);
+ smp_mb();
+ if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+ reschedule(p);
+ return p->has_cpu ? (1 << p->processor) : 0;
+}
+
+/* Notify the given set of CPUs that guest events may be outstanding. */
+static inline void guest_event_notify(unsigned long cpu_mask)
+{
+ cpu_mask &= ~(1 << smp_processor_id());
+ if ( cpu_mask != 0 ) smp_send_event_check_mask(cpu_mask);
+}
+
+#else
+
+static inline unsigned long mark_guest_event(struct task_struct *p, int event)
+{
+ set_bit(event, &p->shared_info->events);
+ if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+ reschedule(p);
+ return 0;
+}
+
+static inline unsigned long mark_hyp_event(struct task_struct *p, int event)
+{
+ set_bit(event, &p->hyp_events);
+ if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+ reschedule(p);
+ return 0;
+}
+
+#define guest_event_notify(_mask) ((void)0)
+
+#endif
+
+/* Notify hypervisor events in the same way as for guest OS events. */
+#define hyp_event_notify(_mask) guest_event_notify(_mask)
+
+/* Clear a guest-OS event from a per-domain mask. */
+static inline void clear_guest_event(struct task_struct *p, int event)
+{
+ clear_bit(event, &p->shared_info->events);
+}
+
+/* Clear a hypervisor event from a per-domain mask. */
+static inline void clear_hyp_event(struct task_struct *p, int event)
+{
+ clear_bit(event, &p->hyp_events);
+}
+
+/* Called on return from (architecture-dependent) entry.S. */
+void do_hyp_events(void);
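
Putting the SMP comments above together, the intended calling pattern
accumulates the returned CPU masks and issues the cross-CPU kick once at the
end. A sketch only (the domain iteration and event number are illustrative,
not code from this tree):

    unsigned long cpu_mask = 0;

    /* ... for each domain 'p' that should receive the event ... */
    cpu_mask |= mark_guest_event(p, event_nr); /* event_nr: hypothetical */

    /* One IPI pass for the whole batch, not one per domain: */
    guest_event_notify(cpu_mask);
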
diff --git a/xen/include/xeno/genhd.h b/xen/include/xeno/genhd.h
new file mode 100644
index 0000000000..58a1734a56
--- /dev/null
+++ b/xen/include/xeno/genhd.h
@@ -0,0 +1,313 @@
+#ifndef _LINUX_GENHD_H
+#define _LINUX_GENHD_H
+
+/*
+ * genhd.h Copyright (C) 1992 Drew Eckhardt
+ * Generic hard disk header file by
+ * Drew Eckhardt
+ *
+ * <drew@colorado.edu>
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/major.h>
+
+enum {
+/* These three have identical behaviour; use the second one if DOS fdisk gets
+ confused about extended/logical partitions starting past cylinder 1023. */
+ DOS_EXTENDED_PARTITION = 5,
+ LINUX_EXTENDED_PARTITION = 0x85,
+ WIN98_EXTENDED_PARTITION = 0x0f,
+
+ LINUX_SWAP_PARTITION = 0x82,
+ LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */
+
+ SOLARIS_X86_PARTITION = LINUX_SWAP_PARTITION,
+
+ DM6_PARTITION = 0x54, /* has DDO: use xlated geom & offset */
+ EZD_PARTITION = 0x55, /* EZ-DRIVE */
+ DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */
+ DM6_AUX3PARTITION = 0x53, /* no DDO: use xlated geom */
+
+ FREEBSD_PARTITION = 0xa5, /* FreeBSD Partition ID */
+ OPENBSD_PARTITION = 0xa6, /* OpenBSD Partition ID */
+ NETBSD_PARTITION = 0xa9, /* NetBSD Partition ID */
+ BSDI_PARTITION = 0xb7, /* BSDI Partition ID */
+/* Ours is not to wonder why.. */
+ BSD_PARTITION = FREEBSD_PARTITION,
+ MINIX_PARTITION = 0x81, /* Minix Partition ID */
+ PLAN9_PARTITION = 0x39, /* Plan 9 Partition ID */
+ UNIXWARE_PARTITION = 0x63, /* Partition ID, same as */
+ /* GNU_HURD and SCO Unix */
+};
+
+struct partition {
+ unsigned char boot_ind; /* 0x80 - active */
+ unsigned char head; /* starting head */
+ unsigned char sector; /* starting sector */
+ unsigned char cyl; /* starting cylinder */
+ unsigned char sys_ind; /* What partition type */
+ unsigned char end_head; /* end head */
+ unsigned char end_sector; /* end sector */
+ unsigned char end_cyl; /* end cylinder */
+ unsigned int start_sect; /* starting sector counting from 0 */
+ unsigned int nr_sects; /* nr of sectors in partition */
+} __attribute__((packed));
+
+#ifdef __KERNEL__
+/*# include <linux/devfs_fs_kernel.h>*/
+
+struct hd_struct {
+ unsigned long start_sect;
+ unsigned long nr_sects;
+ /*devfs_handle_t de;*/ /* primary (master) devfs entry */
+ int number; /* stupid old code wastes space */
+
+ /* Performance stats: */
+ unsigned int ios_in_flight;
+ unsigned int io_ticks;
+ unsigned int last_idle_time;
+ unsigned int last_queue_change;
+ unsigned int aveq;
+
+ unsigned int rd_ios;
+ unsigned int rd_merges;
+ unsigned int rd_ticks;
+ unsigned int rd_sectors;
+ unsigned int wr_ios;
+ unsigned int wr_merges;
+ unsigned int wr_ticks;
+ unsigned int wr_sectors;
+};
+
+#define GENHD_FL_REMOVABLE 1
+
+struct gendisk {
+ int major; /* major number of driver */
+ const char *major_name; /* name of major driver */
+ int minor_shift; /* number of times minor is shifted to
+ get real minor */
+ int max_p; /* maximum partitions per device */
+
+ struct hd_struct *part; /* [indexed by minor] */
+ int *sizes; /* [idem], device size in blocks */
+ int nr_real; /* number of real devices */
+
+ void *real_devices; /* internal use */
+ struct gendisk *next;
+ struct block_device_operations *fops;
+
+ /*devfs_handle_t *de_arr;*/ /* one per physical disc */
+ char *flags; /* one per physical disc */
+};
+
+/* drivers/block/genhd.c */
+extern struct gendisk *gendisk_head;
+
+extern void add_gendisk(struct gendisk *gp);
+extern void del_gendisk(struct gendisk *gp);
+extern struct gendisk *get_gendisk(kdev_t dev);
+extern int walk_gendisk(int (*walk)(struct gendisk *, void *), void *);
+
+#endif /* __KERNEL__ */
+
+#ifdef CONFIG_SOLARIS_X86_PARTITION
+
+#define SOLARIS_X86_NUMSLICE 8
+#define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL)
+
+struct solaris_x86_slice {
+ ushort s_tag; /* ID tag of partition */
+ ushort s_flag; /* permission flags */
+ unsigned int s_start; /* start sector no of partition */
+ unsigned int s_size; /* # of blocks in partition */
+};
+
+struct solaris_x86_vtoc {
+ unsigned int v_bootinfo[3]; /* info needed by mboot (unsupported) */
+ unsigned int v_sanity; /* to verify vtoc sanity */
+ unsigned int v_version; /* layout version */
+ char v_volume[8]; /* volume name */
+ ushort v_sectorsz; /* sector size in bytes */
+ ushort v_nparts; /* number of partitions */
+ unsigned int v_reserved[10]; /* free space */
+ struct solaris_x86_slice
+ v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */
+ unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp (unsupported) */
+ char v_asciilabel[128]; /* for compatibility */
+};
+
+#endif /* CONFIG_SOLARIS_X86_PARTITION */
+
+#ifdef CONFIG_BSD_DISKLABEL
+/*
+ * BSD disklabel support by Yossi Gottlieb <yogo@math.tau.ac.il>
+ * updated by Marc Espie <Marc.Espie@openbsd.org>
+ */
+
+/* check against BSD src/sys/sys/disklabel.h for consistency */
+
+#define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */
+#define BSD_MAXPARTITIONS 8
+#define OPENBSD_MAXPARTITIONS 16
+#define BSD_FS_UNUSED 0 /* disklabel unused partition entry ID */
+struct bsd_disklabel {
+ __u32 d_magic; /* the magic number */
+ __s16 d_type; /* drive type */
+ __s16 d_subtype; /* controller/d_type specific */
+ char d_typename[16]; /* type name, e.g. "eagle" */
+ char d_packname[16]; /* pack identifier */
+ __u32 d_secsize; /* # of bytes per sector */
+ __u32 d_nsectors; /* # of data sectors per track */
+ __u32 d_ntracks; /* # of tracks per cylinder */
+ __u32 d_ncylinders; /* # of data cylinders per unit */
+ __u32 d_secpercyl; /* # of data sectors per cylinder */
+ __u32 d_secperunit; /* # of data sectors per unit */
+ __u16 d_sparespertrack; /* # of spare sectors per track */
+ __u16 d_sparespercyl; /* # of spare sectors per cylinder */
+ __u32 d_acylinders; /* # of alt. cylinders per unit */
+ __u16 d_rpm; /* rotational speed */
+ __u16 d_interleave; /* hardware sector interleave */
+ __u16 d_trackskew; /* sector 0 skew, per track */
+ __u16 d_cylskew; /* sector 0 skew, per cylinder */
+ __u32 d_headswitch; /* head switch time, usec */
+ __u32 d_trkseek; /* track-to-track seek, usec */
+ __u32 d_flags; /* generic flags */
+#define NDDATA 5
+ __u32 d_drivedata[NDDATA]; /* drive-type specific information */
+#define NSPARE 5
+ __u32 d_spare[NSPARE]; /* reserved for future use */
+ __u32 d_magic2; /* the magic number (again) */
+ __u16 d_checksum; /* xor of data incl. partitions */
+
+ /* filesystem and partition information: */
+ __u16 d_npartitions; /* number of partitions in following */
+ __u32 d_bbsize; /* size of boot area at sn0, bytes */
+ __u32 d_sbsize; /* max size of fs superblock, bytes */
+ struct bsd_partition { /* the partition table */
+ __u32 p_size; /* number of sectors in partition */
+ __u32 p_offset; /* starting sector */
+ __u32 p_fsize; /* filesystem basic fragment size */
+ __u8 p_fstype; /* filesystem type, see below */
+ __u8 p_frag; /* filesystem fragments per block */
+ __u16 p_cpg; /* filesystem cylinders per group */
+ } d_partitions[BSD_MAXPARTITIONS]; /* actually may be more */
+};
+
+#endif /* CONFIG_BSD_DISKLABEL */
+
+#ifdef CONFIG_UNIXWARE_DISKLABEL
+/*
+ * Unixware slices support by Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl>
+ * and Krzysztof G. Baranowski <kgb@knm.org.pl>
+ */
+
+#define UNIXWARE_DISKMAGIC (0xCA5E600DUL) /* The disk magic number */
+#define UNIXWARE_DISKMAGIC2 (0x600DDEEEUL) /* The slice table magic nr */
+#define UNIXWARE_NUMSLICE 16
+#define UNIXWARE_FS_UNUSED 0 /* Unused slice entry ID */
+
+struct unixware_slice {
+ __u16 s_label; /* label */
+ __u16 s_flags; /* permission flags */
+ __u32 start_sect; /* starting sector */
+ __u32 nr_sects; /* number of sectors in slice */
+};
+
+struct unixware_disklabel {
+ __u32 d_type; /* drive type */
+ __u32 d_magic; /* the magic number */
+ __u32 d_version; /* version number */
+ char d_serial[12]; /* serial number of the device */
+ __u32 d_ncylinders; /* # of data cylinders per device */
+ __u32 d_ntracks; /* # of tracks per cylinder */
+ __u32 d_nsectors; /* # of data sectors per track */
+ __u32 d_secsize; /* # of bytes per sector */
+ __u32 d_part_start; /* # of first sector of this partition */
+ __u32 d_unknown1[12]; /* ? */
+ __u32 d_alt_tbl; /* byte offset of alternate table */
+ __u32 d_alt_len; /* byte length of alternate table */
+ __u32 d_phys_cyl; /* # of physical cylinders per device */
+ __u32 d_phys_trk; /* # of physical tracks per cylinder */
+ __u32 d_phys_sec; /* # of physical sectors per track */
+ __u32 d_phys_bytes; /* # of physical bytes per sector */
+ __u32 d_unknown2; /* ? */
+ __u32 d_unknown3; /* ? */
+ __u32 d_pad[8]; /* pad */
+
+ struct unixware_vtoc {
+ __u32 v_magic; /* the magic number */
+ __u32 v_version; /* version number */
+ char v_name[8]; /* volume name */
+ __u16 v_nslices; /* # of slices */
+ __u16 v_unknown1; /* ? */
+ __u32 v_reserved[10]; /* reserved */
+ struct unixware_slice
+ v_slice[UNIXWARE_NUMSLICE]; /* slice headers */
+ } vtoc;
+
+}; /* 408 */
+
+#endif /* CONFIG_UNIXWARE_DISKLABEL */
+
+#ifdef CONFIG_MINIX_SUBPARTITION
+# define MINIX_NR_SUBPARTITIONS 4
+#endif /* CONFIG_MINIX_SUBPARTITION */
+
+#ifdef __KERNEL__
+
+char *disk_name (struct gendisk *hd, int minor, char *buf);
+
+/*
+ * disk_round_stats is used to round off the IO statistics for a disk
+ * for a complete clock tick.
+ */
+void disk_round_stats(struct hd_struct *hd);
+
+/*
+ * Account for the completion of an IO request (used by drivers which
+ * bypass the normal end_request processing)
+ */
+struct request;
+void req_finished_io(struct request *);
+
+#ifdef DEVFS_MUST_DIE
+extern void devfs_register_partitions (struct gendisk *dev, int minor,
+ int unregister);
+#endif
+
+
+
+/*
+ * FIXME: this should use genhd->minor_shift, but that is slow to look up.
+ */
+static inline unsigned int disk_index (kdev_t dev)
+{
+ int major = MAJOR(dev);
+ int minor = MINOR(dev);
+ unsigned int index;
+
+ switch (major) {
+ case DAC960_MAJOR+0:
+ index = (minor & 0x00f8) >> 3;
+ break;
+ case SCSI_DISK0_MAJOR:
+ index = (minor & 0x00f0) >> 4;
+ break;
+ case IDE0_MAJOR: /* same as HD_MAJOR */
+ case XT_DISK_MAJOR:
+ index = (minor & 0x0040) >> 6;
+ break;
+ case IDE1_MAJOR:
+ index = ((minor & 0x0040) >> 6) + 2;
+ break;
+ default:
+ return 0;
+ }
+ return index;
+}
+
+#endif
+
+#endif
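
struct partition above is the on-disk layout of a DOS MBR entry: four 16-byte
slots starting at byte 446, terminated by the 0x55AA signature. A user-space
sketch that walks a disk image given on the command line (struct redeclared
locally so the demo is self-contained; fields are little-endian on disk, which
reads correctly on x86):

    #include <stdio.h>

    struct mbr_part { /* same layout as struct partition above */
        unsigned char boot_ind, head, sector, cyl;
        unsigned char sys_ind, end_head, end_sector, end_cyl;
        unsigned int  start_sect, nr_sects;
    } __attribute__((packed));

    int main(int argc, char **argv)
    {
        unsigned char mbr[512];
        FILE *f;
        int i;

        if (argc < 2 || !(f = fopen(argv[1], "rb")))
            return 1;
        if (fread(mbr, 1, 512, f) != 512 ||
            mbr[510] != 0x55 || mbr[511] != 0xaa)
            return 1; /* short read or missing MBR signature */

        for (i = 0; i < 4; i++) {
            struct mbr_part *p = (struct mbr_part *)(mbr + 446 + i * 16);
            if (p->sys_ind)
                printf("part %d: type %#x start %u sectors %u\n",
                       i, p->sys_ind, p->start_sect, p->nr_sects);
        }
        fclose(f);
        return 0;
    }
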
diff --git a/xen/include/xeno/hdreg.h b/xen/include/xeno/hdreg.h
new file mode 100644
index 0000000000..703b750110
--- /dev/null
+++ b/xen/include/xeno/hdreg.h
@@ -0,0 +1,662 @@
+#ifndef _LINUX_HDREG_H
+#define _LINUX_HDREG_H
+
+/*
+ * This file contains some defines for the AT-hd-controller.
+ * Various sources.
+ */
+
+#define HD_IRQ 14 /* the standard disk interrupt */
+
+/* ide.c has its own port definitions in "ide.h" */
+
+/* Hd controller regs. Ref: IBM AT Bios-listing */
+#define HD_DATA 0x1f0 /* _CTL when writing */
+#define HD_ERROR 0x1f1 /* see err-bits */
+#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */
+#define HD_SECTOR 0x1f3 /* starting sector */
+#define HD_LCYL 0x1f4 /* starting cylinder */
+#define HD_HCYL 0x1f5 /* high byte of starting cyl */
+#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */
+#define HD_STATUS 0x1f7 /* see status-bits */
+#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */
+#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */
+#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */
+
+#define HD_CMD 0x3f6 /* used for resets */
+#define HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */
+
+/* remainder is shared between hd.c, ide.c, ide-cd.c, and the hdparm utility */
+
+/* Bits of HD_STATUS */
+#define ERR_STAT 0x01
+#define INDEX_STAT 0x02
+#define ECC_STAT 0x04 /* Corrected error */
+#define DRQ_STAT 0x08
+#define SEEK_STAT 0x10
+#define WRERR_STAT 0x20
+#define READY_STAT 0x40
+#define BUSY_STAT 0x80
+
+/* Bits for HD_ERROR */
+#define MARK_ERR 0x01 /* Bad address mark */
+#define TRK0_ERR 0x02 /* couldn't find track 0 */
+#define ABRT_ERR 0x04 /* Command aborted */
+#define MCR_ERR 0x08 /* media change request */
+#define ID_ERR 0x10 /* ID field not found */
+#define MC_ERR 0x20 /* media changed */
+#define ECC_ERR 0x40 /* Uncorrectable ECC error */
+#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */
+#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */
+
+/*
+ * Command Header sizes for IOCTL commands
+ * HDIO_DRIVE_CMD, HDIO_DRIVE_TASK, and HDIO_DRIVE_TASKFILE
+ */
+
+#if 0
+#include <asm/hdreg.h>
+typedef ide_ioreg_t task_ioreg_t;
+#else
+typedef unsigned char task_ioreg_t;
+#endif
+
+#define HDIO_DRIVE_CMD_HDR_SIZE (4*sizeof(task_ioreg_t))
+#define HDIO_DRIVE_TASK_HDR_SIZE (8*sizeof(task_ioreg_t))
+#define HDIO_DRIVE_HOB_HDR_SIZE (8*sizeof(task_ioreg_t))
+
+#define IDE_DRIVE_TASK_INVALID -1
+#define IDE_DRIVE_TASK_NO_DATA 0
+#define IDE_DRIVE_TASK_SET_XFER 1
+
+#define IDE_DRIVE_TASK_IN 2
+
+#define IDE_DRIVE_TASK_OUT 3
+#define IDE_DRIVE_TASK_RAW_WRITE 4
+
+struct hd_drive_cmd_hdr {
+ task_ioreg_t command;
+ task_ioreg_t sector_number;
+ task_ioreg_t feature;
+ task_ioreg_t sector_count;
+};
+
+typedef struct hd_drive_task_hdr {
+ task_ioreg_t data;
+ task_ioreg_t feature;
+ task_ioreg_t sector_count;
+ task_ioreg_t sector_number;
+ task_ioreg_t low_cylinder;
+ task_ioreg_t high_cylinder;
+ task_ioreg_t device_head;
+ task_ioreg_t command;
+} task_struct_t;
+
+typedef struct hd_drive_hob_hdr {
+ task_ioreg_t data;
+ task_ioreg_t feature;
+ task_ioreg_t sector_count;
+ task_ioreg_t sector_number;
+ task_ioreg_t low_cylinder;
+ task_ioreg_t high_cylinder;
+ task_ioreg_t device_head;
+ task_ioreg_t control;
+} hob_struct_t;
+
+typedef union ide_reg_valid_s {
+ unsigned all : 16;
+ struct {
+ unsigned data : 1;
+ unsigned error_feature : 1;
+ unsigned sector : 1;
+ unsigned nsector : 1;
+ unsigned lcyl : 1;
+ unsigned hcyl : 1;
+ unsigned select : 1;
+ unsigned status_command : 1;
+
+ unsigned data_hob : 1;
+ unsigned error_feature_hob : 1;
+ unsigned sector_hob : 1;
+ unsigned nsector_hob : 1;
+ unsigned lcyl_hob : 1;
+ unsigned hcyl_hob : 1;
+ unsigned select_hob : 1;
+ unsigned control_hob : 1;
+ } b;
+} ide_reg_valid_t;
+
+/*
+ * Define standard taskfile in/out register
+ */
+#define IDE_TASKFILE_STD_OUT_FLAGS 0xFE
+#define IDE_TASKFILE_STD_IN_FLAGS 0xFE
+#define IDE_HOB_STD_OUT_FLAGS 0xC0
+#define IDE_HOB_STD_IN_FLAGS 0xC0
+
+typedef struct ide_task_request_s {
+ task_ioreg_t io_ports[8];
+ task_ioreg_t hob_ports[8];
+ ide_reg_valid_t out_flags;
+ ide_reg_valid_t in_flags;
+ int data_phase;
+ int req_cmd;
+ unsigned long out_size;
+ unsigned long in_size;
+} ide_task_request_t;
+
+typedef struct ide_ioctl_request_s {
+ ide_task_request_t *task_request;
+ unsigned char *out_buffer;
+ unsigned char *in_buffer;
+} ide_ioctl_request_t;
+
+#define TASKFILE_INVALID 0x7fff
+#define TASKFILE_48 0x8000
+
+#define TASKFILE_NO_DATA 0x0000
+
+#define TASKFILE_IN 0x0001
+#define TASKFILE_MULTI_IN 0x0002
+
+#define TASKFILE_OUT 0x0004
+#define TASKFILE_MULTI_OUT 0x0008
+#define TASKFILE_IN_OUT 0x0010
+
+#define TASKFILE_IN_DMA 0x0020
+#define TASKFILE_OUT_DMA 0x0040
+#define TASKFILE_IN_DMAQ 0x0080
+#define TASKFILE_OUT_DMAQ 0x0100
+
+#define TASKFILE_P_IN 0x0200
+#define TASKFILE_P_OUT 0x0400
+#define TASKFILE_P_IN_DMA 0x0800
+#define TASKFILE_P_OUT_DMA 0x1000
+#define TASKFILE_P_IN_DMAQ 0x2000
+#define TASKFILE_P_OUT_DMAQ 0x4000
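+
+/*
+ * Illustrative sketch (an assumption, not part of the original header):
+ * filling in an ide_task_request_t for a no-data command such as
+ * WIN_STANDBYNOW1.  io_ports[] is assumed to follow the struct
+ * hd_drive_task_hdr layout above, so index 7 is the command register.
+ * 'fd' is a hypothetical open file descriptor.
+ */
+#if 0
+    ide_task_request_t req;
+
+    memset(&req, 0, sizeof(req));
+    req.io_ports[7]   = WIN_STANDBYNOW1;            /* command byte */
+    req.out_flags.all = IDE_TASKFILE_STD_OUT_FLAGS; /* regs to write */
+    req.data_phase    = TASKFILE_NO_DATA;
+    req.req_cmd       = IDE_DRIVE_TASK_NO_DATA;
+    ioctl(fd, HDIO_DRIVE_TASKFILE, &req);
+#endif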
+
+/* ATA/ATAPI Commands pre T13 Spec */
+#define WIN_NOP 0x00
+#define CFA_REQ_EXT_ERROR_CODE 0x03 /* CFA Request Extended Error Code */
+#define WIN_SRST 0x08 /* ATAPI soft reset command */
+#define WIN_DEVICE_RESET 0x08
+#define WIN_RESTORE 0x10
+#define WIN_READ 0x20 /* 28-Bit */
+#define WIN_READ_EXT 0x24 /* 48-Bit */
+#define WIN_READDMA_EXT 0x25 /* 48-Bit */
+#define WIN_READDMA_QUEUED_EXT 0x26 /* 48-Bit */
+#define WIN_READ_NATIVE_MAX_EXT 0x27 /* 48-Bit */
+#define WIN_MULTREAD_EXT 0x29 /* 48-Bit */
+#define WIN_WRITE 0x30 /* 28-Bit */
+#define WIN_WRITE_EXT 0x34 /* 48-Bit */
+#define WIN_WRITEDMA_EXT 0x35 /* 48-Bit */
+#define WIN_WRITEDMA_QUEUED_EXT 0x36 /* 48-Bit */
+#define WIN_SET_MAX_EXT 0x37 /* 48-Bit */
+#define CFA_WRITE_SECT_WO_ERASE 0x38 /* CFA Write Sectors without erase */
+#define WIN_MULTWRITE_EXT 0x39 /* 48-Bit */
+#define WIN_WRITE_VERIFY 0x3C /* 28-Bit */
+#define WIN_VERIFY 0x40 /* 28-Bit - Read Verify Sectors */
+#define WIN_VERIFY_EXT 0x42 /* 48-Bit */
+#define WIN_FORMAT 0x50
+#define WIN_INIT 0x60
+#define WIN_SEEK 0x70
+#define CFA_TRANSLATE_SECTOR 0x87 /* CFA Translate Sector */
+#define WIN_DIAGNOSE 0x90
+#define WIN_SPECIFY 0x91 /* set drive geometry translation */
+#define WIN_DOWNLOAD_MICROCODE 0x92
+#define WIN_STANDBYNOW2 0x94
+#define WIN_SETIDLE2 0x97
+#define WIN_CHECKPOWERMODE2 0x98
+#define WIN_SLEEPNOW2 0x99
+#define WIN_PACKETCMD 0xA0 /* Send a packet command. */
+#define WIN_PIDENTIFY 0xA1 /* identify ATAPI device */
+#define WIN_QUEUED_SERVICE 0xA2
+#define WIN_SMART 0xB0 /* self-monitoring and reporting */
+#define CFA_ERASE_SECTORS 0xC0
+#define WIN_MULTREAD 0xC4 /* read sectors using multiple mode*/
+#define WIN_MULTWRITE 0xC5 /* write sectors using multiple mode */
+#define WIN_SETMULT 0xC6 /* enable/disable multiple mode */
+#define WIN_READDMA_QUEUED 0xC7 /* read sectors using Queued DMA transfers */
+#define WIN_READDMA 0xC8 /* read sectors using DMA transfers */
+#define WIN_WRITEDMA 0xCA /* write sectors using DMA transfers */
+#define WIN_WRITEDMA_QUEUED 0xCC /* write sectors using Queued DMA transfers */
+#define CFA_WRITE_MULTI_WO_ERASE 0xCD /* CFA Write multiple without erase */
+#define WIN_GETMEDIASTATUS 0xDA
+#define WIN_DOORLOCK 0xDE /* lock door on removable drives */
+#define WIN_DOORUNLOCK 0xDF /* unlock door on removable drives */
+#define WIN_STANDBYNOW1 0xE0
+#define WIN_IDLEIMMEDIATE 0xE1 /* force drive to become "ready" */
+#define WIN_STANDBY 0xE2 /* Set device in Standby Mode */
+#define WIN_SETIDLE1 0xE3
+#define WIN_READ_BUFFER 0xE4 /* force read only 1 sector */
+#define WIN_CHECKPOWERMODE1 0xE5
+#define WIN_SLEEPNOW1 0xE6
+#define WIN_FLUSH_CACHE 0xE7
+#define WIN_WRITE_BUFFER 0xE8 /* force write only 1 sector */
+#define WIN_FLUSH_CACHE_EXT 0xEA /* 48-Bit */
+#define WIN_IDENTIFY 0xEC /* ask drive to identify itself */
+#define WIN_MEDIAEJECT 0xED
+#define WIN_IDENTIFY_DMA 0xEE /* same as WIN_IDENTIFY, but DMA */
+#define WIN_SETFEATURES 0xEF /* set special drive features */
+#define EXABYTE_ENABLE_NEST 0xF0
+#define WIN_SECURITY_SET_PASS 0xF1
+#define WIN_SECURITY_UNLOCK 0xF2
+#define WIN_SECURITY_ERASE_PREPARE 0xF3
+#define WIN_SECURITY_ERASE_UNIT 0xF4
+#define WIN_SECURITY_FREEZE_LOCK 0xF5
+#define WIN_SECURITY_DISABLE 0xF6
+#define WIN_READ_NATIVE_MAX 0xF8 /* return the native maximum address */
+#define WIN_SET_MAX 0xF9
+#define DISABLE_SEAGATE 0xFB
+
+/* WIN_SMART sub-commands */
+
+#define SMART_READ_VALUES 0xD0
+#define SMART_READ_THRESHOLDS 0xD1
+#define SMART_AUTOSAVE 0xD2
+#define SMART_SAVE 0xD3
+#define SMART_IMMEDIATE_OFFLINE 0xD4
+#define SMART_READ_LOG_SECTOR 0xD5
+#define SMART_WRITE_LOG_SECTOR 0xD6
+#define SMART_WRITE_THRESHOLDS 0xD7
+#define SMART_ENABLE 0xD8
+#define SMART_DISABLE 0xD9
+#define SMART_STATUS 0xDA
+#define SMART_AUTO_OFFLINE 0xDB
+
+/* Password values for TF4 & TF5 when executing SMART commands */
+
+#define SMART_LCYL_PASS 0x4F
+#define SMART_HCYL_PASS 0xC2
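+
+/*
+ * Illustrative sketch (an assumption modelled on hdparm): enabling SMART
+ * via HDIO_DRIVE_TASK.  The sub-command is assumed to travel in the
+ * feature register and, per the note above, TF4/TF5 (lcyl/hcyl) must
+ * carry the pass values.  'fd' is a hypothetical open file descriptor.
+ */
+#if 0
+    unsigned char args[7] = { WIN_SMART,        /* command           */
+                              SMART_ENABLE,     /* feature = sub-cmd */
+                              0, 0,             /* nsector, sector   */
+                              SMART_LCYL_PASS,  /* TF4               */
+                              SMART_HCYL_PASS,  /* TF5               */
+                              0 };              /* device/head       */
+    ioctl(fd, HDIO_DRIVE_TASK, args);
+#endif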
+
+/* WIN_SETFEATURES sub-commands */
+
+#define SETFEATURES_EN_WCACHE 0x02 /* Enable write cache */
+#define SETFEATURES_XFER 0x03 /* Set transfer mode */
+# define XFER_UDMA_7 0x47 /* 0100|0111 */
+# define XFER_UDMA_6 0x46 /* 0100|0110 */
+# define XFER_UDMA_5 0x45 /* 0100|0101 */
+# define XFER_UDMA_4 0x44 /* 0100|0100 */
+# define XFER_UDMA_3 0x43 /* 0100|0011 */
+# define XFER_UDMA_2 0x42 /* 0100|0010 */
+# define XFER_UDMA_1 0x41 /* 0100|0001 */
+# define XFER_UDMA_0 0x40 /* 0100|0000 */
+# define XFER_MW_DMA_2 0x22 /* 0010|0010 */
+# define XFER_MW_DMA_1 0x21 /* 0010|0001 */
+# define XFER_MW_DMA_0 0x20 /* 0010|0000 */
+# define XFER_SW_DMA_2 0x12 /* 0001|0010 */
+# define XFER_SW_DMA_1 0x11 /* 0001|0001 */
+# define XFER_SW_DMA_0 0x10 /* 0001|0000 */
+# define XFER_PIO_4 0x0C /* 0000|1100 */
+# define XFER_PIO_3 0x0B /* 0000|1011 */
+# define XFER_PIO_2 0x0A /* 0000|1010 */
+# define XFER_PIO_1 0x09 /* 0000|1001 */
+# define XFER_PIO_0 0x08 /* 0000|1000 */
+# define XFER_PIO_SLOW 0x00 /* 0000|0000 */
+#define SETFEATURES_DIS_DEFECT 0x04 /* Disable Defect Management */
+#define SETFEATURES_EN_APM 0x05 /* Enable advanced power management */
+#define SETFEATURES_DIS_MSN 0x31 /* Disable Media Status Notification */
+#define SETFEATURES_EN_AAM 0x42 /* Enable Automatic Acoustic Management */
+#define SETFEATURES_DIS_RLA 0x55 /* Disable read look-ahead feature */
+#define SETFEATURES_EN_RI 0x5D /* Enable release interrupt */
+#define SETFEATURES_EN_SI 0x5E /* Enable SERVICE interrupt */
+#define SETFEATURES_DIS_RPOD 0x66 /* Disable reverting to power on defaults */
+#define SETFEATURES_DIS_WCACHE 0x82 /* Disable write cache */
+#define SETFEATURES_EN_DEFECT 0x84 /* Enable Defect Management */
+#define SETFEATURES_DIS_APM 0x85 /* Disable advanced power management */
+#define SETFEATURES_EN_MSN 0x95 /* Enable Media Status Notification */
+#define SETFEATURES_EN_RLA 0xAA /* Enable read look-ahead feature */
+#define SETFEATURES_PREFETCH 0xAB /* Sets drive prefetch value */
+#define SETFEATURES_DIS_AAM 0xC2 /* Disable Automatic Acoustic Management */
+#define SETFEATURES_EN_RPOD 0xCC /* Enable reverting to power on defaults */
+#define SETFEATURES_DIS_RI 0xDD /* Disable release interrupt */
+#define SETFEATURES_DIS_SI 0xDE /* Disable SERVICE interrupt */
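+
+/*
+ * Illustrative sketch (an assumption modelled on "hdparm -X"): selecting
+ * Ultra DMA mode 2 with SETFEATURES_XFER via HDIO_DRIVE_CMD.  The desired
+ * XFER_* value is assumed to travel in args[1] and the feature sub-command
+ * in args[2].  'fd' is a hypothetical open file descriptor.
+ */
+#if 0
+    unsigned char args[4] = { WIN_SETFEATURES,  /* command           */
+                              XFER_UDMA_2,      /* new transfer mode */
+                              SETFEATURES_XFER, /* feature = sub-cmd */
+                              0 };
+    ioctl(fd, HDIO_DRIVE_CMD, args);
+#endif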
+
+/* WIN_SECURITY sub-commands */
+
+#define SECURITY_SET_PASSWORD 0xBA
+#define SECURITY_UNLOCK 0xBB
+#define SECURITY_ERASE_PREPARE 0xBC
+#define SECURITY_ERASE_UNIT 0xBD
+#define SECURITY_FREEZE_LOCK 0xBE
+#define SECURITY_DISABLE_PASSWORD 0xBF
+
+struct hd_geometry {
+ unsigned char heads;
+ unsigned char sectors;
+ unsigned short cylinders;
+ unsigned long start;
+};
+
+/* BIG GEOMETRY */
+struct hd_big_geometry {
+ unsigned char heads;
+ unsigned char sectors;
+ unsigned int cylinders;
+ unsigned long start;
+};
+
+/* hd/ide ctl's that pass (arg) ptrs to user space are numbered 0x030n/0x031n */
+#define HDIO_GETGEO 0x0301 /* get device geometry */
+#define HDIO_GET_UNMASKINTR 0x0302 /* get current unmask setting */
+#define HDIO_GET_MULTCOUNT 0x0304 /* get current IDE blockmode setting */
+#define HDIO_GET_QDMA 0x0305 /* get use-qdma flag */
+#define HDIO_OBSOLETE_IDENTITY 0x0307 /* OBSOLETE, DO NOT USE: returns 142 bytes */
+#define HDIO_GET_KEEPSETTINGS 0x0308 /* get keep-settings-on-reset flag */
+#define HDIO_GET_32BIT 0x0309 /* get current io_32bit setting */
+#define HDIO_GET_NOWERR 0x030a /* get ignore-write-error flag */
+#define HDIO_GET_DMA 0x030b /* get use-dma flag */
+#define HDIO_GET_NICE 0x030c /* get nice flags */
+#define HDIO_GET_IDENTITY 0x030d /* get IDE identification info */
+#define HDIO_GET_WCACHE 0x030e /* get write cache mode on|off */
+#define HDIO_GET_ACOUSTIC 0x030f /* get acoustic value */
+#define HDIO_GET_ADDRESS 0x0310 /* */
+
+#define HDIO_GET_BUSSTATE 0x031a /* get the bus state of the hwif */
+#define HDIO_TRISTATE_HWIF 0x031b /* execute a channel tristate */
+#define HDIO_DRIVE_RESET 0x031c /* execute a device reset */
+#define HDIO_DRIVE_TASKFILE 0x031d /* execute raw taskfile */
+#define HDIO_DRIVE_TASK 0x031e /* execute task and special drive command */
+#define HDIO_DRIVE_CMD 0x031f /* execute a special drive command */
+
+#define HDIO_DRIVE_CMD_AEB HDIO_DRIVE_TASK
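+
+/*
+ * Illustrative sketch (assumed typical user-space usage): querying the
+ * drive geometry through HDIO_GETGEO into the struct hd_geometry defined
+ * above.  'fd' is a hypothetical open file descriptor.
+ */
+#if 0
+    struct hd_geometry geo;
+
+    if (ioctl(fd, HDIO_GETGEO, &geo) == 0)
+        printf("%u heads, %u sectors/track, %u cylinders, start %lu\n",
+               geo.heads, geo.sectors, geo.cylinders, geo.start);
+#endif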
+
+/* hd/ide ctl's that pass (arg) non-ptr values are numbered 0x032n/0x033n */
+#define HDIO_SET_MULTCOUNT 0x0321 /* change IDE blockmode */
+#define HDIO_SET_UNMASKINTR 0x0322 /* permit other irqs during I/O */
+#define HDIO_SET_KEEPSETTINGS 0x0323 /* keep ioctl settings on reset */
+#define HDIO_SET_32BIT 0x0324 /* change io_32bit flags */
+#define HDIO_SET_NOWERR 0x0325 /* change ignore-write-error flag */
+#define HDIO_SET_DMA 0x0326 /* change use-dma flag */
+#define HDIO_SET_PIO_MODE 0x0327 /* reconfig interface to new speed */
+#define HDIO_SCAN_HWIF 0x0328 /* register and (re)scan interface */
+#define HDIO_SET_NICE 0x0329 /* set nice flags */
+#define HDIO_UNREGISTER_HWIF 0x032a /* unregister interface */
+#define HDIO_SET_WCACHE 0x032b /* change write cache enable-disable */
+#define HDIO_SET_ACOUSTIC 0x032c /* change acoustic behavior */
+#define HDIO_SET_BUSSTATE 0x032d /* set the bus state of the hwif */
+#define HDIO_SET_QDMA 0x032e /* change use-qdma flag */
+#define HDIO_SET_ADDRESS 0x032f /* change lba addressing modes */
+
+/* bus states */
+enum {
+ BUSSTATE_OFF = 0,
+ BUSSTATE_ON,
+ BUSSTATE_TRISTATE
+};
+
+/* hd/ide ctl's that pass (arg) ptrs to user space are numbered 0x033n */
+#define HDIO_GETGEO_BIG 0x0330 /* */
+#define HDIO_GETGEO_BIG_RAW 0x0331 /* */
+
+#define __NEW_HD_DRIVE_ID
+/* structure returned by HDIO_GET_IDENTITY,
+ * as per ANSI NCITS ATA6 rev.1b spec
+ */
+struct hd_driveid {
+ unsigned short config; /* lots of obsolete bit flags */
+ unsigned short cyls; /* Obsolete, "physical" cyls */
+ unsigned short reserved2; /* reserved (word 2) */
+ unsigned short heads; /* Obsolete, "physical" heads */
+ unsigned short track_bytes; /* unformatted bytes per track */
+ unsigned short sector_bytes; /* unformatted bytes per sector */
+ unsigned short sectors; /* Obsolete, "physical" sectors per track */
+ unsigned short vendor0; /* vendor unique */
+ unsigned short vendor1; /* vendor unique */
+ unsigned short vendor2; /* Retired vendor unique */
+ unsigned char serial_no[20]; /* 0 = not_specified */
+ unsigned short buf_type; /* Retired */
+ unsigned short buf_size; /* Retired, 512 byte increments
+ * 0 = not_specified
+ */
+ unsigned short ecc_bytes; /* for r/w long cmds; 0 = not_specified */
+ unsigned char fw_rev[8]; /* 0 = not_specified */
+ unsigned char model[40]; /* 0 = not_specified */
+ unsigned char max_multsect; /* 0=not_implemented */
+ unsigned char vendor3; /* vendor unique */
+ unsigned short dword_io; /* 0=not_implemented; 1=implemented */
+ unsigned char vendor4; /* vendor unique */
+ unsigned char capability; /* (upper byte of word 49)
+ * 3: IORDYsup
+ * 2: IORDYsw
+ * 1: LBA
+ * 0: DMA
+ */
+ unsigned short reserved50; /* reserved (word 50) */
+ unsigned char vendor5; /* Obsolete, vendor unique */
+ unsigned char tPIO; /* Obsolete, 0=slow, 1=medium, 2=fast */
+ unsigned char vendor6; /* Obsolete, vendor unique */
+ unsigned char tDMA; /* Obsolete, 0=slow, 1=medium, 2=fast */
+ unsigned short field_valid; /* (word 53)
+ * 2: ultra_ok word 88
+ * 1: eide_ok words 64-70
+ * 0: cur_ok words 54-58
+ */
+ unsigned short cur_cyls; /* Obsolete, logical cylinders */
+ unsigned short cur_heads; /* Obsolete, l heads */
+ unsigned short cur_sectors; /* Obsolete, l sectors per track */
+ unsigned short cur_capacity0; /* Obsolete, l total sectors on drive */
+ unsigned short cur_capacity1; /* Obsolete, (2 words, misaligned int) */
+ unsigned char multsect; /* current multiple sector count */
+ unsigned char multsect_valid; /* when (bit0==1) multsect is ok */
+ unsigned int lba_capacity; /* Obsolete, total number of sectors */
+ unsigned short dma_1word; /* Obsolete, single-word dma info */
+ unsigned short dma_mword; /* multiple-word dma info */
+ unsigned short eide_pio_modes; /* bits 0:mode3 1:mode4 */
+ unsigned short eide_dma_min; /* min mword dma cycle time (ns) */
+ unsigned short eide_dma_time; /* recommended mword dma cycle time (ns) */
+ unsigned short eide_pio; /* min cycle time (ns), no IORDY */
+ unsigned short eide_pio_iordy; /* min cycle time (ns), with IORDY */
+ unsigned short words69_70[2]; /* reserved words 69-70
+ * future command overlap and queuing
+ */
+ /* HDIO_GET_IDENTITY currently returns only words 0 through 70 */
+ unsigned short words71_74[4]; /* reserved words 71-74
+ * for IDENTIFY PACKET DEVICE command
+ */
+ unsigned short queue_depth; /* (word 75)
+ * 15:5 reserved
+ * 4:0 Maximum queue depth -1
+ */
+ unsigned short words76_79[4]; /* reserved words 76-79 */
+ unsigned short major_rev_num; /* (word 80) */
+ unsigned short minor_rev_num; /* (word 81) */
+ unsigned short command_set_1; /* (word 82) supported
+ * 15: Obsolete
+ * 14: NOP command
+ * 13: READ_BUFFER
+ * 12: WRITE_BUFFER
+ * 11: Obsolete
+ * 10: Host Protected Area
+ * 9: DEVICE Reset
+ * 8: SERVICE Interrupt
+ * 7: Release Interrupt
+ * 6: look-ahead
+ * 5: write cache
+ * 4: PACKET Command
+ * 3: Power Management Feature Set
+ * 2: Removable Feature Set
+ * 1: Security Feature Set
+ * 0: SMART Feature Set
+ */
+ unsigned short command_set_2; /* (word 83)
+ * 15: Shall be ZERO
+ * 14: Shall be ONE
+ * 13: FLUSH CACHE EXT
+ * 12: FLUSH CACHE
+ * 11: Device Configuration Overlay
+ * 10: 48-bit Address Feature Set
+ * 9: Automatic Acoustic Management
+ * 8: SET MAX security
+ * 7: reserved 1407DT PARTIES
+ * 6: SetF sub-command Power-Up
+ * 5: Power-Up in Standby Feature Set
+ * 4: Removable Media Notification
+ * 3: APM Feature Set
+ * 2: CFA Feature Set
+ * 1: READ/WRITE DMA QUEUED
+ * 0: Download MicroCode
+ */
+ unsigned short cfsse; /* (word 84)
+ * cmd set-feature supported extensions
+ * 15: Shall be ZERO
+ * 14: Shall be ONE
+ * 13:3 reserved
+ * 2: Media Serial Number Valid
+ * 1: SMART self-test supported
+ * 0: SMART error logging
+ */
+ unsigned short cfs_enable_1; /* (word 85)
+ * command set-feature enabled
+ * 15: Obsolete
+ * 14: NOP command
+ * 13: READ_BUFFER
+ * 12: WRITE_BUFFER
+ * 11: Obsolete
+ * 10: Host Protected Area
+ * 9: DEVICE Reset
+ * 8: SERVICE Interrupt
+ * 7: Release Interrupt
+ * 6: look-ahead
+ * 5: write cache
+ * 4: PACKET Command
+ * 3: Power Management Feature Set
+ * 2: Removable Feature Set
+ * 1: Security Feature Set
+ * 0: SMART Feature Set
+ */
+ unsigned short cfs_enable_2; /* (word 86)
+ * command set-feature enabled
+ * 15: Shall be ZERO
+ * 14: Shall be ONE
+ * 13: FLUSH CACHE EXT
+ * 12: FLUSH CACHE
+ * 11: Device Configuration Overlay
+ * 10: 48-bit Address Feature Set
+ * 9: Automatic Acoustic Management
+ * 8: SET MAX security
+ * 7: reserved 1407DT PARTIES
+ * 6: SetF sub-command Power-Up
+ * 5: Power-Up in Standby Feature Set
+ * 4: Removable Media Notification
+ * 3: APM Feature Set
+ * 2: CFA Feature Set
+ * 1: READ/WRITE DMA QUEUED
+ * 0: Download MicroCode
+ */
+ unsigned short csf_default; /* (word 87)
+ * command set-feature default
+ * 15: Shall be ZERO
+ * 14: Shall be ONE
+ * 13:3 reserved
+ * 2: Media Serial Number Valid
+ * 1: SMART self-test supported
+ * 0: SMART error logging
+ */
+ unsigned short dma_ultra; /* (word 88) */
+ unsigned short word89; /* reserved (word 89) */
+ unsigned short word90; /* reserved (word 90) */
+ unsigned short CurAPMvalues; /* current APM values */
+ unsigned short word92; /* reserved (word 92) */
+ unsigned short hw_config; /* hardware config (word 93)
+ * 15:0 bit meanings not filled in
+ */
+ unsigned short acoustic; /* (word 94)
+ * 15:8 Vendor's recommended value
+ * 7:0 current value
+ */
+ unsigned short words95_99[5]; /* reserved words 95-99 */
+#if 0
+ unsigned short words100_103[4] ;/* reserved words 100-103 */
+#else
+ unsigned long long lba_capacity_2;/* 48-bit total number of sectors */
+#endif
+ unsigned short words104_125[22];/* reserved words 104-125 */
+ unsigned short last_lun; /* (word 126) */
+ unsigned short word127; /* (word 127) Feature Set
+ * Removable Media Notification
+ * 15:2 reserved
+ * 1:0 00 = not supported
+ * 01 = supported
+ * 10 = reserved
+ * 11 = reserved
+ */
+ unsigned short dlf; /* (word 128)
+ * device lock function
+ * 15:9 reserved
+ * 8 security level 1:max 0:high
+ * 7:6 reserved
+ * 5 enhanced erase
+ * 4 expire
+ * 3 frozen
+ * 2 locked
+ * 1 en/disabled
+ * 0 capability
+ */
+ unsigned short csfo; /* (word 129)
+ * current set features options
+ * 15:4 reserved
+ * 3: auto reassign
+ * 2: reverting
+ * 1: read-look-ahead
+ * 0: write cache
+ */
+ unsigned short words130_155[26];/* reserved vendor words 130-155 */
+ unsigned short word156; /* reserved vendor word 156 */
+ unsigned short words157_159[3];/* reserved vendor words 157-159 */
+ unsigned short cfa_power; /* (word 160) CFA Power Mode
+ * 15 word 160 supported
+ * 14 reserved
+ * 13:0 not documented in this header
+ */
+ unsigned short words161_175[14];/* Reserved for CFA */
+ unsigned short words176_205[31];/* Current Media Serial Number */
+ unsigned short words206_254[48];/* reserved words 206-254 */
+ unsigned short integrity_word; /* (word 255)
+ * 15:8 Checksum
+ * 7:0 Signature
+ */
+};
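+
+/*
+ * Illustrative sketch (assumed user-space usage): fetching the identify
+ * block with HDIO_GET_IDENTITY and printing fields within the word 0-70
+ * range noted above.  'fd' is a hypothetical open file descriptor.
+ */
+#if 0
+    struct hd_driveid id;
+
+    if (ioctl(fd, HDIO_GET_IDENTITY, &id) == 0)
+        printf("model: %.40s serial: %.20s\n", id.model, id.serial_no);
+#endif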
+
+/*
+ * IDE "nice" flags. These are used on a per drive basis to determine
+ * when to be nice and give more bandwidth to the other devices which
+ * share the same IDE bus.
+ */
+#define IDE_NICE_DSC_OVERLAP (0) /* per the DSC overlap protocol */
+#define IDE_NICE_ATAPI_OVERLAP (1) /* not supported yet */
+#define IDE_NICE_0 (2) /* when sure that it won't affect us */
+#define IDE_NICE_1 (3) /* when probably won't affect us much */
+#define IDE_NICE_2 (4) /* when we know it's on our expense */
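+
+/*
+ * Illustrative sketch (an assumption about how these values are used):
+ * they are taken here to be bit positions in the word exchanged via
+ * HDIO_GET_NICE and HDIO_SET_NICE.  'fd' is a hypothetical open file
+ * descriptor.
+ */
+#if 0
+    long nice = 0;
+
+    ioctl(fd, HDIO_GET_NICE, &nice);    /* ptr-style ioctl (0x030c) */
+    nice |= 1 << IDE_NICE_1;            /* yield when it probably won't hurt */
+    ioctl(fd, HDIO_SET_NICE, nice);     /* value-style ioctl (0x0329) */
+#endif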
+
+#ifdef __KERNEL__
+/*
+ * These routines are used for kernel command line parameters from main.c:
+ */
+#include <linux/config.h>
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+int ide_register(int io_port, int ctl_port, int irq);
+void ide_unregister(unsigned int);
+#endif /* CONFIG_BLK_DEV_IDE || CONFIG_BLK_DEV_IDE_MODULE */
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_HDREG_H */
diff --git a/xen/include/xeno/hdsmart.h b/xen/include/xeno/hdsmart.h
new file mode 100644
index 0000000000..7974a47fe5
--- /dev/null
+++ b/xen/include/xeno/hdsmart.h
@@ -0,0 +1,124 @@
+/*
+ * linux/include/linux/hdsmart.h
+ *
+ * Copyright (C) 1999-2000 Michael Cornwell <cornwell@acm.org>
+ * Copyright (C) 2000 Andre Hedrick <andre@linux-ide.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _LINUX_HDSMART_H
+#define _LINUX_HDSMART_H
+
+#define OFFLINE_FULL_SCAN 0
+#define SHORT_SELF_TEST 1
+#define EXTEND_SELF_TEST 2
+#define SHORT_CAPTIVE_SELF_TEST 129
+#define EXTEND_CAPTIVE_SELF_TEST 130
+
+/* smart_attribute is the vendor-specific attribute format defined in the SFF-8035 spec */
+typedef struct ata_smart_attribute_s {
+ unsigned char id;
+ unsigned short status_flag;
+ unsigned char normalized;
+ unsigned char worse_normal;
+ unsigned char raw[6];
+ unsigned char reserv;
+} __attribute__ ((packed)) ata_smart_attribute_t;
+
+/* smart_values is the format returned by the SMART Read Attribute Values command */
+typedef struct ata_smart_values_s {
+ unsigned short revnumber;
+ ata_smart_attribute_t vendor_attributes [30];
+ unsigned char offline_data_collection_status;
+ unsigned char self_test_exec_status;
+ unsigned short total_time_to_complete_off_line;
+ unsigned char vendor_specific_366;
+ unsigned char offline_data_collection_capability;
+ unsigned short smart_capability;
+ unsigned char errorlog_capability;
+ unsigned char vendor_specific_371;
+ unsigned char short_test_completion_time;
+ unsigned char extend_test_completion_time;
+ unsigned char reserved_374_385 [12];
+ unsigned char vendor_specific_386_509 [125];
+ unsigned char chksum;
+} __attribute__ ((packed)) ata_smart_values_t;
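+
+/*
+ * Illustrative sketch (an assumption from the SFF-8035 convention these
+ * structures follow, not spelled out in this header): the 512-byte SMART
+ * data sector is validated by summing all of its bytes, chksum included;
+ * a valid sector sums to zero modulo 256.
+ */
+#if 0
+static int ata_smart_csum_ok(const ata_smart_values_t *v)
+{
+    const unsigned char *p = (const unsigned char *)v;
+    unsigned char sum = 0;
+    int i;
+
+    for (i = 0; i < 512; i++)
+        sum += p[i];
+    return sum == 0;
+}
+#endif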
+
+/* Smart Threshold data structures */
+/* Vendor attribute of SMART Threshold */
+typedef struct ata_smart_threshold_entry_s {
+ unsigned char id;
+ unsigned char normalized_threshold;
+ unsigned char reserved[10];
+} __attribute__ ((packed)) ata_smart_threshold_entry_t;
+
+/* Format of the Read SMART Threshold command */
+typedef struct ata_smart_thresholds_s {
+ unsigned short revnumber;
+ ata_smart_threshold_entry_t thres_entries[30];
+ unsigned char reserved[149];
+ unsigned char chksum;
+} __attribute__ ((packed)) ata_smart_thresholds_t;
+
+typedef struct ata_smart_errorlog_command_struct_s {
+ unsigned char devicecontrolreg;
+ unsigned char featuresreg;
+ unsigned char sector_count;
+ unsigned char sector_number;
+ unsigned char cylinder_low;
+ unsigned char cylinder_high;
+ unsigned char drive_head;
+ unsigned char commandreg;
+ unsigned int timestamp;
+} __attribute__ ((packed)) ata_smart_errorlog_command_struct_t;
+
+typedef struct ata_smart_errorlog_error_struct_s {
+ unsigned char error_condition;
+ unsigned char extended_error[14];
+ unsigned char state;
+ unsigned short timestamp;
+} __attribute__ ((packed)) ata_smart_errorlog_error_struct_t;
+
+typedef struct ata_smart_errorlog_struct_s {
+ ata_smart_errorlog_command_struct_t commands[6];
+ ata_smart_errorlog_error_struct_t error_struct;
+} __attribute__ ((packed)) ata_smart_errorlog_struct_t;
+
+typedef struct ata_smart_errorlog_s {
+ unsigned char revnumber;
+ unsigned char error_log_pointer;
+ ata_smart_errorlog_struct_t errorlog_struct[5];
+ unsigned short ata_error_count;
+ unsigned short non_fatal_count;
+ unsigned short drive_timeout_count;
+ unsigned char reserved[53];
+ unsigned char chksum;
+} __attribute__ ((packed)) ata_smart_errorlog_t;
+
+typedef struct ata_smart_selftestlog_struct_s {
+ unsigned char selftestnumber;
+ unsigned char selfteststatus;
+ unsigned short timestamp;
+ unsigned char selftestfailurecheckpoint;
+ unsigned int lbafirstfailure;
+ unsigned char vendorspecific[15];
+} __attribute__ ((packed)) ata_smart_selftestlog_struct_t;
+
+typedef struct ata_smart_selftestlog_s {
+ unsigned short revnumber;
+ ata_smart_selftestlog_struct_t selftest_struct[21];
+ unsigned char vendorspecific[2];
+ unsigned char mostrecenttest;
+ unsigned char resevered[2];
+ unsigned char chksum;
+} __attribute__ ((packed)) ata_smart_selftestlog_t;
+
+#endif /* _LINUX_HDSMART_H */
diff --git a/xen/include/xeno/ide.h b/xen/include/xeno/ide.h
new file mode 100644
index 0000000000..dacfd89842
--- /dev/null
+++ b/xen/include/xeno/ide.h
@@ -0,0 +1,1105 @@
+#ifndef _IDE_H
+#define _IDE_H
+/*
+ * linux/include/linux/ide.h
+ *
+ * Copyright (C) 1994-1998 Linus Torvalds & authors
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/ioport.h>
+#include <xeno/hdreg.h>
+#include <xeno/hdsmart.h>
+#include <xeno/major.h>
+#include <xeno/blkdev.h>
+/*#include <xeno/proc_fs.h>*/
+/*#include <xeno/devfs_fs_kernel.h>*/
+#include <asm/hdreg.h>
+
+/*
+ * This is the multiple IDE interface driver, as evolved from hd.c.
+ * It supports up to four IDE interfaces, on one or more IRQs (usually 14 & 15).
+ * There can be up to two drives per interface, as per the ATA-2 spec.
+ *
+ * Primary i/f: ide0: major=3; (hda) minor=0; (hdb) minor=64
+ * Secondary i/f: ide1: major=22; (hdc or hd1a) minor=0; (hdd or hd1b) minor=64
+ * Tertiary i/f: ide2: major=33; (hde) minor=0; (hdf) minor=64
+ * Quaternary i/f: ide3: major=34; (hdg) minor=0; (hdh) minor=64
+ */
+
+/******************************************************************************
+ * IDE driver configuration options (play with these as desired):
+ *
+ * REALLY_SLOW_IO can be defined in ide.c and ide-cd.c, if necessary
+ */
+#undef REALLY_FAST_IO /* define if ide ports are perfect */
+#define INITIAL_MULT_COUNT 0 /* off=0; on=2,4,8,16,32, etc.. */
+
+#ifndef SUPPORT_SLOW_DATA_PORTS /* 1 to support slow data ports */
+#define SUPPORT_SLOW_DATA_PORTS 1 /* 0 to reduce kernel size */
+#endif
+#ifndef SUPPORT_VLB_SYNC /* 1 to support weird 32-bit chips */
+#define SUPPORT_VLB_SYNC 1 /* 0 to reduce kernel size */
+#endif
+#ifndef DISK_RECOVERY_TIME /* off=0; on=access_delay_time */
+#define DISK_RECOVERY_TIME 0 /* for hardware that needs it */
+#endif
+#ifndef OK_TO_RESET_CONTROLLER /* 1 needed for good error recovery */
+#define OK_TO_RESET_CONTROLLER 1 /* 0 for use with AH2372A/B interface */
+#endif
+#ifndef FANCY_STATUS_DUMPS /* 1 for human-readable drive errors */
+#define FANCY_STATUS_DUMPS 1 /* 0 to reduce kernel size */
+#endif
+
+#ifdef CONFIG_BLK_DEV_CMD640
+#if 0 /* change to 1 when debugging cmd640 problems */
+void cmd640_dump_regs (void);
+#define CMD640_DUMP_REGS cmd640_dump_regs() /* for debugging cmd640 chipset */
+#endif
+#endif /* CONFIG_BLK_DEV_CMD640 */
+
+#ifndef DISABLE_IRQ_NOSYNC
+#define DISABLE_IRQ_NOSYNC 0
+#endif
+
+/*
+ * IDE_DRIVE_CMD is used to implement many features of the hdparm utility
+ */
+#define IDE_DRIVE_CMD 99 /* (magic) undef to reduce kernel size*/
+
+#define IDE_DRIVE_TASK 98
+
+/*
+ * IDE_DRIVE_TASKFILE is used to implement many features needed for raw tasks
+ */
+#define IDE_DRIVE_TASKFILE 97
+
+/*
+ * "No user-serviceable parts" beyond this point :)
+ *****************************************************************************/
+
+typedef unsigned char byte; /* used everywhere */
+
+/*
+ * Probably not wise to fiddle with these
+ */
+#define ERROR_MAX 8 /* Max read/write errors per sector */
+#define ERROR_RESET 3 /* Reset controller every 4th retry */
+#define ERROR_RECAL 1 /* Recalibrate every 2nd retry */
+
+/*
+ * state flags
+ */
+#define DMA_PIO_RETRY 1 /* retrying in PIO */
+
+/*
+ * Ensure that various configuration flags have compatible settings
+ */
+#ifdef REALLY_SLOW_IO
+#undef REALLY_FAST_IO
+#endif
+
+#define HWIF(drive) ((ide_hwif_t *)((drive)->hwif))
+#define HWGROUP(drive) ((ide_hwgroup_t *)(HWIF(drive)->hwgroup))
+
+/*
+ * Definitions for accessing IDE controller registers
+ */
+#define IDE_NR_PORTS (10)
+
+#define IDE_DATA_OFFSET (0)
+#define IDE_ERROR_OFFSET (1)
+#define IDE_NSECTOR_OFFSET (2)
+#define IDE_SECTOR_OFFSET (3)
+#define IDE_LCYL_OFFSET (4)
+#define IDE_HCYL_OFFSET (5)
+#define IDE_SELECT_OFFSET (6)
+#define IDE_STATUS_OFFSET (7)
+#define IDE_CONTROL_OFFSET (8)
+#define IDE_IRQ_OFFSET (9)
+
+#define IDE_FEATURE_OFFSET IDE_ERROR_OFFSET
+#define IDE_COMMAND_OFFSET IDE_STATUS_OFFSET
+
+#define IDE_DATA_OFFSET_HOB (0)
+#define IDE_ERROR_OFFSET_HOB (1)
+#define IDE_NSECTOR_OFFSET_HOB (2)
+#define IDE_SECTOR_OFFSET_HOB (3)
+#define IDE_LCYL_OFFSET_HOB (4)
+#define IDE_HCYL_OFFSET_HOB (5)
+#define IDE_SELECT_OFFSET_HOB (6)
+#define IDE_CONTROL_OFFSET_HOB (7)
+
+#define IDE_FEATURE_OFFSET_HOB IDE_ERROR_OFFSET_HOB
+
+#define IDE_DATA_REG (HWIF(drive)->io_ports[IDE_DATA_OFFSET])
+#define IDE_ERROR_REG (HWIF(drive)->io_ports[IDE_ERROR_OFFSET])
+#define IDE_NSECTOR_REG (HWIF(drive)->io_ports[IDE_NSECTOR_OFFSET])
+#define IDE_SECTOR_REG (HWIF(drive)->io_ports[IDE_SECTOR_OFFSET])
+#define IDE_LCYL_REG (HWIF(drive)->io_ports[IDE_LCYL_OFFSET])
+#define IDE_HCYL_REG (HWIF(drive)->io_ports[IDE_HCYL_OFFSET])
+#define IDE_SELECT_REG (HWIF(drive)->io_ports[IDE_SELECT_OFFSET])
+#define IDE_STATUS_REG (HWIF(drive)->io_ports[IDE_STATUS_OFFSET])
+#define IDE_CONTROL_REG (HWIF(drive)->io_ports[IDE_CONTROL_OFFSET])
+#define IDE_IRQ_REG (HWIF(drive)->io_ports[IDE_IRQ_OFFSET])
+
+#define IDE_DATA_REG_HOB (HWIF(drive)->io_ports[IDE_DATA_OFFSET])
+#define IDE_ERROR_REG_HOB (HWIF(drive)->io_ports[IDE_ERROR_OFFSET])
+#define IDE_NSECTOR_REG_HOB (HWIF(drive)->io_ports[IDE_NSECTOR_OFFSET])
+#define IDE_SECTOR_REG_HOB (HWIF(drive)->io_ports[IDE_SECTOR_OFFSET])
+#define IDE_LCYL_REG_HOB (HWIF(drive)->io_ports[IDE_LCYL_OFFSET])
+#define IDE_HCYL_REG_HOB (HWIF(drive)->io_ports[IDE_HCYL_OFFSET])
+#define IDE_SELECT_REG_HOB (HWIF(drive)->io_ports[IDE_SELECT_OFFSET])
+#define IDE_STATUS_REG_HOB (HWIF(drive)->io_ports[IDE_STATUS_OFFSET])
+#define IDE_CONTROL_REG_HOB (HWIF(drive)->io_ports[IDE_CONTROL_OFFSET])
+
+#define IDE_FEATURE_REG IDE_ERROR_REG
+#define IDE_COMMAND_REG IDE_STATUS_REG
+#define IDE_ALTSTATUS_REG IDE_CONTROL_REG
+#define IDE_IREASON_REG IDE_NSECTOR_REG
+#define IDE_BCOUNTL_REG IDE_LCYL_REG
+#define IDE_BCOUNTH_REG IDE_HCYL_REG
+
+#define GET_ERR() IN_BYTE(IDE_ERROR_REG)
+#define GET_STAT() IN_BYTE(IDE_STATUS_REG)
+#define GET_ALTSTAT() IN_BYTE(IDE_CONTROL_REG)
+#define OK_STAT(stat,good,bad) (((stat)&((good)|(bad)))==(good))
+#define BAD_R_STAT (BUSY_STAT | ERR_STAT)
+#define BAD_W_STAT (BAD_R_STAT | WRERR_STAT)
+#define BAD_STAT (BAD_R_STAT | DRQ_STAT)
+#define DRIVE_READY (READY_STAT | SEEK_STAT)
+#define DATA_READY (DRQ_STAT)
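+
+/*
+ * Illustrative sketch: the usual status test built from the macros above.
+ * GET_STAT() implicitly uses a 'drive' variable in scope via HWIF().
+ */
+#if 0
+    byte stat = GET_STAT();
+
+    if (OK_STAT(stat, DRIVE_READY, BAD_W_STAT))
+        /* all "good" bits set and no "bad" bits set */ ;
+#endif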
+
+/*
+ * Some more useful definitions
+ */
+#define IDE_MAJOR_NAME "hd" /* the same for all i/f; see also genhd.c */
+#define MAJOR_NAME IDE_MAJOR_NAME
+#define PARTN_BITS 6 /* number of minor dev bits for partitions */
+#define PARTN_MASK ((1<<PARTN_BITS)-1) /* a useful bit mask */
+#define MAX_DRIVES 2 /* per interface; 2 assumed by lots of code */
+#define CASCADE_DRIVES 8 /* per interface; 8|2 assumed by lots of code */
+#define SECTOR_SIZE 512
+#define SECTOR_WORDS (SECTOR_SIZE / 4) /* number of 32bit words per sector */
+#define IDE_LARGE_SEEK(b1,b2,t) (((b1) > (b2) + (t)) || ((b2) > (b1) + (t)))
+#define IDE_MIN(a,b) ((a)<(b) ? (a):(b))
+#define IDE_MAX(a,b) ((a)>(b) ? (a):(b))
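+
+/*
+ * Illustrative sketch (not in the original header): PARTN_BITS/PARTN_MASK
+ * split a minor number into drive unit and partition, e.g. minor 67
+ * ("hdb3" on ide0) -> unit 1, partition 3.  'minor' is an arbitrary
+ * value in scope.
+ */
+#if 0
+    unsigned int unit      = minor >> PARTN_BITS;   /* drive on the i/f */
+    unsigned int partition = minor &  PARTN_MASK;   /* 0 = whole disk   */
+#endif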
+
+#ifndef SPLIT_WORD
+# define SPLIT_WORD(W,HB,LB) ((HB)=(W>>8), (LB)=(W-((W>>8)<<8)))
+#endif
+#ifndef MAKE_WORD
+# define MAKE_WORD(W,HB,LB) ((W)=((HB<<8)+LB))
+#endif
+
+
+/*
+ * Timeouts for various operations:
+ */
+#define WAIT_DRQ (5*HZ/100) /* 50msec - spec allows up to 20ms */
+#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
+#define WAIT_READY (5*HZ) /* 5sec - some laptops are very slow */
+#else
+#define WAIT_READY (3*HZ/100) /* 30msec - should be instantaneous */
+#endif /* CONFIG_APM || CONFIG_APM_MODULE */
+#define WAIT_PIDENTIFY (10*HZ) /* 10sec - should be less than 3ms (?), if all ATAPI CD-ROM drives are closed at boot */
+#define WAIT_WORSTCASE (30*HZ) /* 30sec - worst case when spinning up */
+#define WAIT_CMD (10*HZ) /* 10sec - maximum wait for an IRQ to happen */
+#define WAIT_MIN_SLEEP (2*HZ/100) /* 20msec - minimum sleep time */
+
+#define SELECT_DRIVE(hwif,drive) \
+{ \
+ if (hwif->selectproc) \
+ hwif->selectproc(drive); \
+ OUT_BYTE((drive)->select.all, hwif->io_ports[IDE_SELECT_OFFSET]); \
+}
+
+#define SELECT_INTERRUPT(hwif,drive) \
+{ \
+ if (hwif->intrproc) \
+ hwif->intrproc(drive); \
+ else \
+ OUT_BYTE((drive)->ctl|2, hwif->io_ports[IDE_CONTROL_OFFSET]); \
+}
+
+#define SELECT_MASK(hwif,drive,mask) \
+{ \
+ if (hwif->maskproc) \
+ hwif->maskproc(drive,mask); \
+}
+
+#define SELECT_READ_WRITE(hwif,drive,func) \
+{ \
+ if (hwif->rwproc) \
+ hwif->rwproc(drive,func); \
+}
+
+#define QUIRK_LIST(hwif,drive) \
+{ \
+ if (hwif->quirkproc) \
+ (drive)->quirk_list = hwif->quirkproc(drive); \
+}
+
+#define HOST(hwif,chipset) \
+{ \
+ return ((hwif)->chipset == chipset) ? 1 : 0; \
+}
+
+#define IDE_DEBUG(lineno) \
+ printk("%s,%s,line=%d\n", __FILE__, __FUNCTION__, (lineno))
+
+/*
+ * Check for an interrupt and acknowledge the interrupt status
+ */
+struct hwif_s;
+typedef int (ide_ack_intr_t)(struct hwif_s *);
+
+#ifndef NO_DMA
+#define NO_DMA 255
+#endif
+
+/*
+ * hwif_chipset_t is used to keep track of the specific hardware
+ * chipset used by each IDE interface, if known.
+ */
+typedef enum { ide_unknown, ide_generic, ide_pci,
+ ide_cmd640, ide_dtc2278, ide_ali14xx,
+ ide_qd65xx, ide_umc8672, ide_ht6560b,
+ ide_pdc4030, ide_rz1000, ide_trm290,
+ ide_cmd646, ide_cy82c693, ide_4drives,
+ ide_pmac, ide_etrax100
+} hwif_chipset_t;
+
+/*
+ * Structure to hold all information about the location of this port
+ */
+typedef struct hw_regs_s {
+ ide_ioreg_t io_ports[IDE_NR_PORTS]; /* task file registers */
+ int irq; /* our irq number */
+ int dma; /* our dma entry */
+ ide_ack_intr_t *ack_intr; /* acknowledge interrupt */
+ void *priv; /* interface specific data */
+ hwif_chipset_t chipset;
+} hw_regs_t;
+
+/*
+ * Register new hardware with ide
+ */
+int ide_register_hw(hw_regs_t *hw, struct hwif_s **hwifp);
+
+/*
+ * Set up hw_regs_t structure before calling ide_register_hw (optional)
+ */
+void ide_setup_ports( hw_regs_t *hw,
+ ide_ioreg_t base,
+ int *offsets,
+ ide_ioreg_t ctrl,
+ ide_ioreg_t intr,
+ ide_ack_intr_t *ack_intr,
+ int irq);
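+
+/*
+ * Illustrative sketch (an assumption about intended use of the call
+ * above): describing a legacy interface by hand and registering it.
+ * The port numbers mirror the primary-channel defaults from
+ * <xeno/hdreg.h>.
+ */
+#if 0
+    hw_regs_t hw;
+    struct hwif_s *hwif;
+    int i;
+
+    memset(&hw, 0, sizeof(hw));
+    for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++)
+        hw.io_ports[i] = 0x1f0 + i;             /* task file 0x1f0-0x1f7 */
+    hw.io_ports[IDE_CONTROL_OFFSET] = 0x3f6;    /* control/altstatus     */
+    hw.irq = 14;
+    if (ide_register_hw(&hw, &hwif) < 0)
+        /* registration failed */ ;
+#endif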
+
+#include <asm/ide.h>
+
+/*
+ * If the arch-dependant ide.h did not declare/define any OUT_BYTE
+ * or IN_BYTE functions, we make some defaults here.
+ */
+
+#ifndef HAVE_ARCH_OUT_BYTE
+#ifdef REALLY_FAST_IO
+#define OUT_BYTE(b,p) outb((b),(p))
+#define OUT_WORD(w,p) outw((w),(p))
+#else
+#define OUT_BYTE(b,p) outb_p((b),(p))
+#define OUT_WORD(w,p) outw_p((w),(p))
+#endif
+#endif
+
+#ifndef HAVE_ARCH_IN_BYTE
+#ifdef REALLY_FAST_IO
+#define IN_BYTE(p) (byte)inb(p)
+#define IN_WORD(p) (short)inw(p)
+#else
+#define IN_BYTE(p) (byte)inb_p(p)
+#define IN_WORD(p) (short)inw_p(p)
+#endif
+#endif
+
+/*
+ * Now for the data we need to maintain per-drive: ide_drive_t
+ */
+
+#define ide_scsi 0x21
+#define ide_disk 0x20
+#define ide_optical 0x7
+#define ide_cdrom 0x5
+#define ide_tape 0x1
+#define ide_floppy 0x0
+
+typedef union {
+ unsigned all : 8; /* all of the bits together */
+ struct {
+ unsigned set_geometry : 1; /* respecify drive geometry */
+ unsigned recalibrate : 1; /* seek to cyl 0 */
+ unsigned set_multmode : 1; /* set multmode count */
+ unsigned set_tune : 1; /* tune interface for drive */
+ unsigned reserved : 4; /* unused */
+ } b;
+} special_t;
+
+typedef struct ide_drive_s {
+ request_queue_t queue; /* request queue */
+ struct ide_drive_s *next; /* circular list of hwgroup drives */
+ unsigned long sleep; /* sleep until this time */
+ unsigned long service_start; /* time we started last request */
+ unsigned long service_time; /* service time of last request */
+ unsigned long timeout; /* max time to wait for irq */
+ special_t special; /* special action flags */
+ byte keep_settings; /* restore settings after drive reset */
+ byte using_dma; /* disk is using dma for read/write */
+ byte retry_pio; /* retrying dma capable host in pio */
+ byte state; /* retry state */
+ byte waiting_for_dma; /* dma currently in progress */
+ byte unmask; /* flag: okay to unmask other irqs */
+ byte slow; /* flag: slow data port */
+ byte bswap; /* flag: byte swap data */
+ byte dsc_overlap; /* flag: DSC overlap */
+ byte nice1; /* flag: give potential excess bandwidth */
+ unsigned present : 1; /* drive is physically present */
+ unsigned noprobe : 1; /* from: hdx=noprobe */
+ unsigned busy : 1; /* currently doing revalidate_disk() */
+ unsigned removable : 1; /* 1 if need to do check_media_change */
+ unsigned forced_geom : 1; /* 1 if hdx=c,h,s was given at boot */
+ unsigned no_unmask : 1; /* disallow setting unmask bit */
+ unsigned no_io_32bit : 1; /* disallow enabling 32bit I/O */
+ unsigned nobios : 1; /* flag: do not probe bios for drive */
+ unsigned revalidate : 1; /* request revalidation */
+ unsigned atapi_overlap : 1; /* flag: ATAPI overlap (not supported) */
+ unsigned nice0 : 1; /* flag: give obvious excess bandwidth */
+ unsigned nice2 : 1; /* flag: give a share in our own bandwidth */
+ unsigned doorlocking : 1; /* flag: for removable only: door lock/unlock works */
+ unsigned autotune : 2; /* 1=autotune, 2=noautotune, 0=default */
+ unsigned remap_0_to_1 : 2; /* 0=remap if ezdrive, 1=remap, 2=noremap */
+ unsigned ata_flash : 1; /* 1=present, 0=default */
+ unsigned addressing; /* : 2; 0=28-bit, 1=48-bit, 2=64-bit */
+ byte scsi; /* 0=default, 1=skip current ide-subdriver for ide-scsi emulation */
+ byte media; /* disk, cdrom, tape, floppy, ... */
+ select_t select; /* basic drive/head select reg value */
+ byte ctl; /* "normal" value for IDE_CONTROL_REG */
+ byte ready_stat; /* min status value for drive ready */
+ byte mult_count; /* current multiple sector setting */
+ byte mult_req; /* requested multiple sector setting */
+ byte tune_req; /* requested drive tuning setting */
+ byte io_32bit; /* 0=16-bit, 1=32-bit, 2/3=32bit+sync */
+ byte bad_wstat; /* used for ignoring WRERR_STAT */
+ byte nowerr; /* used for ignoring WRERR_STAT */
+ byte sect0; /* offset of first sector for DM6:DDO */
+ unsigned int usage; /* current "open()" count for drive */
+ byte head; /* "real" number of heads */
+ byte sect; /* "real" sectors per track */
+ byte bios_head; /* BIOS/fdisk/LILO number of heads */
+ byte bios_sect; /* BIOS/fdisk/LILO sectors per track */
+ unsigned int bios_cyl; /* BIOS/fdisk/LILO number of cyls */
+ unsigned int cyl; /* "real" number of cyls */
+ unsigned long capacity; /* total number of sectors */
+ unsigned long long capacity48; /* total number of sectors */
+ unsigned int drive_data; /* for use by tuneproc/selectproc as needed */
+ void *hwif; /* actually (ide_hwif_t *) */
+ /*wait_queue_head_t wqueue;*/ /* used to wait for drive in open() */
+ struct hd_driveid *id; /* drive model identification info */
+ struct hd_struct *part; /* drive partition table */
+ char name[4]; /* drive name, such as "hda" */
+ void *driver; /* (ide_driver_t *) */
+ void *driver_data; /* extra driver data */
+ /*devfs_handle_t de; */ /* directory for device */
+ struct proc_dir_entry *proc; /* /proc/ide/ directory entry */
+ void *settings; /* /proc/ide/ drive settings */
+ char driver_req[10]; /* requests specific driver */
+ int last_lun; /* last logical unit */
+ int forced_lun; /* if hdxlun was given at boot */
+ int lun; /* logical unit */
+ int crc_count; /* crc counter to reduce drive speed */
+ byte quirk_list; /* drive is considered quirky if set for a specific host */
+ byte suspend_reset; /* drive suspend mode flag, soft-reset recovers */
+ byte init_speed; /* transfer rate set at boot */
+ byte current_speed; /* current transfer rate set */
+ byte dn; /* now in widespread use */
+ byte wcache; /* status of write cache */
+ byte acoustic; /* acoustic management */
+ unsigned int failures; /* current failure count */
+ unsigned int max_failures; /* maximum allowed failure count */
+} ide_drive_t;
+
+/*
+ * An ide_dmaproc_t() initiates/aborts DMA read/write operations on a drive.
+ *
+ * The caller is assumed to have selected the drive and programmed the drive's
+ * sector address using CHS or LBA. All that remains is to prepare for DMA
+ * and then issue the actual read/write DMA/PIO command to the drive.
+ *
+ * Returns 0 if all went well.
+ * Returns 1 if DMA read/write could not be started, in which case the caller
+ * should either try again later, or revert to PIO for the current request.
+ */
+typedef enum { ide_dma_read, ide_dma_write, ide_dma_begin,
+ ide_dma_end, ide_dma_check, ide_dma_on,
+ ide_dma_off, ide_dma_off_quietly, ide_dma_test_irq,
+ ide_dma_bad_drive, ide_dma_good_drive,
+ ide_dma_verbose, ide_dma_retune,
+ ide_dma_lostirq, ide_dma_timeout
+} ide_dma_action_t;
+
+typedef int (ide_dmaproc_t)(ide_dma_action_t, ide_drive_t *);
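+
+/*
+ * Illustrative sketch (a skeleton, not a real chipset implementation):
+ * the shape of a dmaproc honouring the return convention above.
+ */
+#if 0
+static int example_dmaproc (ide_dma_action_t func, ide_drive_t *drive)
+{
+    switch (func) {
+    case ide_dma_check:
+        return 0;   /* report that DMA is usable for this drive */
+    case ide_dma_read:
+    case ide_dma_write:
+        return 1;   /* could not start DMA; caller reverts to PIO */
+    default:
+        return 0;
+    }
+}
+#endif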
+
+/*
+ * An ide_ideproc_t() performs CPU-polled transfers to/from a drive.
+ * Arguments are: the drive, the buffer pointer, and the length (in bytes or
+ * words depending on if it's an IDE or ATAPI call).
+ *
+ * If it is not defined for a controller, standard code from ide.c is used.
+ *
+ * Controllers which are not memory-mapped in the standard way need to
+ * override that mechanism using this function to work.
+ *
+ */
+typedef enum { ideproc_ide_input_data, ideproc_ide_output_data,
+ ideproc_atapi_input_bytes, ideproc_atapi_output_bytes
+} ide_ide_action_t;
+
+typedef void (ide_ideproc_t)(ide_ide_action_t, ide_drive_t *, void *, unsigned int);
+
+/*
+ * An ide_tuneproc_t() is used to set the speed of an IDE interface
+ * to a particular PIO mode. The "byte" parameter is used
+ * to select the PIO mode by number (0,1,2,3,4,5), and a value of 255
+ * indicates that the interface driver should "auto-tune" the PIO mode
+ * according to the drive capabilities in drive->id;
+ *
+ * Not all interface types support tuning, and not all of those
+ * support all possible PIO settings. They may silently ignore
+ * or round values as they see fit.
+ */
+typedef void (ide_tuneproc_t) (ide_drive_t *, byte);
+typedef int (ide_speedproc_t) (ide_drive_t *, byte);
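+
+/*
+ * Illustrative sketch (a skeleton): a tuneproc honouring the "255 means
+ * auto-tune" rule described above; the fallback mode chosen here is
+ * arbitrary.
+ */
+#if 0
+static void example_tuneproc (ide_drive_t *drive, byte pio)
+{
+    if (pio == 255)
+        pio = 2;    /* auto-tune: derive a mode from drive->id instead */
+    if (pio > 4)
+        pio = 4;    /* silently round down, as the comment above allows */
+    /* ... program the interface timing registers for mode 'pio' ... */
+}
+#endif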
+
+/*
+ * This is used to provide support for strange interfaces
+ */
+typedef void (ide_selectproc_t) (ide_drive_t *);
+typedef void (ide_resetproc_t) (ide_drive_t *);
+typedef int (ide_quirkproc_t) (ide_drive_t *);
+typedef void (ide_intrproc_t) (ide_drive_t *);
+typedef void (ide_maskproc_t) (ide_drive_t *, int);
+typedef void (ide_rw_proc_t) (ide_drive_t *, ide_dma_action_t);
+
+/*
+ * ide soft-power support
+ */
+typedef int (ide_busproc_t) (ide_drive_t *, int);
+
+#define IDE_CHIPSET_PCI_MASK \
+ ((1<<ide_pci)|(1<<ide_cmd646)|(1<<ide_ali14xx))
+#define IDE_CHIPSET_IS_PCI(c) ((IDE_CHIPSET_PCI_MASK >> (c)) & 1)
+
+#ifdef CONFIG_BLK_DEV_IDEPCI
+typedef struct ide_pci_devid_s {
+ unsigned short vid;
+ unsigned short did;
+} ide_pci_devid_t;
+
+#define IDE_PCI_DEVID_NULL ((ide_pci_devid_t){0,0})
+#define IDE_PCI_DEVID_EQ(a,b) (a.vid == b.vid && a.did == b.did)
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+
+typedef struct hwif_s {
+ struct hwif_s *next; /* for linked-list in ide_hwgroup_t */
+ void *hwgroup; /* actually (ide_hwgroup_t *) */
+ ide_ioreg_t io_ports[IDE_NR_PORTS]; /* task file registers */
+ hw_regs_t hw; /* Hardware info */
+ ide_drive_t drives[MAX_DRIVES]; /* drive info */
+ struct gendisk *gd; /* gendisk structure */
+ ide_tuneproc_t *tuneproc; /* routine to tune PIO mode for drives */
+ ide_speedproc_t *speedproc; /* routine to retune DMA modes for drives */
+ ide_selectproc_t *selectproc; /* tweaks hardware to select drive */
+ ide_resetproc_t *resetproc; /* routine to reset controller after a disk reset */
+ ide_intrproc_t *intrproc; /* special interrupt handling for shared pci interrupts */
+ ide_maskproc_t *maskproc; /* special host masking for drive selection */
+ ide_quirkproc_t *quirkproc; /* check host's drive quirk list */
+ ide_rw_proc_t *rwproc; /* adjust timing based upon rq->cmd direction */
+ ide_ideproc_t *ideproc; /* CPU-polled transfer routine */
+ ide_dmaproc_t *dmaproc; /* dma read/write/abort routine */
+ unsigned int *dmatable_cpu; /* dma physical region descriptor table (cpu view) */
+ dma_addr_t dmatable_dma; /* dma physical region descriptor table (dma view) */
+ struct scatterlist *sg_table; /* Scatter-gather list used to build the above */
+ int sg_nents; /* Current number of entries in it */
+ int sg_dma_direction; /* dma transfer direction */
+ int sg_dma_active; /* is it in use */
+ struct hwif_s *mate; /* other hwif from same PCI chip */
+ unsigned long dma_base; /* base addr for dma ports */
+ unsigned dma_extra; /* extra addr for dma ports */
+ unsigned long config_data; /* for use by chipset-specific code */
+ unsigned long select_data; /* for use by chipset-specific code */
+ struct proc_dir_entry *proc; /* /proc/ide/ directory entry */
+ int irq; /* our irq number */
+ byte major; /* our major number */
+ char name[6]; /* name of interface, eg. "ide0" */
+ byte index; /* 0 for ide0; 1 for ide1; ... */
+ hwif_chipset_t chipset; /* sub-module for tuning.. */
+ unsigned noprobe : 1; /* don't probe for this interface */
+ unsigned present : 1; /* this interface exists */
+ unsigned serialized : 1; /* serialized operation with mate hwif */
+ unsigned sharing_irq: 1; /* 1 = sharing irq with another hwif */
+ unsigned reset : 1; /* reset after probe */
+ unsigned autodma : 1; /* automatically try to enable DMA at boot */
+ unsigned udma_four : 1; /* 1=ATA-66 capable, 0=default */
+ byte channel; /* for dual-port chips: 0=primary, 1=secondary */
+#ifdef CONFIG_BLK_DEV_IDEPCI
+ struct pci_dev *pci_dev; /* for pci chipsets */
+ ide_pci_devid_t pci_devid; /* for pci chipsets: {VID,DID} */
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+#if (DISK_RECOVERY_TIME > 0)
+ unsigned long last_time; /* time when previous rq was done */
+#endif
+ byte straight8; /* Alan's straight 8 check */
+ void *hwif_data; /* extra hwif data */
+ ide_busproc_t *busproc; /* driver soft-power interface */
+ byte bus_state; /* power state of the IDE bus */
+} ide_hwif_t;
+
+/*
+ * Status returned from various ide_ functions
+ */
+typedef enum {
+ ide_stopped, /* no drive operation was started */
+ ide_started /* a drive operation was started, and a handler was set */
+} ide_startstop_t;
+
+/*
+ * internal ide interrupt handler type
+ */
+typedef ide_startstop_t (ide_pre_handler_t)(ide_drive_t *, struct request *);
+typedef ide_startstop_t (ide_handler_t)(ide_drive_t *);
+typedef ide_startstop_t (ide_post_handler_t)(ide_drive_t *);
+
+/*
+ * when ide_timer_expiry fires, invoke a handler of this type
+ * to decide what to do.
+ */
+typedef int (ide_expiry_t)(ide_drive_t *);
+
+typedef struct hwgroup_s {
+ ide_handler_t *handler;/* irq handler, if active */
+ volatile int busy; /* BOOL: protects all fields below */
+ int sleeping; /* BOOL: wake us up on timer expiry */
+ ide_drive_t *drive; /* current drive */
+ ide_hwif_t *hwif; /* ptr to current hwif in linked-list */
+ struct request *rq; /* current request */
+ struct timer_list timer; /* failsafe timer */
+ struct request wrq; /* local copy of current write rq */
+ unsigned long poll_timeout; /* timeout value during long polls */
+ ide_expiry_t *expiry; /* queried upon timeouts */
+} ide_hwgroup_t;
+
+/* structure attached to the request for IDE_TASK_CMDS */
+
+/*
+ * configurable drive settings
+ */
+
+#define TYPE_INT 0
+#define TYPE_INTA 1
+#define TYPE_BYTE 2
+#define TYPE_SHORT 3
+
+#define SETTING_READ (1 << 0)
+#define SETTING_WRITE (1 << 1)
+#define SETTING_RW (SETTING_READ | SETTING_WRITE)
+
+typedef int (ide_procset_t)(ide_drive_t *, int);
+typedef struct ide_settings_s {
+ char *name;
+ int rw;
+ int read_ioctl;
+ int write_ioctl;
+ int data_type;
+ int min;
+ int max;
+ int mul_factor;
+ int div_factor;
+ void *data;
+ ide_procset_t *set;
+ int auto_remove;
+ struct ide_settings_s *next;
+} ide_settings_t;
+
+void ide_add_setting(ide_drive_t *drive, const char *name, int rw, int read_ioctl, int write_ioctl, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set);
+void ide_remove_setting(ide_drive_t *drive, char *name);
+ide_settings_t *ide_find_setting_by_name(ide_drive_t *drive, char *name);
+int ide_read_setting(ide_drive_t *t, ide_settings_t *setting);
+int ide_write_setting(ide_drive_t *drive, ide_settings_t *setting, int val);
+void ide_add_generic_settings(ide_drive_t *drive);
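+
+/*
+ * Illustrative sketch (an assumption about typical arguments): exporting
+ * drive->wcache as a read/write byte setting tied to the
+ * HDIO_GET_WCACHE/HDIO_SET_WCACHE ioctls, with no custom set routine.
+ */
+#if 0
+    ide_add_setting(drive, "wcache", SETTING_RW,
+                    HDIO_GET_WCACHE, HDIO_SET_WCACHE,
+                    TYPE_BYTE, 0, 1, 1, 1,
+                    &drive->wcache, NULL);
+#endif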
+
+#if 0
+/*
+ * /proc/ide interface
+ */
+typedef struct {
+ const char *name;
+ mode_t mode;
+ read_proc_t *read_proc;
+ write_proc_t *write_proc;
+} ide_proc_entry_t;
+#endif
+
+#ifdef CONFIG_PROC_FS
+void proc_ide_create(void);
+void proc_ide_destroy(void);
+void recreate_proc_ide_device(ide_hwif_t *, ide_drive_t *);
+void destroy_proc_ide_device(ide_hwif_t *, ide_drive_t *);
+void destroy_proc_ide_drives(ide_hwif_t *);
+void create_proc_ide_interfaces(void);
+void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p, void *data);
+void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p);
+read_proc_t proc_ide_read_capacity;
+read_proc_t proc_ide_read_geometry;
+
+/*
+ * Standard exit stuff:
+ */
+#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) \
+{ \
+ len -= off; \
+ if (len < count) { \
+ *eof = 1; \
+ if (len <= 0) \
+ return 0; \
+ } else \
+ len = count; \
+ *start = page + off; \
+ return len; \
+}
+#else
+#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0;
+#endif
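+
+/*
+ * Illustrative sketch: a minimal read_proc ending with the standard exit
+ * macro above ('example_read_proc' is hypothetical).
+ */
+#if 0
+static int example_read_proc (char *page, char **start, off_t off,
+                              int count, int *eof, void *data)
+{
+    int len = sprintf(page, "example\n");
+    PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
+}
+#endif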
+
+/*
+ * Subdrivers support.
+ */
+#define IDE_SUBDRIVER_VERSION 1
+
+typedef int (ide_cleanup_proc)(ide_drive_t *);
+typedef int (ide_standby_proc)(ide_drive_t *);
+typedef int (ide_flushcache_proc)(ide_drive_t *);
+typedef ide_startstop_t (ide_do_request_proc)(ide_drive_t *, struct request *, unsigned long);
+typedef void (ide_end_request_proc)(byte, ide_hwgroup_t *);
+typedef int (ide_ioctl_proc)(ide_drive_t *, struct inode *, struct file *, unsigned int, unsigned long);
+typedef int (ide_open_proc)(struct inode *, struct file *, ide_drive_t *);
+typedef void (ide_release_proc)(struct inode *, struct file *, ide_drive_t *);
+typedef int (ide_check_media_change_proc)(ide_drive_t *);
+typedef void (ide_revalidate_proc)(ide_drive_t *);
+typedef void (ide_pre_reset_proc)(ide_drive_t *);
+typedef unsigned long (ide_capacity_proc)(ide_drive_t *);
+typedef ide_startstop_t (ide_special_proc)(ide_drive_t *);
+typedef void (ide_setting_proc)(ide_drive_t *);
+typedef int (ide_reinit_proc)(ide_drive_t *);
+typedef void (ata_prebuilder_proc)(ide_drive_t *);
+typedef void (atapi_prebuilder_proc)(ide_drive_t *);
+
+typedef struct ide_driver_s {
+ const char *name;
+ const char *version;
+ byte media;
+ unsigned busy : 1;
+ unsigned supports_dma : 1;
+ unsigned supports_dsc_overlap : 1;
+ ide_cleanup_proc *cleanup;
+ ide_standby_proc *standby;
+ ide_flushcache_proc *flushcache;
+ ide_do_request_proc *do_request;
+ ide_end_request_proc *end_request;
+ ide_ioctl_proc *ioctl;
+ ide_open_proc *open;
+ ide_release_proc *release;
+ ide_check_media_change_proc *media_change;
+ ide_revalidate_proc *revalidate;
+ ide_pre_reset_proc *pre_reset;
+ ide_capacity_proc *capacity;
+ ide_special_proc *special;
+ /*ide_proc_entry_t *proc;*/
+ ide_reinit_proc *reinit;
+ ata_prebuilder_proc *ata_prebuilder;
+ atapi_prebuilder_proc *atapi_prebuilder;
+} ide_driver_t;
+
+#define DRIVER(drive) ((ide_driver_t *)((drive)->driver))
+
+/*
+ * IDE modules.
+ */
+#define IDE_CHIPSET_MODULE 0 /* not supported yet */
+#define IDE_PROBE_MODULE 1
+#define IDE_DRIVER_MODULE 2
+
+typedef int (ide_module_init_proc)(void);
+
+typedef struct ide_module_s {
+ int type;
+ ide_module_init_proc *init;
+ void *info;
+ struct ide_module_s *next;
+} ide_module_t;
+
+/*
+ * ide_hwifs[] is the master data structure used to keep track
+ * of just about everything in ide.c. Whenever possible, routines
+ * should be using pointers to a drive (ide_drive_t *) or
+ * pointers to a hwif (ide_hwif_t *), rather than indexing this
+ * structure directly (the allocation/layout may change!).
+ *
+ */
+#ifndef _IDE_C
+extern ide_hwif_t ide_hwifs[]; /* master data repository */
+extern ide_module_t *ide_modules;
+extern ide_module_t *ide_probe;
+#endif
+extern int noautodma;
+
+/*
+ * We need blk.h, but we replace its end_request with our own version.
+ */
+#define IDE_DRIVER /* Toggle some magic bits in blk.h */
+#define LOCAL_END_REQUEST /* Don't generate end_request in blk.h */
+#include <xeno/blk.h>
+
+void ide_end_request(byte uptodate, ide_hwgroup_t *hwgroup);
+
+/*
+ * This is used for (nearly) all data transfers from/to the IDE interface.
+ * FIXME for 2.5: pass a pointer rather than memcpy'ing.
+ */
+void ide_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount);
+void ide_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount);
+
+/*
+ * This is used for (nearly) all ATAPI data transfers from/to the IDE interface.
+ * FIXME for 2.5: pass a pointer rather than memcpy'ing.
+ */
+void atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount);
+void atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount);
+
+int drive_is_ready (ide_drive_t *drive);
+
+/*
+ * This is used on exit from the driver, to designate the next irq handler
+ * and also to start the safety timer.
+ */
+void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry);
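+
+/*
+ * Illustrative sketch (assumed usage): a step routine arms the next
+ * handler and the safety timer, kicks the drive, and reports that an
+ * operation is in flight.  'example_intr' is a hypothetical handler.
+ */
+#if 0
+static ide_startstop_t example_step (ide_drive_t *drive)
+{
+    ide_set_handler(drive, &example_intr, WAIT_CMD, NULL);
+    OUT_BYTE(WIN_IDENTIFY, IDE_COMMAND_REG);
+    return ide_started;
+}
+#endif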
+
+/*
+ * Error reporting, in human readable form (luxurious, but a memory hog).
+ */
+byte ide_dump_status (ide_drive_t *drive, const char *msg, byte stat);
+
+/*
+ * ide_error() takes action based on the error returned by the controller.
+ * The caller should return immediately after invoking this.
+ */
+ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat);
+
+/*
+ * Issue a simple drive command
+ * The drive must be selected beforehand.
+ */
+void ide_cmd (ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler);
+
+/*
+ * ide_fixstring() cleans up and (optionally) byte-swaps a text string,
+ * removing leading/trailing blanks and compressing internal blanks.
+ * It is primarily used to tidy up the model name/number fields as
+ * returned by the WIN_[P]IDENTIFY commands.
+ */
+void ide_fixstring (byte *s, const int bytecount, const int byteswap);
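+
+/*
+ * Illustrative sketch (mirroring typical identify handling): tidying the
+ * model field, byte-swapped or not per drive->bswap.
+ */
+#if 0
+    ide_fixstring(drive->id->model, sizeof(drive->id->model), drive->bswap);
+#endif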
+
+/*
+ * This routine busy-waits for the drive status to be not "busy".
+ * It then checks the status for all of the "good" bits and none
+ * of the "bad" bits, and if all is okay it returns 0. All other
+ * cases return 1 after doing "*startstop = ide_error()", and the
+ * caller should return the updated value of "startstop" in this case.
+ * "startstop" is unchanged when the function returns 0;
+ */
+int ide_wait_stat (ide_startstop_t *startstop, ide_drive_t *drive, byte good, byte bad, unsigned long timeout);
+
+int ide_wait_noerr (ide_drive_t *drive, byte good, byte bad, unsigned long timeout);
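+
+/*
+ * Illustrative sketch of the ide_wait_stat() calling convention described
+ * above; 'drive' is assumed to be in scope.
+ */
+#if 0
+    ide_startstop_t startstop;
+
+    if (ide_wait_stat(&startstop, drive, DRIVE_READY, BAD_STAT, WAIT_READY))
+        return startstop;   /* ide_error() has already been invoked */
+    /* status was good; continue with the operation */
+#endif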
+
+/*
+ * This routine is called from the partition-table code in genhd.c
+ * to "convert" a drive to a logical geometry with fewer than 1024 cyls.
+ */
+int ide_xlate_1024 (kdev_t, int, int, const char *);
+
+/*
+ * Convert kdev_t structure into ide_drive_t * one.
+ */
+ide_drive_t *get_info_ptr (kdev_t i_rdev);
+
+/*
+ * Return the current idea about the total capacity of this drive.
+ */
+unsigned long current_capacity (ide_drive_t *drive);
+
+/*
+ * Start a reset operation for an IDE interface.
+ * The caller should return immediately after invoking this.
+ */
+ide_startstop_t ide_do_reset (ide_drive_t *);
+
+/*
+ * Re-Start an operation for an IDE interface.
+ * The caller should return immediately after invoking this.
+ */
+ide_startstop_t restart_request (ide_drive_t *);
+
+/*
+ * This function is intended to be used prior to invoking ide_do_drive_cmd().
+ */
+void ide_init_drive_cmd (struct request *rq);
+
+/*
+ * "action" parameter type for ide_do_drive_cmd() below.
+ */
+typedef enum {
+ ide_wait, /* insert rq at end of list, and wait for it */
+ ide_next, /* insert rq immediately after current request */
+ ide_preempt, /* insert rq in front of current request */
+ ide_end /* insert rq at end of list, but don't wait for it */
+} ide_action_t;
+
+/*
+ * This function issues a special IDE device request
+ * onto the request queue.
+ *
+ * If action is ide_wait, then the rq is queued at the end of the
+ * request queue, and the function sleeps until it has been processed.
+ * This is for use when invoked from an ioctl handler.
+ *
+ * If action is ide_preempt, then the rq is queued at the head of
+ * the request queue, displacing the currently-being-processed
+ * request and this function returns immediately without waiting
+ * for the new rq to be completed. This is VERY DANGEROUS, and is
+ * intended for careful use by the ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_next, then the rq is queued immediately after
+ * the currently-being-processed-request (if any), and the function
+ * returns without waiting for the new rq to be completed. As above,
+ * This is VERY DANGEROUS, and is intended for careful use by the
+ * ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_end, then the rq is queued at the end of the
+ * request queue, and the function returns immediately without waiting
+ * for the new rq to be completed. This is again intended for careful
+ * use by the ATAPI tape/cdrom driver code.
+ */
+int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t action);
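+
+/*
+ * Illustrative sketch (assumed usage from process context, per the
+ * ide_wait case above).  'buf' is a hypothetical data buffer.
+ */
+#if 0
+    struct request rq;
+
+    ide_init_drive_cmd(&rq);
+    rq.buffer = buf;
+    if (ide_do_drive_cmd(drive, &rq, ide_wait))
+        /* the request failed */ ;
+#endif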
+
+/*
+ * Clean up after success/failure of an explicit drive cmd.
+ * stat/err are used only when HWGROUP(drive)->rq->cmd is IDE_DRIVE_CMD
+ * or matches IDE_DRIVE_TASK_MASK.
+ */
+void ide_end_drive_cmd (ide_drive_t *drive, byte stat, byte err);
+
+/*
+ * Issue an ATA command and wait for completion. Use for implementing
+ * commands in the kernel.
+ */
+int ide_wait_cmd (ide_drive_t *drive, int cmd, int nsect, int feature, int sectors, byte *buf);
+
+int ide_wait_cmd_task (ide_drive_t *drive, byte *buf);
+
+typedef struct ide_task_s {
+ task_ioreg_t tfRegister[8];
+ task_ioreg_t hobRegister[8];
+ ide_reg_valid_t tf_out_flags;
+ ide_reg_valid_t tf_in_flags;
+ int data_phase;
+ int command_type;
+ ide_pre_handler_t *prehandler;
+ ide_handler_t *handler;
+ ide_post_handler_t *posthandler;
+ void *special; /* valid_t generally */
+ struct request *rq; /* copy of request */
+ unsigned long block; /* copy of block */
+} ide_task_t;
+
+typedef struct pkt_task_s {
+ task_ioreg_t tfRegister[8];
+ int data_phase;
+ int command_type;
+ ide_handler_t *handler;
+ void *special;
+ struct request *rq; /* copy of request */
+ unsigned long block; /* copy of block */
+} pkt_task_t;
+
+/*
+ * Taskfile I/O (disks only, for now).
+ */
+ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task);
+
+/*
+ * Builds a request from ide_ioctl.
+ */
+void do_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, ide_handler_t *handler);
+
+/*
+ * Special Flagged Register Validation Caller
+ */
+// ide_startstop_t flagged_taskfile (ide_drive_t *drive, ide_task_t *task);
+
+ide_startstop_t set_multmode_intr (ide_drive_t *drive);
+ide_startstop_t set_geometry_intr (ide_drive_t *drive);
+ide_startstop_t recal_intr (ide_drive_t *drive);
+ide_startstop_t task_no_data_intr (ide_drive_t *drive);
+ide_startstop_t task_in_intr (ide_drive_t *drive);
+ide_startstop_t task_mulin_intr (ide_drive_t *drive);
+ide_startstop_t pre_task_out_intr (ide_drive_t *drive, struct request *rq);
+ide_startstop_t task_out_intr (ide_drive_t *drive);
+ide_startstop_t task_mulout_intr (ide_drive_t *drive);
+void ide_init_drive_taskfile (struct request *rq);
+
+int ide_wait_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, byte *buf);
+
+int ide_raw_taskfile (ide_drive_t *drive, ide_task_t *cmd, byte *buf);
+
+ide_pre_handler_t * ide_pre_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile);
+ide_handler_t * ide_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile);
+/* Expects args to be a full set of TF registers; parses the command type */
+int ide_cmd_type_parser (ide_task_t *args);
+
+int ide_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);
+
+#ifdef CONFIG_PKT_TASK_IOCTL
+int pkt_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);
+#endif /* CONFIG_PKT_TASK_IOCTL */
+
+void ide_delay_50ms (void);
+int system_bus_clock(void);
+
+byte ide_auto_reduce_xfer (ide_drive_t *drive);
+int ide_driveid_update (ide_drive_t *drive);
+int ide_ata66_check (ide_drive_t *drive, ide_task_t *args);
+int ide_config_drive_speed (ide_drive_t *drive, byte speed);
+byte eighty_ninty_three (ide_drive_t *drive);
+int set_transfer (ide_drive_t *drive, ide_task_t *args);
+
+/*
+ * ide_system_bus_speed() returns what we think is the system VESA/PCI
+ * bus speed (in MHz). This is used for calculating interface PIO timings.
+ * The default is 40 for known PCI systems, 50 otherwise.
+ * The "idebus=xx" parameter can be used to override this value.
+ */
+int ide_system_bus_speed (void);
+
+/*
+ * ide_multwrite() transfers a block of up to mcount sectors of data
+ * to a drive as part of a disk multwrite operation.
+ */
+int ide_multwrite (ide_drive_t *drive, unsigned int mcount);
+
+/*
+ * ide_stall_queue() can be used by a drive to give excess bandwidth back
+ * to the hwgroup by sleeping for timeout jiffies.
+ */
+void ide_stall_queue (ide_drive_t *drive, unsigned long timeout);
+
+/*
+ * ide_get_queue() returns the queue which corresponds to a given device.
+ */
+request_queue_t *ide_get_queue (kdev_t dev);
+
+/*
+ * CompactFlash cards and their brethren pretend to be removable hard disks,
+ * but they never have a slave unit, and they don't have doorlock mechanisms.
+ * This test catches them, and is invoked elsewhere when setting appropriate config bits.
+ */
+int drive_is_flashcard (ide_drive_t *drive);
+
+int ide_spin_wait_hwgroup (ide_drive_t *drive);
+void ide_timer_expiry (unsigned long data);
+void ide_intr (int irq, void *dev_id, struct pt_regs *regs);
+void do_ide_request (request_queue_t * q);
+void ide_init_subdrivers (void);
+
+#ifndef _IDE_C
+extern struct block_device_operations ide_fops[];
+/*extern ide_proc_entry_t generic_subdriver_entries[];*/
+#endif
+
+int ide_reinit_drive (ide_drive_t *drive);
+
+#ifdef _IDE_C
+#ifdef CONFIG_BLK_DEV_IDE
+int ideprobe_init (void);
+#endif /* CONFIG_BLK_DEV_IDE */
+#ifdef CONFIG_BLK_DEV_IDEDISK
+int idedisk_reinit (ide_drive_t *drive);
+int idedisk_init (void);
+#endif /* CONFIG_BLK_DEV_IDEDISK */
+#ifdef CONFIG_BLK_DEV_IDECD
+int ide_cdrom_reinit (ide_drive_t *drive);
+int ide_cdrom_init (void);
+#endif /* CONFIG_BLK_DEV_IDECD */
+#ifdef CONFIG_BLK_DEV_IDETAPE
+int idetape_reinit (ide_drive_t *drive);
+int idetape_init (void);
+#endif /* CONFIG_BLK_DEV_IDETAPE */
+#ifdef CONFIG_BLK_DEV_IDEFLOPPY
+int idefloppy_reinit (ide_drive_t *drive);
+int idefloppy_init (void);
+#endif /* CONFIG_BLK_DEV_IDEFLOPPY */
+#ifdef CONFIG_BLK_DEV_IDESCSI
+int idescsi_reinit (ide_drive_t *drive);
+int idescsi_init (void);
+#endif /* CONFIG_BLK_DEV_IDESCSI */
+#endif /* _IDE_C */
+
+int ide_register_module (ide_module_t *module);
+void ide_unregister_module (ide_module_t *module);
+ide_drive_t *ide_scan_devices (byte media, const char *name, ide_driver_t *driver, int n);
+int ide_register_subdriver (ide_drive_t *drive, ide_driver_t *driver, int version);
+int ide_unregister_subdriver (ide_drive_t *drive);
+int ide_replace_subdriver(ide_drive_t *drive, const char *driver);
+
+#ifdef CONFIG_BLK_DEV_IDEPCI
+#define ON_BOARD 1
+#define NEVER_BOARD 0
+#ifdef CONFIG_BLK_DEV_OFFBOARD
+# define OFF_BOARD ON_BOARD
+#else /* CONFIG_BLK_DEV_OFFBOARD */
+# define OFF_BOARD NEVER_BOARD
+#endif /* CONFIG_BLK_DEV_OFFBOARD */
+
+unsigned long ide_find_free_region (unsigned short size) __init;
+void ide_scan_pcibus (int scan_direction) __init;
+#endif
+#ifdef CONFIG_BLK_DEV_IDEDMA
+#define BAD_DMA_DRIVE 0
+#define GOOD_DMA_DRIVE 1
+int ide_build_dmatable (ide_drive_t *drive, ide_dma_action_t func);
+void ide_destroy_dmatable (ide_drive_t *drive);
+ide_startstop_t ide_dma_intr (ide_drive_t *drive);
+int check_drive_lists (ide_drive_t *drive, int good_bad);
+int report_drive_dmaing (ide_drive_t *drive);
+int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive);
+int ide_release_dma (ide_hwif_t *hwif);
+void ide_setup_dma (ide_hwif_t *hwif, unsigned long dmabase, unsigned int num_ports) __init;
+unsigned long ide_get_or_set_dma_base (ide_hwif_t *hwif, int extra, const char *name) __init;
+#endif
+
+void hwif_unregister (ide_hwif_t *hwif);
+
+void export_ide_init_queue (ide_drive_t *drive);
+byte export_probe_for_drive (ide_drive_t *drive);
+
+#endif /* _IDE_H */
diff --git a/xen/include/xeno/if.h b/xen/include/xeno/if.h
new file mode 100644
index 0000000000..8d3fc2b7fc
--- /dev/null
+++ b/xen/include/xeno/if.h
@@ -0,0 +1,141 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Global definitions for the INET interface module.
+ *
+ * Version: @(#)if.h 1.0.2 04/18/93
+ *
+ * Authors: Original taken from Berkeley UNIX 4.3, (c) UCB 1982-1988
+ * Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_IF_H
+#define _LINUX_IF_H
+
+#include <linux/types.h> /* for "__kernel_caddr_t" et al */
+#include <linux/socket.h> /* for "struct sockaddr" et al */
+
+/* Standard interface flags (netdevice->flags). */
+#define IFF_UP 0x1 /* interface is up */
+#define IFF_BROADCAST 0x2 /* broadcast address valid */
+#define IFF_DEBUG 0x4 /* turn on debugging */
+#define IFF_LOOPBACK 0x8 /* is a loopback net */
+#define IFF_POINTOPOINT 0x10 /* interface has p-p link */
+#define IFF_NOTRAILERS 0x20 /* avoid use of trailers */
+#define IFF_RUNNING 0x40 /* resources allocated */
+#define IFF_NOARP 0x80 /* no ARP protocol */
+#define IFF_PROMISC 0x100 /* receive all packets */
+#define IFF_ALLMULTI 0x200 /* receive all multicast packets*/
+
+#define IFF_MASTER 0x400 /* master of a load balancer */
+#define IFF_SLAVE 0x800 /* slave of a load balancer */
+
+#define IFF_MULTICAST 0x1000 /* Supports multicast */
+
+#define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_MASTER|IFF_SLAVE|IFF_RUNNING)
+
+#define IFF_PORTSEL 0x2000 /* can set media type */
+#define IFF_AUTOMEDIA 0x4000 /* auto media select active */
+#define IFF_DYNAMIC 0x8000 /* dialup device with changing addresses*/
+
+/* Private (from user) interface flags (netdevice->priv_flags). */
+#define IFF_802_1Q_VLAN 0x1 /* 802.1Q VLAN device. */
+
+/*
+ * Device mapping structure. I'd just gone off and designed a
+ * beautiful scheme using only loadable modules with arguments
+ * for driver options and along come the PCMCIA people 8)
+ *
+ * Ah well. The get() side of this is good for WDSETUP, and it'll
+ * be handy for debugging things. The set side is fine for now and
+ * being very small might be worth keeping for clean configuration.
+ */
+
+struct ifmap
+{
+ unsigned long mem_start;
+ unsigned long mem_end;
+ unsigned short base_addr;
+ unsigned char irq;
+ unsigned char dma;
+ unsigned char port;
+ /* 3 bytes spare */
+};
+
+/*
+ * Interface request structure used for socket
+ * ioctl's. All interface ioctl's must have parameter
+ * definitions which begin with ifr_name. The
+ * remainder may be interface specific.
+ */
+
+struct ifreq
+{
+#define IFHWADDRLEN 6
+#define IFNAMSIZ 16
+ union
+ {
+ char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ } ifr_ifrn;
+
+ union {
+ struct sockaddr ifru_addr;
+ struct sockaddr ifru_dstaddr;
+ struct sockaddr ifru_broadaddr;
+ struct sockaddr ifru_netmask;
+ struct sockaddr ifru_hwaddr;
+ short ifru_flags;
+ int ifru_ivalue;
+ int ifru_mtu;
+ struct ifmap ifru_map;
+ char ifru_slave[IFNAMSIZ]; /* Just fits the size */
+ char ifru_newname[IFNAMSIZ];
+ char * ifru_data;
+ } ifr_ifru;
+};
+
+#define ifr_name ifr_ifrn.ifrn_name /* interface name */
+#define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */
+#define ifr_addr ifr_ifru.ifru_addr /* address */
+#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-p lnk */
+#define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */
+#define ifr_netmask ifr_ifru.ifru_netmask /* interface net mask */
+#define ifr_flags ifr_ifru.ifru_flags /* flags */
+#define ifr_metric ifr_ifru.ifru_ivalue /* metric */
+#define ifr_mtu ifr_ifru.ifru_mtu /* mtu */
+#define ifr_map ifr_ifru.ifru_map /* device map */
+#define ifr_slave ifr_ifru.ifru_slave /* slave device */
+#define ifr_data ifr_ifru.ifru_data /* for use by interface */
+#define ifr_ifindex ifr_ifru.ifru_ivalue /* interface index */
+#define ifr_bandwidth ifr_ifru.ifru_ivalue /* link bandwidth */
+#define ifr_qlen ifr_ifru.ifru_ivalue /* Queue length */
+#define ifr_newname ifr_ifru.ifru_newname /* New name */
+
+/*
+ * Structure used in SIOCGIFCONF request.
+ * Used to retrieve interface configuration
+ * for machine (useful for programs which
+ * must know all networks accessible).
+ */
+
+struct ifconf
+{
+ int ifc_len; /* size of buffer */
+ union
+ {
+ char * ifcu_buf;
+ struct ifreq *ifcu_req;
+ } ifc_ifcu;
+};
+#define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */
+#define ifc_req ifc_ifcu.ifcu_req /* array of structures */
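+
+/*
+ * Illustrative userland use of SIOCGIFCONF (assumes a datagram
+ * socket fd; SIOCGIFCONF itself lives in sockios.h):
+ *
+ *	struct ifreq reqs[8];
+ *	struct ifconf ifc;
+ *	int n;
+ *
+ *	ifc.ifc_len = sizeof(reqs);
+ *	ifc.ifc_req = reqs;
+ *	if (ioctl(fd, SIOCGIFCONF, &ifc) == 0)
+ *		n = ifc.ifc_len / sizeof(struct ifreq);
+ */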
+
+
+#endif /* _LINUX_IF_H */
diff --git a/xen/include/xeno/if_ether.h b/xen/include/xeno/if_ether.h
new file mode 100644
index 0000000000..b64559d713
--- /dev/null
+++ b/xen/include/xeno/if_ether.h
@@ -0,0 +1,100 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Global definitions for the Ethernet IEEE 802.3 interface.
+ *
+ * Version: @(#)if_ether.h 1.0.1a 02/08/94
+ *
+ * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Donald Becker, <becker@super.org>
+ * Alan Cox, <alan@redhat.com>
+ * Steve Whitehouse, <gw7rrm@eeshack3.swan.ac.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_IF_ETHER_H
+#define _LINUX_IF_ETHER_H
+
+/*
+ * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble
+ * and FCS/CRC (frame check sequence).
+ */
+
+#define ETH_ALEN 6 /* Octets in one ethernet addr */
+#define ETH_HLEN 14 /* Total octets in header. */
+#define ETH_ZLEN 60 /* Min. octets in frame sans FCS */
+#define ETH_DATA_LEN 1500 /* Max. octets in payload */
+#define ETH_FRAME_LEN 1514 /* Max. octets in frame sans FCS */
+
+/*
+ * These are the defined Ethernet Protocol ID's.
+ */
+
+#define ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */
+#define ETH_P_PUP 0x0200 /* Xerox PUP packet */
+#define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */
+#define ETH_P_IP 0x0800 /* Internet Protocol packet */
+#define ETH_P_X25 0x0805 /* CCITT X.25 */
+#define ETH_P_ARP 0x0806 /* Address Resolution packet */
+#define ETH_P_BPQ 0x08FF /* G8BPQ AX.25 Ethernet Packet [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_IEEEPUP 0x0a00 /* Xerox IEEE802.3 PUP packet */
+#define ETH_P_IEEEPUPAT 0x0a01 /* Xerox IEEE802.3 PUP Addr Trans packet */
+#define ETH_P_DEC 0x6000 /* DEC Assigned proto */
+#define ETH_P_DNA_DL 0x6001 /* DEC DNA Dump/Load */
+#define ETH_P_DNA_RC 0x6002 /* DEC DNA Remote Console */
+#define ETH_P_DNA_RT 0x6003 /* DEC DNA Routing */
+#define ETH_P_LAT 0x6004 /* DEC LAT */
+#define ETH_P_DIAG 0x6005 /* DEC Diagnostics */
+#define ETH_P_CUST 0x6006 /* DEC Customer use */
+#define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */
+#define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */
+#define ETH_P_ATALK 0x809B /* Appletalk DDP */
+#define ETH_P_AARP 0x80F3 /* Appletalk AARP */
+#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */
+#define ETH_P_IPX 0x8137 /* IPX over DIX */
+#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */
+#define ETH_P_PPP_DISC 0x8863 /* PPPoE discovery messages */
+#define ETH_P_PPP_SES 0x8864 /* PPPoE session messages */
+#define ETH_P_ATMMPOA 0x884c /* MultiProtocol Over ATM */
+#define ETH_P_ATMFATE 0x8884 /* Frame-based ATM Transport
+ * over Ethernet
+ */
+
+/*
+ * Non DIX types. Won't clash for 1500 types.
+ */
+
+#define ETH_P_802_3 0x0001 /* Dummy type for 802.3 frames */
+#define ETH_P_AX25 0x0002 /* Dummy protocol id for AX.25 */
+#define ETH_P_ALL 0x0003 /* Every packet (be careful!!!) */
+#define ETH_P_802_2 0x0004 /* 802.2 frames */
+#define ETH_P_SNAP 0x0005 /* Internal only */
+#define ETH_P_DDCMP 0x0006 /* DEC DDCMP: Internal only */
+#define ETH_P_WAN_PPP 0x0007 /* Dummy type for WAN PPP frames*/
+#define ETH_P_PPP_MP 0x0008 /* Dummy type for PPP MP frames */
+#define ETH_P_LOCALTALK 0x0009 /* Localtalk pseudo type */
+#define ETH_P_PPPTALK 0x0010 /* Dummy type for Atalk over PPP*/
+#define ETH_P_TR_802_2 0x0011 /* 802.2 frames */
+#define ETH_P_MOBITEX 0x0015 /* Mobitex (kaz@cafe.net) */
+#define ETH_P_CONTROL 0x0016 /* Card specific control frames */
+#define ETH_P_IRDA 0x0017 /* Linux-IrDA */
+#define ETH_P_ECONET 0x0018 /* Acorn Econet */
+
+/*
+ * This is an Ethernet frame header.
+ */
+
+struct ethhdr
+{
+ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
+ unsigned char h_source[ETH_ALEN]; /* source ether addr */
+ unsigned short h_proto; /* packet type ID field */
+};
+
+#endif /* _LINUX_IF_ETHER_H */
diff --git a/xen/include/xeno/if_packet.h b/xen/include/xeno/if_packet.h
new file mode 100644
index 0000000000..b92558549d
--- /dev/null
+++ b/xen/include/xeno/if_packet.h
@@ -0,0 +1,102 @@
+#ifndef __LINUX_IF_PACKET_H
+#define __LINUX_IF_PACKET_H
+
+struct sockaddr_pkt
+{
+ unsigned short spkt_family;
+ unsigned char spkt_device[14];
+ unsigned short spkt_protocol;
+};
+
+struct sockaddr_ll
+{
+ unsigned short sll_family;
+ unsigned short sll_protocol;
+ int sll_ifindex;
+ unsigned short sll_hatype;
+ unsigned char sll_pkttype;
+ unsigned char sll_halen;
+ unsigned char sll_addr[8];
+};
+
+/* Packet types */
+
+#define PACKET_HOST 0 /* To us */
+#define PACKET_BROADCAST 1 /* To all */
+#define PACKET_MULTICAST 2 /* To group */
+#define PACKET_OTHERHOST 3 /* To someone else */
+#define PACKET_OUTGOING 4 /* Outgoing of any type */
+/* These are invisible to user level */
+#define PACKET_LOOPBACK 5 /* MC/BRD frame looped back */
+#define PACKET_FASTROUTE 6 /* Fastrouted frame */
+
+/* Packet socket options */
+
+#define PACKET_ADD_MEMBERSHIP 1
+#define PACKET_DROP_MEMBERSHIP 2
+#define PACKET_RECV_OUTPUT 3
+/* Value 4 is still used by obsolete turbo-packet. */
+#define PACKET_RX_RING 5
+#define PACKET_STATISTICS 6
+#define PACKET_COPY_THRESH 7
+
+struct tpacket_stats
+{
+ unsigned int tp_packets;
+ unsigned int tp_drops;
+};
+
+struct tpacket_hdr
+{
+ unsigned long tp_status;
+#define TP_STATUS_KERNEL 0
+#define TP_STATUS_USER 1
+#define TP_STATUS_COPY 2
+#define TP_STATUS_LOSING 4
+#define TP_STATUS_CSUMNOTREADY 8
+ unsigned int tp_len;
+ unsigned int tp_snaplen;
+ unsigned short tp_mac;
+ unsigned short tp_net;
+ unsigned int tp_sec;
+ unsigned int tp_usec;
+};
+
+#define TPACKET_ALIGNMENT 16
+#define TPACKET_ALIGN(x) (((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1))
+#define TPACKET_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll))
+
+/*
+ Frame structure:
+
+ - Start. Frame must be aligned to TPACKET_ALIGNMENT=16
+ - struct tpacket_hdr
+ - pad to TPACKET_ALIGNMENT=16
+ - struct sockaddr_ll
+ - Gap, chosen so that packet data (Start+tp_net) aligns to TPACKET_ALIGNMENT=16
+ - Start+tp_mac: [ Optional MAC header ]
+ - Start+tp_net: Packet data, aligned to TPACKET_ALIGNMENT=16.
+ - Pad to align to TPACKET_ALIGNMENT=16
+ */
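+
+/*
+ * Worked example (illustrative; 32-bit i386 sizes assumed): with
+ * sizeof(struct tpacket_hdr) == 24, TPACKET_ALIGN(24) == 32, so
+ * TPACKET_HDRLEN == 32 + sizeof(struct sockaddr_ll) == 32 + 20 == 52;
+ * tp_net is then padded up to the next multiple of 16.
+ */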
+
+struct tpacket_req
+{
+ unsigned int tp_block_size; /* Minimal size of contiguous block */
+ unsigned int tp_block_nr; /* Number of blocks */
+ unsigned int tp_frame_size; /* Size of frame */
+ unsigned int tp_frame_nr; /* Total number of frames */
+};
+
+struct packet_mreq
+{
+ int mr_ifindex;
+ unsigned short mr_type;
+ unsigned short mr_alen;
+ unsigned char mr_address[8];
+};
+
+#define PACKET_MR_MULTICAST 0
+#define PACKET_MR_PROMISC 1
+#define PACKET_MR_ALLMULTI 2
+
+#endif
diff --git a/xen/include/xeno/if_vlan.h b/xen/include/xeno/if_vlan.h
new file mode 100644
index 0000000000..d3e96bc4cf
--- /dev/null
+++ b/xen/include/xeno/if_vlan.h
@@ -0,0 +1,256 @@
+/*
+ * VLAN An implementation of 802.1Q VLAN tagging.
+ *
+ * Authors: Ben Greear <greearb@candelatech.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#ifndef _LINUX_IF_VLAN_H_
+#define _LINUX_IF_VLAN_H_
+
+#ifdef __KERNEL__
+
+/* externally defined structs */
+struct vlan_group;
+struct net_device;
+struct sk_buff;
+struct packet_type;
+struct vlan_collection;
+struct vlan_dev_info;
+
+//#include <linux/proc_fs.h> /* for proc_dir_entry */
+#include <linux/netdevice.h>
+
+#define VLAN_HLEN 4 /* The additional bytes (on top of the Ethernet header)
+ * that VLAN requires.
+ */
+#define VLAN_ETH_ALEN 6 /* Octets in one ethernet addr */
+#define VLAN_ETH_HLEN 18 /* Total octets in header. */
+#define VLAN_ETH_ZLEN 64 /* Min. octets in frame sans FCS */
+
+/*
+ * According to 802.3ac, the packet can be 4 bytes longer. --Klika Jan
+ */
+#define VLAN_ETH_DATA_LEN 1500 /* Max. octets in payload */
+#define VLAN_ETH_FRAME_LEN 1518 /* Max. octets in frame sans FCS */
+
+struct vlan_ethhdr {
+ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
+ unsigned char h_source[ETH_ALEN]; /* source ether addr */
+ unsigned short h_vlan_proto; /* Should always be 0x8100 */
+ unsigned short h_vlan_TCI; /* Encapsulates priority and VLAN ID */
+ unsigned short h_vlan_encapsulated_proto; /* packet type ID field (or len) */
+};
+
+struct vlan_hdr {
+ unsigned short h_vlan_TCI; /* Encapsulates priority and VLAN ID */
+ unsigned short h_vlan_encapsulated_proto; /* packet type ID field (or len) */
+};
+
+#define VLAN_VID_MASK 0xfff
+
+/* found in af_inet.c */
+extern int (*vlan_ioctl_hook)(unsigned long arg);
+
+#define VLAN_NAME "vlan"
+
+/* if this changes, the algorithm will have to be reworked because it
+ * depends on completely exhausting the VLAN identifier space. Thus
+ * it gives constant time look-up, but in many cases it wastes memory.
+ */
+#define VLAN_GROUP_ARRAY_LEN 4096
+
+struct vlan_group {
+ int real_dev_ifindex; /* The ifindex of the ethernet(like) device the vlan is attached to. */
+ struct net_device *vlan_devices[VLAN_GROUP_ARRAY_LEN];
+
+ struct vlan_group *next; /* the next in the list */
+};
+
+struct vlan_priority_tci_mapping {
+ unsigned long priority;
+ unsigned short vlan_qos; /* This should be shifted when first set, so we only do it
+ * at provisioning time.
+ * ((skb->priority << 13) & 0xE000)
+ */
+ struct vlan_priority_tci_mapping *next;
+};
+
+/* Holds information that makes sense if this device is a VLAN device. */
+struct vlan_dev_info {
+ /** This will be the mapping that correlates skb->priority to
+ * 3 bits of VLAN QOS tags...
+ */
+ unsigned long ingress_priority_map[8];
+ struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */
+
+ unsigned short vlan_id; /* The VLAN Identifier for this interface. */
+ unsigned short flags; /* (1 << 0) re_order_header This option will cause the
+ * VLAN code to move around the ethernet header on
+ * ingress to make the skb look **exactly** like it
+ * came in from an ethernet port. This destroys some of
+ * the VLAN information in the skb, but it fixes programs
+ * like DHCP that use packet-filtering and don't understand
+ * 802.1Q
+ */
+ struct dev_mc_list *old_mc_list; /* old multi-cast list for the VLAN interface..
+ * we save this so we can tell what changes were
+ * made, in order to feed the right changes down
+ * to the real hardware...
+ */
+ int old_allmulti; /* similar to above. */
+ int old_promiscuity; /* similar to above. */
+ struct net_device *real_dev; /* the underlying device/interface */
+ struct proc_dir_entry *dent; /* Holds the proc data */
+ unsigned long cnt_inc_headroom_on_tx; /* How many times did we have to grow the skb on TX. */
+ unsigned long cnt_encap_on_xmit; /* How many times did we have to encapsulate the skb on TX. */
+ struct net_device_stats dev_stats; /* Device stats (rx-bytes, tx-pkts, etc...) */
+};
+
+#define VLAN_DEV_INFO(x) ((struct vlan_dev_info *)(x->priv))
+
+/* inline functions */
+
+static inline struct net_device_stats *vlan_dev_get_stats(struct net_device *dev)
+{
+ return &(VLAN_DEV_INFO(dev)->dev_stats);
+}
+
+static inline __u32 vlan_get_ingress_priority(struct net_device *dev,
+ unsigned short vlan_tag)
+{
+ struct vlan_dev_info *vip = VLAN_DEV_INFO(dev);
+
+ return vip->ingress_priority_map[(vlan_tag >> 13) & 0x7];
+}
+
+/* VLAN tx hw acceleration helpers. */
+struct vlan_skb_tx_cookie {
+ u32 magic;
+ u32 vlan_tag;
+};
+
+#if 0
+#define VLAN_TX_COOKIE_MAGIC 0x564c414e /* "VLAN" in ascii. */
+#define VLAN_TX_SKB_CB(__skb) ((struct vlan_skb_tx_cookie *)&((__skb)->cb[0]))
+#define vlan_tx_tag_present(__skb) \
+ (VLAN_TX_SKB_CB(__skb)->magic == VLAN_TX_COOKIE_MAGIC)
+#define vlan_tx_tag_get(__skb) (VLAN_TX_SKB_CB(__skb)->vlan_tag)
+#else /* XXX KAF: We don't support vlan tagging at the moment. */
+#define VLAN_TX_SKB_CB(__skb) NULL
+#define vlan_tx_tag_present(__skb) 0
+#define vlan_tx_tag_get(__skb) 0
+#endif
+
+#if 0
+/* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */
+static inline int __vlan_hwaccel_rx(struct sk_buff *skb,
+ struct vlan_group *grp,
+ unsigned short vlan_tag, int polling)
+{
+ struct net_device_stats *stats;
+
+ skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK];
+ if (skb->dev == NULL) {
+ kfree_skb(skb);
+
+ /* Not NET_RX_DROP, this is not being dropped
+ * due to congestion.
+ */
+ return 0;
+ }
+
+ skb->dev->last_rx = jiffies;
+
+ stats = vlan_dev_get_stats(skb->dev);
+ stats->rx_packets++;
+ stats->rx_bytes += skb->len;
+
+ skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tag);
+ switch (skb->pkt_type) {
+ case PACKET_BROADCAST:
+ break;
+
+ case PACKET_MULTICAST:
+ stats->multicast++;
+ break;
+
+ case PACKET_OTHERHOST:
+ /* Our lower layer thinks this is not local, let's make sure.
+ * This allows the VLAN to have a different MAC than the underlying
+ * device, and still route correctly.
+ */
+ if (!memcmp(skb->mac.ethernet->h_dest, skb->dev->dev_addr, ETH_ALEN))
+ skb->pkt_type = PACKET_HOST;
+ break;
+ };
+
+#ifdef NAPI
+ return (polling ? netif_receive_skb(skb) : netif_rx(skb));
+#else
+ return netif_rx(skb);
+#endif
+}
+
+static inline int vlan_hwaccel_rx(struct sk_buff *skb,
+ struct vlan_group *grp,
+ unsigned short vlan_tag)
+{
+ return __vlan_hwaccel_rx(skb, grp, vlan_tag, 0);
+}
+
+static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb,
+ struct vlan_group *grp,
+ unsigned short vlan_tag)
+{
+ return __vlan_hwaccel_rx(skb, grp, vlan_tag, 1);
+}
+#else
+#define vlan_hwaccel_rx(_skb, _grp, _tag) (netif_rx(_skb))
+#endif
+#endif /* __KERNEL__ */
+
+/* VLAN IOCTLs are found in sockios.h */
+
+/* Passed in vlan_ioctl_args structure to determine behaviour. */
+enum vlan_ioctl_cmds {
+ ADD_VLAN_CMD,
+ DEL_VLAN_CMD,
+ SET_VLAN_INGRESS_PRIORITY_CMD,
+ SET_VLAN_EGRESS_PRIORITY_CMD,
+ GET_VLAN_INGRESS_PRIORITY_CMD,
+ GET_VLAN_EGRESS_PRIORITY_CMD,
+ SET_VLAN_NAME_TYPE_CMD,
+ SET_VLAN_FLAG_CMD
+};
+
+enum vlan_name_types {
+ VLAN_NAME_TYPE_PLUS_VID, /* Name will look like: vlan0005 */
+ VLAN_NAME_TYPE_RAW_PLUS_VID, /* name will look like: eth1.0005 */
+ VLAN_NAME_TYPE_PLUS_VID_NO_PAD, /* Name will look like: vlan5 */
+ VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD, /* Name will look like: eth0.5 */
+ VLAN_NAME_TYPE_HIGHEST
+};
+
+struct vlan_ioctl_args {
+ int cmd; /* Should be one of the vlan_ioctl_cmds enum above. */
+ char device1[24];
+
+ union {
+ char device2[24];
+ int VID;
+ unsigned int skb_priority;
+ unsigned int name_type;
+ unsigned int bind_type;
+ unsigned int flag; /* Matches vlan_dev_info flags */
+ } u;
+
+ short vlan_qos;
+};
+
+#endif /* !(_LINUX_IF_VLAN_H_) */
diff --git a/xen/include/xeno/in.h b/xen/include/xeno/in.h
new file mode 100644
index 0000000000..6c090e6e4f
--- /dev/null
+++ b/xen/include/xeno/in.h
@@ -0,0 +1,191 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Definitions of the Internet Protocol.
+ *
+ * Version: @(#)in.h 1.0.1 04/21/93
+ *
+ * Authors: Original taken from the GNU Project <netinet/in.h> file.
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_IN_H
+#define _LINUX_IN_H
+
+#include <xeno/types.h>
+#include <xeno/socket.h>
+
+/* Standard well-defined IP protocols. */
+enum {
+ IPPROTO_IP = 0, /* Dummy protocol for TCP */
+ IPPROTO_ICMP = 1, /* Internet Control Message Protocol */
+ IPPROTO_IGMP = 2, /* Internet Group Management Protocol */
+ IPPROTO_IPIP = 4, /* IPIP tunnels (older KA9Q tunnels use 94) */
+ IPPROTO_TCP = 6, /* Transmission Control Protocol */
+ IPPROTO_EGP = 8, /* Exterior Gateway Protocol */
+ IPPROTO_PUP = 12, /* PUP protocol */
+ IPPROTO_UDP = 17, /* User Datagram Protocol */
+ IPPROTO_IDP = 22, /* XNS IDP protocol */
+ IPPROTO_RSVP = 46, /* RSVP protocol */
+ IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */
+
+ IPPROTO_IPV6 = 41, /* IPv6-in-IPv4 tunnelling */
+
+ IPPROTO_PIM = 103, /* Protocol Independent Multicast */
+
+ IPPROTO_ESP = 50, /* Encapsulation Security Payload protocol */
+ IPPROTO_AH = 51, /* Authentication Header protocol */
+ IPPROTO_COMP = 108, /* Compression Header protocol */
+
+ IPPROTO_RAW = 255, /* Raw IP packets */
+ IPPROTO_MAX
+};
+
+
+/* Internet address. */
+struct in_addr {
+ __u32 s_addr;
+};
+
+#define IP_TOS 1
+#define IP_TTL 2
+#define IP_HDRINCL 3
+#define IP_OPTIONS 4
+#define IP_ROUTER_ALERT 5
+#define IP_RECVOPTS 6
+#define IP_RETOPTS 7
+#define IP_PKTINFO 8
+#define IP_PKTOPTIONS 9
+#define IP_MTU_DISCOVER 10
+#define IP_RECVERR 11
+#define IP_RECVTTL 12
+#define IP_RECVTOS 13
+#define IP_MTU 14
+#define IP_FREEBIND 15
+
+/* BSD compatibility */
+#define IP_RECVRETOPTS IP_RETOPTS
+
+/* IP_MTU_DISCOVER values */
+#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
+#define IP_PMTUDISC_WANT 1 /* Use per route hints */
+#define IP_PMTUDISC_DO 2 /* Always DF */
+
+#define IP_MULTICAST_IF 32
+#define IP_MULTICAST_TTL 33
+#define IP_MULTICAST_LOOP 34
+#define IP_ADD_MEMBERSHIP 35
+#define IP_DROP_MEMBERSHIP 36
+
+/* These need to appear somewhere around here */
+#define IP_DEFAULT_MULTICAST_TTL 1
+#define IP_DEFAULT_MULTICAST_LOOP 1
+
+/* Request struct for multicast socket ops */
+
+struct ip_mreq
+{
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_interface; /* local IP address of interface */
+};
+
+struct ip_mreqn
+{
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_address; /* local IP address of interface */
+ int imr_ifindex; /* Interface index */
+};
+
+struct in_pktinfo
+{
+ int ipi_ifindex;
+ struct in_addr ipi_spec_dst;
+ struct in_addr ipi_addr;
+};
+
+/* Structure describing an Internet (IP) socket address. */
+#define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */
+struct sockaddr_in {
+ sa_family_t sin_family; /* Address family */
+ unsigned short int sin_port; /* Port number */
+ struct in_addr sin_addr; /* Internet address */
+
+ /* Pad to size of `struct sockaddr'. */
+ unsigned char __pad[__SOCK_SIZE__ - sizeof(short int) -
+ sizeof(unsigned short int) - sizeof(struct in_addr)];
+};
+#define sin_zero __pad /* for BSD UNIX comp. -FvK */
+
+
+/*
+ * Definitions of the bits in an Internet address integer.
+ * On subnets, host and network parts are found according
+ * to the subnet mask, not these masks.
+ */
+#define IN_CLASSA(a) ((((long int) (a)) & 0x80000000) == 0)
+#define IN_CLASSA_NET 0xff000000
+#define IN_CLASSA_NSHIFT 24
+#define IN_CLASSA_HOST (0xffffffff & ~IN_CLASSA_NET)
+#define IN_CLASSA_MAX 128
+
+#define IN_CLASSB(a) ((((long int) (a)) & 0xc0000000) == 0x80000000)
+#define IN_CLASSB_NET 0xffff0000
+#define IN_CLASSB_NSHIFT 16
+#define IN_CLASSB_HOST (0xffffffff & ~IN_CLASSB_NET)
+#define IN_CLASSB_MAX 65536
+
+#define IN_CLASSC(a) ((((long int) (a)) & 0xe0000000) == 0xc0000000)
+#define IN_CLASSC_NET 0xffffff00
+#define IN_CLASSC_NSHIFT 8
+#define IN_CLASSC_HOST (0xffffffff & ~IN_CLASSC_NET)
+
+#define IN_CLASSD(a) ((((long int) (a)) & 0xf0000000) == 0xe0000000)
+#define IN_MULTICAST(a) IN_CLASSD(a)
+#define IN_MULTICAST_NET 0xF0000000
+
+#define IN_EXPERIMENTAL(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000)
+#define IN_BADCLASS(a) IN_EXPERIMENTAL((a))
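+
+/*
+ * Example (illustrative): 10.1.2.3 is 0x0A010203 in host order, so
+ * IN_CLASSA(0x0A010203) is true and its network part is
+ * 0x0A010203 & IN_CLASSA_NET == 0x0A000000 (i.e. 10.0.0.0).
+ */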
+
+/* Address to accept any incoming messages. */
+#define INADDR_ANY ((unsigned long int) 0x00000000)
+
+/* Address to send to all hosts. */
+#define INADDR_BROADCAST ((unsigned long int) 0xffffffff)
+
+/* Address indicating an error return. */
+#define INADDR_NONE ((unsigned long int) 0xffffffff)
+
+/* Network number for local host loopback. */
+#define IN_LOOPBACKNET 127
+
+/* Address to loopback in software to local host. */
+#define INADDR_LOOPBACK 0x7f000001 /* 127.0.0.1 */
+#define IN_LOOPBACK(a) ((((long int) (a)) & 0xff000000) == 0x7f000000)
+
+/* Defines for Multicast INADDR */
+#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */
+#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */
+#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */
+#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */
+
+
+/* <asm/byteorder.h> contains the htonl type stuff.. */
+#include <asm/byteorder.h>
+
+#ifdef __KERNEL__
+/* Some random defines to make it easier in the kernel.. */
+#define LOOPBACK(x) (((x) & htonl(0xff000000)) == htonl(0x7f000000))
+#define MULTICAST(x) (((x) & htonl(0xf0000000)) == htonl(0xe0000000))
+#define BADCLASS(x) (((x) & htonl(0xf0000000)) == htonl(0xf0000000))
+#define ZERONET(x) (((x) & htonl(0xff000000)) == htonl(0x00000000))
+#define LOCAL_MCAST(x) (((x) & htonl(0xFFFFFF00)) == htonl(0xE0000000))
+
+#endif
+
+#endif /* _LINUX_IN_H */
diff --git a/xen/include/xeno/init.h b/xen/include/xeno/init.h
new file mode 100644
index 0000000000..5c4477f18c
--- /dev/null
+++ b/xen/include/xeno/init.h
@@ -0,0 +1,170 @@
+#ifndef _LINUX_INIT_H
+#define _LINUX_INIT_H
+
+#include <linux/config.h>
+
+/* These macros are used to mark some functions or
+ * initialized data (doesn't apply to uninitialized data)
+ * as `initialization' functions. The kernel can take this as a hint
+ * that the function is used only during the initialization phase and
+ * can free the memory afterwards.
+ *
+ * Usage:
+ * For functions:
+ *
+ * You should add __init immediately before the function name, like:
+ *
+ * static void __init initme(int x, int y)
+ * {
+ * extern int z; z = x * y;
+ * }
+ *
+ * If the function has a prototype somewhere, you can also add
+ * __init between closing brace of the prototype and semicolon:
+ *
+ * extern int initialize_foobar_device(int, int, int) __init;
+ *
+ * For initialized data:
+ * You should insert __initdata between the variable name and equal
+ * sign followed by value, e.g.:
+ *
+ * static int init_variable __initdata = 0;
+ * static char linux_logo[] __initdata = { 0x32, 0x36, ... };
+ *
+ * Don't forget to initialize data not at file scope, i.e. within a function,
+ * as gcc otherwise puts the data into the bss section and not into the init
+ * section.
+ *
+ * Also note that this data cannot be "const".
+ */
+
+#ifndef MODULE
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Used for initialization calls..
+ */
+typedef int (*initcall_t)(void);
+typedef void (*exitcall_t)(void);
+
+extern initcall_t __initcall_start, __initcall_end;
+
+#define __initcall(fn) \
+ static initcall_t __initcall_##fn __init_call = fn
+#define __exitcall(fn) \
+ static exitcall_t __exitcall_##fn __exit_call = fn
+
+/*
+ * Used for kernel command line parameter setup
+ */
+struct kernel_param {
+ const char *str;
+ int (*setup_func)(char *);
+};
+
+extern struct kernel_param __setup_start, __setup_end;
+
+#define __setup(str, fn) \
+ static char __setup_str_##fn[] __initdata = str; \
+ static struct kernel_param __setup_##fn __attribute__((unused)) __initsetup = { __setup_str_##fn, fn }
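+
+/*
+ * Illustrative use (hypothetical handler; assumes a writable global
+ * system_bus_speed): parse "idebus=xx" from the kernel command line.
+ *
+ *	static int __init idebus_setup(char *str)
+ *	{
+ *		system_bus_speed = simple_strtoul(str, NULL, 0);
+ *		return 1;
+ *	}
+ *	__setup("idebus=", idebus_setup);
+ */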
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Mark functions and data as being only used at initialization
+ * or exit time.
+ */
+#define __init __attribute__ ((__section__ (".text.init")))
+#define __exit __attribute__ ((unused, __section__(".text.exit")))
+#define __initdata __attribute__ ((__section__ (".data.init")))
+#define __exitdata __attribute__ ((unused, __section__ (".data.exit")))
+#define __initsetup __attribute__ ((unused,__section__ (".setup.init")))
+#define __init_call __attribute__ ((unused,__section__ (".initcall.init")))
+#define __exit_call __attribute__ ((unused,__section__ (".exitcall.exit")))
+
+/* For assembly routines */
+#define __INIT .section ".text.init","ax"
+#define __FINIT .previous
+#define __INITDATA .section ".data.init","aw"
+
+/**
+ * module_init() - driver initialization entry point
+ * @x: function to be run at kernel boot time or module insertion
+ *
+ * module_init() will add the driver initialization routine in
+ * the "__initcall.int" code segment if the driver is checked as
+ * "y" or static, or else it will wrap the driver initialization
+ * routine with init_module() which is used by insmod and
+ * modprobe when the driver is used as a module.
+ */
+#define module_init(x) __initcall(x);
+
+/**
+ * module_exit() - driver exit entry point
+ * @x: function to be run when driver is removed
+ *
+ * module_exit() will wrap the driver clean-up code
+ * with cleanup_module(), which rmmod invokes when the
+ * driver is built as a module. If the driver is statically
+ * compiled into the kernel, module_exit() has no effect.
+ */
+#define module_exit(x) __exitcall(x);
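+
+/*
+ * Typical driver boilerplate (illustrative; names are hypothetical):
+ *
+ *	static int __init mydrv_init(void)
+ *	{
+ *		return register_mydrv();
+ *	}
+ *	static void __exit mydrv_exit(void)
+ *	{
+ *		unregister_mydrv();
+ *	}
+ *	module_init(mydrv_init);
+ *	module_exit(mydrv_exit);
+ */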
+
+#else
+
+#define __init
+#define __exit
+#define __initdata
+#define __exitdata
+#define __initcall(fn)
+/* For assembly routines */
+#define __INIT
+#define __FINIT
+#define __INITDATA
+
+/* These macros create a dummy inline: gcc 2.9x does not count alias
+ as usage, hence the `unused function' warning when __init functions
+ are declared static. We use the dummy __*_module_inline functions
+ both to kill the warning and check the type of the init/cleanup
+ function. */
+typedef int (*__init_module_func_t)(void);
+typedef void (*__cleanup_module_func_t)(void);
+#define module_init(x) \
+ int init_module(void) __attribute__((alias(#x))); \
+ static inline __init_module_func_t __init_module_inline(void) \
+ { return x; }
+#define module_exit(x) \
+ void cleanup_module(void) __attribute__((alias(#x))); \
+ static inline __cleanup_module_func_t __cleanup_module_inline(void) \
+ { return x; }
+
+#define __setup(str,func) /* nothing */
+
+#endif
+
+#ifdef CONFIG_HOTPLUG
+#define __devinit
+#define __devinitdata
+#define __devexit
+#define __devexitdata
+#else
+#define __devinit __init
+#define __devinitdata __initdata
+#define __devexit __exit
+#define __devexitdata __exitdata
+#endif
+
+/* Functions marked as __devexit may be discarded at kernel link time, depending
+ on config options. Newer versions of binutils detect references from
+ retained sections to discarded sections and flag an error. Pointers to
+ __devexit functions must use __devexit_p(function_name); the wrapper will
+ insert either the function_name or NULL, depending on the config options.
+ */
+#if defined(MODULE) || defined(CONFIG_HOTPLUG)
+#define __devexit_p(x) x
+#else
+#define __devexit_p(x) NULL
+#endif
+
+#endif /* _LINUX_INIT_H */
diff --git a/xen/include/xeno/interrupt.h b/xen/include/xeno/interrupt.h
new file mode 100644
index 0000000000..488809b99a
--- /dev/null
+++ b/xen/include/xeno/interrupt.h
@@ -0,0 +1,258 @@
+/* interrupt.h */
+#ifndef _LINUX_INTERRUPT_H
+#define _LINUX_INTERRUPT_H
+
+#include <linux/config.h>
+//#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/cache.h>
+
+#include <asm/bitops.h>
+#include <asm/atomic.h>
+#include <asm/ptrace.h>
+
+struct irqaction {
+ void (*handler)(int, void *, struct pt_regs *);
+ unsigned long flags;
+ unsigned long mask;
+ const char *name;
+ void *dev_id;
+ struct irqaction *next;
+};
+
+
+/* Who gets which entry in bh_base. Things which will occur most often
+ should come first */
+
+enum {
+ TIMER_BH = 0,
+ TQUEUE_BH,
+ SCSI_BH,
+ IMMEDIATE_BH
+};
+
+#include <asm/hardirq.h>
+#include <asm/softirq.h>
+
+
+
+/* PLEASE avoid allocating new softirqs unless you _really_ need
+   high-frequency threaded job scheduling. For almost all purposes
+   tasklets are more than enough. E.g. all serial device BHs et
+   al. should be converted to tasklets, not to softirqs.
+ */
+
+enum
+{
+ HI_SOFTIRQ=0,
+ TASKLET_SOFTIRQ
+};
+
+/* softirq mask and active fields moved to irq_cpustat_t in
+ * asm/hardirq.h to get better cache usage. KAO
+ */
+
+struct softirq_action
+{
+ void (*action)(struct softirq_action *);
+ void *data;
+};
+
+asmlinkage void do_softirq(void);
+extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
+extern void softirq_init(void);
+#define __cpu_raise_softirq(cpu, nr) do { softirq_pending(cpu) |= 1UL << (nr); } while (0)
+extern void FASTCALL(cpu_raise_softirq(unsigned int cpu, unsigned int nr));
+extern void FASTCALL(raise_softirq(unsigned int nr));
+
+
+
+/* Tasklets --- multithreaded analogue of BHs.
+
+   The main feature distinguishing them from generic softirqs: a
+   tasklet runs on only one CPU at a time.
+
+   The main feature distinguishing them from BHs: different tasklets
+   may run simultaneously on different CPUs.
+
+   Properties:
+   * If tasklet_schedule() is called, the tasklet is guaranteed
+     to be executed on some cpu at least once afterwards.
+   * If the tasklet is already scheduled but its execution has not yet
+     started, it will be executed only once.
+   * If the tasklet is already running on another CPU (or schedule is
+     called from the tasklet itself), it is rescheduled for later.
+   * A tasklet is strictly serialized with respect to itself, but not
+     with respect to other tasklets. If a client needs intertask
+     synchronization, it must provide it with spinlocks.
+ */
+
+struct tasklet_struct
+{
+ struct tasklet_struct *next;
+ unsigned long state;
+ atomic_t count;
+ void (*func)(unsigned long);
+ unsigned long data;
+};
+
+#define DECLARE_TASKLET(name, func, data) \
+struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(0), func, data }
+
+#define DECLARE_TASKLET_DISABLED(name, func, data) \
+struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data }
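+
+/*
+ * Illustrative pattern (hypothetical names): defer work from an
+ * interrupt handler to a tasklet.
+ *
+ *	static void mydev_do_work(unsigned long data);
+ *	DECLARE_TASKLET(mydev_tasklet, mydev_do_work, 0);
+ *
+ *	static void mydev_interrupt(int irq, void *dev_id,
+ *	                            struct pt_regs *regs)
+ *	{
+ *		tasklet_schedule(&mydev_tasklet);
+ *	}
+ */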
+
+
+enum
+{
+ TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */
+ TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
+};
+
+struct tasklet_head
+{
+ struct tasklet_struct *list;
+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
+
+extern struct tasklet_head tasklet_vec[NR_CPUS];
+extern struct tasklet_head tasklet_hi_vec[NR_CPUS];
+
+#ifdef CONFIG_SMP
+static inline int tasklet_trylock(struct tasklet_struct *t)
+{
+ return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
+}
+
+static inline void tasklet_unlock(struct tasklet_struct *t)
+{
+ smp_mb__before_clear_bit();
+ clear_bit(TASKLET_STATE_RUN, &(t)->state);
+}
+
+static inline void tasklet_unlock_wait(struct tasklet_struct *t)
+{
+ while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
+}
+#else
+#define tasklet_trylock(t) 1
+#define tasklet_unlock_wait(t) do { } while (0)
+#define tasklet_unlock(t) do { } while (0)
+#endif
+
+extern void FASTCALL(__tasklet_schedule(struct tasklet_struct *t));
+
+static inline void tasklet_schedule(struct tasklet_struct *t)
+{
+ if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
+ __tasklet_schedule(t);
+}
+
+extern void FASTCALL(__tasklet_hi_schedule(struct tasklet_struct *t));
+
+static inline void tasklet_hi_schedule(struct tasklet_struct *t)
+{
+ if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
+ __tasklet_hi_schedule(t);
+}
+
+
+static inline void tasklet_disable_nosync(struct tasklet_struct *t)
+{
+ atomic_inc(&t->count);
+ smp_mb__after_atomic_inc();
+}
+
+static inline void tasklet_disable(struct tasklet_struct *t)
+{
+ tasklet_disable_nosync(t);
+ tasklet_unlock_wait(t);
+ smp_mb();
+}
+
+static inline void tasklet_enable(struct tasklet_struct *t)
+{
+ smp_mb__before_atomic_dec();
+ if (atomic_dec_and_test(&t->count) &&
+ test_bit(TASKLET_STATE_SCHED, &t->state))
+ __tasklet_schedule(t);
+}
+
+static inline void tasklet_hi_enable(struct tasklet_struct *t)
+{
+ smp_mb__before_atomic_dec();
+ if (atomic_dec_and_test(&t->count) &&
+ test_bit(TASKLET_STATE_SCHED, &t->state))
+ __tasklet_hi_schedule(t);
+}
+
+extern void tasklet_kill(struct tasklet_struct *t);
+extern void tasklet_init(struct tasklet_struct *t,
+ void (*func)(unsigned long), unsigned long data);
+
+#ifdef CONFIG_SMP
+
+#define SMP_TIMER_NAME(name) name##__thr
+
+#define SMP_TIMER_DEFINE(name, task) \
+DECLARE_TASKLET(task, name##__thr, 0); \
+static void name (unsigned long dummy) \
+{ \
+ tasklet_schedule(&(task)); \
+}
+
+#else /* CONFIG_SMP */
+
+#define SMP_TIMER_NAME(name) name
+#define SMP_TIMER_DEFINE(name, task)
+
+#endif /* CONFIG_SMP */
+
+
+/* Old BH definitions */
+
+extern struct tasklet_struct bh_task_vec[];
+
+/* It is exported _ONLY_ for wait_on_irq(). */
+extern spinlock_t global_bh_lock;
+
+static inline void mark_bh(int nr)
+{
+ tasklet_hi_schedule(bh_task_vec+nr);
+}
+
+extern void init_bh(int nr, void (*routine)(void));
+extern void remove_bh(int nr);
+
+
+/*
+ * Autoprobing for irqs:
+ *
+ * probe_irq_on() and probe_irq_off() provide robust primitives
+ * for accurate IRQ probing during kernel initialization. They are
+ * reasonably simple to use, are not "fooled" by spurious interrupts,
+ * and, unlike other attempts at IRQ probing, they do not get hung on
+ * stuck interrupts (such as unused PS2 mouse interfaces on ASUS boards).
+ *
+ * For reasonably foolproof probing, use them as follows:
+ *
+ * 1. clear and/or mask the device's internal interrupt.
+ * 2. sti();
+ * 3. irqs = probe_irq_on(); // "take over" all unassigned idle IRQs
+ * 4. enable the device and cause it to trigger an interrupt.
+ * 5. wait for the device to interrupt, using non-intrusive polling or a delay.
+ * 6. irq = probe_irq_off(irqs); // get IRQ number, 0=none, negative=multiple
+ * 7. service the device to clear its pending interrupt.
+ * 8. loop again if paranoia is required.
+ *
+ * probe_irq_on() returns a mask of allocated irq's.
+ *
+ * probe_irq_off() takes the mask as a parameter,
+ * and returns the irq number which occurred,
+ * or zero if none occurred, or a negative irq number
+ * if more than one irq occurred.
+ */
+extern unsigned long probe_irq_on(void); /* returns 0 on failure */
+extern int probe_irq_off(unsigned long); /* returns 0 or negative on failure */
+extern unsigned int probe_irq_mask(unsigned long); /* returns mask of ISA interrupts */
+
+#endif
diff --git a/xen/include/xeno/ioctl.h b/xen/include/xeno/ioctl.h
new file mode 100644
index 0000000000..aa91eb3951
--- /dev/null
+++ b/xen/include/xeno/ioctl.h
@@ -0,0 +1,7 @@
+#ifndef _LINUX_IOCTL_H
+#define _LINUX_IOCTL_H
+
+#include <asm/ioctl.h>
+
+#endif /* _LINUX_IOCTL_H */
+
diff --git a/xen/include/xeno/ioport.h b/xen/include/xeno/ioport.h
new file mode 100644
index 0000000000..0416edc71e
--- /dev/null
+++ b/xen/include/xeno/ioport.h
@@ -0,0 +1,121 @@
+/*
+ * ioport.h Definitions of routines for detecting, reserving and
+ * allocating system resources.
+ *
+ * Authors: Linus Torvalds
+ */
+
+#ifndef _LINUX_IOPORT_H
+#define _LINUX_IOPORT_H
+
+/*
+ * Resources are tree-like, allowing
+ * nesting etc..
+ */
+struct resource {
+ const char *name;
+ unsigned long start, end;
+ unsigned long flags;
+ struct resource *parent, *sibling, *child;
+};
+
+struct resource_list {
+ struct resource_list *next;
+ struct resource *res;
+ struct pci_dev *dev;
+};
+
+/*
+ * IO resources have these defined flags.
+ */
+#define IORESOURCE_BITS 0x000000ff /* Bus-specific bits */
+
+#define IORESOURCE_IO 0x00000100 /* Resource type */
+#define IORESOURCE_MEM 0x00000200
+#define IORESOURCE_IRQ 0x00000400
+#define IORESOURCE_DMA 0x00000800
+
+#define IORESOURCE_PREFETCH 0x00001000 /* No side effects */
+#define IORESOURCE_READONLY 0x00002000
+#define IORESOURCE_CACHEABLE 0x00004000
+#define IORESOURCE_RANGELENGTH 0x00008000
+#define IORESOURCE_SHADOWABLE 0x00010000
+#define IORESOURCE_BUS_HAS_VGA 0x00080000
+
+#define IORESOURCE_UNSET 0x20000000
+#define IORESOURCE_AUTO 0x40000000
+#define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */
+
+/* ISA PnP IRQ specific bits (IORESOURCE_BITS) */
+#define IORESOURCE_IRQ_HIGHEDGE (1<<0)
+#define IORESOURCE_IRQ_LOWEDGE (1<<1)
+#define IORESOURCE_IRQ_HIGHLEVEL (1<<2)
+#define IORESOURCE_IRQ_LOWLEVEL (1<<3)
+
+/* ISA PnP DMA specific bits (IORESOURCE_BITS) */
+#define IORESOURCE_DMA_TYPE_MASK (3<<0)
+#define IORESOURCE_DMA_8BIT (0<<0)
+#define IORESOURCE_DMA_8AND16BIT (1<<0)
+#define IORESOURCE_DMA_16BIT (2<<0)
+
+#define IORESOURCE_DMA_MASTER (1<<2)
+#define IORESOURCE_DMA_BYTE (1<<3)
+#define IORESOURCE_DMA_WORD (1<<4)
+
+#define IORESOURCE_DMA_SPEED_MASK (3<<6)
+#define IORESOURCE_DMA_COMPATIBLE (0<<6)
+#define IORESOURCE_DMA_TYPEA (1<<6)
+#define IORESOURCE_DMA_TYPEB (2<<6)
+#define IORESOURCE_DMA_TYPEF (3<<6)
+
+/* ISA PnP memory I/O specific bits (IORESOURCE_BITS) */
+#define IORESOURCE_MEM_WRITEABLE (1<<0) /* dup: IORESOURCE_READONLY */
+#define IORESOURCE_MEM_CACHEABLE (1<<1) /* dup: IORESOURCE_CACHEABLE */
+#define IORESOURCE_MEM_RANGELENGTH (1<<2) /* dup: IORESOURCE_RANGELENGTH */
+#define IORESOURCE_MEM_TYPE_MASK (3<<3)
+#define IORESOURCE_MEM_8BIT (0<<3)
+#define IORESOURCE_MEM_16BIT (1<<3)
+#define IORESOURCE_MEM_8AND16BIT (2<<3)
+#define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */
+#define IORESOURCE_MEM_EXPANSIONROM (1<<6)
+
+/* PC/ISA/whatever - the normal PC address spaces: IO and memory */
+extern struct resource ioport_resource;
+extern struct resource iomem_resource;
+
+extern int get_resource_list(struct resource *, char *buf, int size);
+
+extern int check_resource(struct resource *root, unsigned long, unsigned long);
+extern int request_resource(struct resource *root, struct resource *new);
+extern int release_resource(struct resource *new);
+extern int allocate_resource(struct resource *root, struct resource *new,
+ unsigned long size,
+ unsigned long min, unsigned long max,
+ unsigned long align,
+ void (*alignf)(void *, struct resource *,
+ unsigned long, unsigned long),
+ void *alignf_data);
+
+/* Convenience shorthand with allocation */
+#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name))
+#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name))
+
+extern struct resource * __request_region(struct resource *, unsigned long start, unsigned long n, const char *name);
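+
+/*
+ * Canonical driver pattern (illustrative; "mydev" and io_base are
+ * assumed values):
+ *
+ *	if (!request_region(io_base, 8, "mydev"))
+ *		return -EBUSY;
+ *	...
+ *	release_region(io_base, 8);
+ */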
+
+/* Compatibility cruft */
+#define check_region(start,n) __check_region(&ioport_resource, (start), (n))
+#define release_region(start,n) __release_region(&ioport_resource, (start), (n))
+#define check_mem_region(start,n) __check_region(&iomem_resource, (start), (n))
+#define release_mem_region(start,n) __release_region(&iomem_resource, (start), (n))
+
+extern int __check_region(struct resource *, unsigned long, unsigned long);
+extern void __release_region(struct resource *, unsigned long, unsigned long);
+
+#define get_ioport_list(buf) get_resource_list(&ioport_resource, buf, PAGE_SIZE)
+#define get_mem_list(buf) get_resource_list(&iomem_resource, buf, PAGE_SIZE)
+
+#define HAVE_AUTOIRQ
+extern void autoirq_setup(int waittime);
+extern int autoirq_report(int waittime);
+
+#endif /* _LINUX_IOPORT_H */
diff --git a/xen/include/xeno/irq.h b/xen/include/xeno/irq.h
new file mode 100644
index 0000000000..7342491345
--- /dev/null
+++ b/xen/include/xeno/irq.h
@@ -0,0 +1,63 @@
+#ifndef __irq_h
+#define __irq_h
+
+#include <xeno/config.h>
+#include <xeno/spinlock.h>
+#include <asm/ptrace.h>
+
+/*
+ * IRQ line status.
+ */
+#define IRQ_INPROGRESS 1 /* IRQ handler active - do not enter! */
+#define IRQ_DISABLED 2 /* IRQ disabled - do not enter! */
+#define IRQ_PENDING 4 /* IRQ pending - replay on enable */
+#define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */
+#define IRQ_AUTODETECT 16 /* IRQ is being autodetected */
+#define IRQ_WAITING 32 /* IRQ not yet seen - for autodetection */
+#define IRQ_LEVEL 64 /* IRQ level triggered */
+#define IRQ_MASKED 128 /* IRQ masked - shouldn't be seen again */
+#define IRQ_PER_CPU 256 /* IRQ is per CPU */
+
+/*
+ * Interrupt controller descriptor. This is all we need
+ * to describe about the low-level hardware.
+ */
+struct hw_interrupt_type {
+ const char * typename;
+ unsigned int (*startup)(unsigned int irq);
+ void (*shutdown)(unsigned int irq);
+ void (*enable)(unsigned int irq);
+ void (*disable)(unsigned int irq);
+ void (*ack)(unsigned int irq);
+ void (*end)(unsigned int irq);
+ void (*set_affinity)(unsigned int irq, unsigned long mask);
+};
+
+typedef struct hw_interrupt_type hw_irq_controller;
+
+#include <asm/irq.h>
+
+/*
+ * This is the "IRQ descriptor", which contains various information
+ * about the irq, including what kind of hardware handling it has,
+ * whether it is disabled etc etc.
+ *
+ * Pad this out to 32 bytes for cache and indexing reasons.
+ */
+typedef struct {
+ unsigned int status; /* IRQ status */
+ hw_irq_controller *handler;
+ struct irqaction *action; /* IRQ action list */
+ unsigned int depth; /* nested irq disables */
+ spinlock_t lock;
+} ____cacheline_aligned irq_desc_t;
+
+extern irq_desc_t irq_desc [NR_IRQS];
+
+extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
+extern int setup_irq(unsigned int , struct irqaction * );
+
+extern hw_irq_controller no_irq_type; /* needed in every arch ? */
+extern void no_action(int cpl, void *dev_id, struct pt_regs *regs);
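+
+/*
+ * Illustrative registration of a static handler (names hypothetical;
+ * fields follow struct irqaction order: handler, flags, mask, name,
+ * dev_id, next):
+ *
+ *	static struct irqaction myaction = {
+ *		my_handler, SA_INTERRUPT, 0, "mydev", NULL, NULL
+ *	};
+ *	setup_irq(MYDEV_IRQ, &myaction);
+ */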
+
+#endif /* __irq_h */
diff --git a/xen/include/xeno/irq_cpustat.h b/xen/include/xeno/irq_cpustat.h
new file mode 100644
index 0000000000..646655403a
--- /dev/null
+++ b/xen/include/xeno/irq_cpustat.h
@@ -0,0 +1,34 @@
+#ifndef __irq_cpustat_h
+#define __irq_cpustat_h
+
+/*
+ * Contains default mappings for irq_cpustat_t, used by almost every
+ * architecture. Some archs (like s390) have per-cpu hardware pages and
+ * they define their own mappings for irq_stat.
+ *
+ * Keith Owens <kaos@ocs.com.au> July 2000.
+ */
+
+#include <xeno/config.h>
+
+/*
+ * Simple wrappers reducing source bloat. Define all irq_stat fields
+ * here, even ones that are arch dependent. That way we get common
+ * definitions instead of differing sets for each arch.
+ */
+
+extern irq_cpustat_t irq_stat[]; /* defined in asm/hardirq.h */
+
+#ifdef CONFIG_SMP
+#define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)
+#else
+#define __IRQ_STAT(cpu, member) ((void)(cpu), irq_stat[0].member)
+#endif
+
+ /* arch independent irq_stat fields */
+#define softirq_pending(cpu) __IRQ_STAT((cpu), __softirq_pending)
+#define local_irq_count(cpu) __IRQ_STAT((cpu), __local_irq_count)
+#define local_bh_count(cpu) __IRQ_STAT((cpu), __local_bh_count)
+#define syscall_count(cpu) __IRQ_STAT((cpu), __syscall_count)
+
+#endif /* __irq_cpustat_h */
diff --git a/xen/include/xeno/kdev_t.h b/xen/include/xeno/kdev_t.h
new file mode 100644
index 0000000000..9d85cba3e5
--- /dev/null
+++ b/xen/include/xeno/kdev_t.h
@@ -0,0 +1,123 @@
+#ifndef _LINUX_KDEV_T_H
+#define _LINUX_KDEV_T_H
+#if defined(__KERNEL__) || defined(_LVM_H_INCLUDE)
+/*
+As a preparation for the introduction of larger device numbers,
+we introduce a type kdev_t to hold them. No information about
+this type is known outside of this include file.
+
+Objects of type kdev_t designate a device. Outside of the kernel
+the corresponding things are objects of type dev_t - usually an
+integral type with the device major and minor in the high and low
+bits, respectively. Conversion is done by
+
+extern kdev_t to_kdev_t(int);
+
+It is up to the various file systems to decide how objects of type
+dev_t are stored on disk.
+The only other point of contact between kernel and outside world
+are the system calls stat and mknod, new versions of which will
+eventually have to be used in libc.
+
+[Unfortunately, the floppy control ioctls fail to hide the internal
+kernel structures, and the fd_device field of a struct floppy_drive_struct
+is user-visible. So, it remains a dev_t for the moment, with some ugly
+conversions in floppy.c.]
+
+Inside the kernel, we aim for a kdev_t type that is a pointer
+to a structure with information about the device (like major,
+minor, size, blocksize, sectorsize, name, read-only flag,
+struct file_operations etc.).
+
+However, for the time being we let kdev_t be almost the same as dev_t:
+
+typedef struct { unsigned short major, minor; } kdev_t;
+
+Admissible operations on an object of type kdev_t:
+- passing it along
+- comparing it for equality with another such object
+- storing it in ROOT_DEV, inode->i_dev, inode->i_rdev, sb->s_dev,
+ bh->b_dev, req->rq_dev, de->dc_dev, tty->device
+- using its bit pattern as argument in a hash function
+- finding its major and minor
+- complaining about it
+
+An object of type kdev_t is created only by the function MKDEV(),
+with the single exception of the constant 0 (no device).
+
+Right now the other information mentioned above is usually found
+in static arrays indexed by major or major,minor.
+
+An obstacle to immediately using
+ typedef struct { ... (* lots of information *) } *kdev_t
+is the case of mknod used to create a block device that the
+kernel doesn't know about at present (but first learns about
+when some module is inserted).
+
+aeb - 950811
+*/
+
+/* Since MINOR(dev) is used as index in static arrays,
+ the kernel is not quite ready yet for larger minors.
+ However, everything runs fine with an arbitrary kdev_t type. */
+
+#define MINORBITS 8
+#define MINORMASK ((1U << MINORBITS) - 1)
+
+typedef unsigned short kdev_t;
+
+#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
+#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
+#define HASHDEV(dev) ((unsigned int) (dev))
+#define NODEV 0
+#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi))
+#define B_FREE 0xffff /* yuk */
+
+extern const char * kdevname(kdev_t); /* note: returns pointer to static data! */
+
+/* 2.5.x compatibility */
+#define mk_kdev(a,b) MKDEV(a,b)
+#define major(d) MAJOR(d)
+#define minor(d) MINOR(d)
+#define kdev_same(a,b) ((a) == (b))
+#define kdev_none(d) (!(d))
+#define kdev_val(d) ((unsigned int)(d))
+#define val_to_kdev(d) ((kdev_t)(d))
+
+/*
+As long as device numbers in the outside world have 16 bits only,
+we use these conversions.
+*/
+
+static inline unsigned int kdev_t_to_nr(kdev_t dev) {
+ return (MAJOR(dev)<<8) | MINOR(dev);
+}
+
+static inline kdev_t to_kdev_t(int dev)
+{
+ int major, minor;
+#if 0
+ major = (dev >> 16);
+ if (!major) {
+ major = (dev >> 8);
+ minor = (dev & 0xff);
+ } else
+ minor = (dev & 0xffff);
+#else
+ major = (dev >> 8);
+ minor = (dev & 0xff);
+#endif
+ return MKDEV(major, minor);
+}
+
+#else /* __KERNEL__ || _LVM_H_INCLUDE */
+
+/*
+Some programs want their definitions of MAJOR and MINOR and MKDEV
+from the kernel sources. These must be the externally visible ones.
+*/
+#define MAJOR(dev) ((dev)>>8)
+#define MINOR(dev) ((dev) & 0xff)
+#define MKDEV(ma,mi) ((ma)<<8 | (mi))
+#endif /* __KERNEL__ || _LVM_H_INCLUDE */
+#endif
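
For illustration (not from the patch), the round trip between the external
16-bit dev_t form and kdev_t while both remain 8+8 bits:

    #include <xeno/kdev_t.h>

    static void kdev_demo(void)
    {
        kdev_t dev = MKDEV(3, 1);            /* IDE0 disk, partition 1 */
        unsigned int ma = MAJOR(dev);        /* 3 */
        unsigned int mi = MINOR(dev);        /* 1 */
        unsigned int nr = kdev_t_to_nr(dev); /* 0x0301, the external form */
        (void)ma; (void)mi; (void)nr;        /* silence unused warnings */
    }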
diff --git a/xen/include/xeno/kernel.h b/xen/include/xeno/kernel.h
new file mode 100644
index 0000000000..993a6c19cf
--- /dev/null
+++ b/xen/include/xeno/kernel.h
@@ -0,0 +1,37 @@
+#ifndef _LINUX_KERNEL_H
+#define _LINUX_KERNEL_H
+
+/*
+ * 'kernel.h' contains some often-used function prototypes etc
+ */
+
+/*
+ * min()/max() macros that also do
+ * strict type-checking.. See the
+ * "unnecessary" pointer comparison.
+ */
+#define min(x,y) ({ \
+ const typeof(x) _x = (x); \
+ const typeof(y) _y = (y); \
+ (void) (&_x == &_y); \
+ _x < _y ? _x : _y; })
+
+#define max(x,y) ({ \
+ const typeof(x) _x = (x); \
+ const typeof(y) _y = (y); \
+ (void) (&_x == &_y); \
+ _x > _y ? _x : _y; })
+
+/*
+ * ...and if you can't take the strict
+ * types, you can specify one yourself.
+ *
+ * Or not use min/max at all, of course.
+ */
+#define min_t(type,x,y) \
+ ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
+#define max_t(type,x,y) \
+ ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
+
+#endif /* _LINUX_KERNEL_H */
+
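
The pointer comparison inside min()/max() is what enforces the type check:
comparing pointers to two distinct types draws a compiler warning. A sketch
(not from the patch) of the warning and the min_t() escape hatch:

    #include <xeno/kernel.h>

    static unsigned long clamp_demo(int i, unsigned long l)
    {
        /* min(i, l) would warn here: (void)(&_x == &_y) compares an
           int * with an unsigned long *, i.e. distinct pointer types. */
        return min_t(unsigned long, i, l); /* explicit type: no warning */
    }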
diff --git a/xen/include/xeno/keyhandler.h b/xen/include/xeno/keyhandler.h
new file mode 100644
index 0000000000..d03e09aa47
--- /dev/null
+++ b/xen/include/xeno/keyhandler.h
@@ -0,0 +1,16 @@
+/*
+** We keep an array of 'handlers' for each key code between 0 and 255;
+** this is intended to allow very simple debugging routines (toggle
+** debug flag, dump registers, reboot, etc) to be hooked in rather
+** more cleanly than by editing the serial/keyboard drivers directly.
+*/
+#include <xeno/sched.h>
+
+typedef void key_handler(unsigned char key, void *dev_id,
+ struct pt_regs *regs);
+
+extern void add_key_handler(unsigned char key,
+ key_handler *handler, char *desc);
+
+extern key_handler *get_key_handler(unsigned char key);
+
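
A hypothetical registration, for illustration only (the handler and function
names are invented; printk is assumed to come from xeno/lib.h):

    #include <xeno/keyhandler.h>
    #include <xeno/lib.h>

    static void dump_stats(unsigned char key, void *dev_id,
                           struct pt_regs *regs)
    {
        printk("'%c' pressed -- dumping stats\n", key);
    }

    static void register_debug_keys(void)
    {
        add_key_handler('s', dump_stats, "dump stats");
    }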
diff --git a/xen/include/xeno/lib.h b/xen/include/xeno/lib.h
new file mode 100644
index 0000000000..cd40d119f0
--- /dev/null
+++ b/xen/include/xeno/lib.h
@@ -0,0 +1,51 @@
+#ifndef __LIB_H__
+#define __LIB_H__
+
+#include <stdarg.h>
+#include <xeno/types.h>
+
+#ifndef NDEBUG
+#define ASSERT(_p) do { if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s\n", #_p, __LINE__, __FILE__); *(int*)0 = 0; } } while ( 0 )
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+#define reserve_bootmem(_p,_l) \
+printk("Memory Reservation 0x%lx, %lu bytes\n", (_p), (_l))
+
+/* lib.c */
+int memcmp(const void * cs,const void * ct,size_t count);
+void * memcpy(void * dest,const void *src,size_t count);
+int strncmp(const char * cs,const char * ct,size_t count);
+int strcmp(const char * cs,const char * ct);
+char * strcpy(char * dest,const char *src);
+char * strncpy(char * dest,const char *src,size_t count);
+void * memset(void * s,int c,size_t count);
+size_t strnlen(const char * s, size_t count);
+size_t strlen(const char * s);
+char * strchr(const char *,int);
+char * strstr(const char * s1,const char * s2);
+unsigned long str_to_quad(unsigned char *s);
+unsigned char *quad_to_str(unsigned long q, unsigned char *s);
+
+/* kernel.c */
+#define printk printf
+void printf (const char *format, ...);
+void cls(void);
+void panic(const char *format, ...);
+
+/* vsprintf.c */
+extern int sprintf(char * buf, const char * fmt, ...)
+ __attribute__ ((format (printf, 2, 3)));
+extern int vsprintf(char *buf, const char *, va_list);
+extern int snprintf(char * buf, size_t size, const char * fmt, ...)
+ __attribute__ ((format (printf, 3, 4)));
+extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+
+extern int sscanf(const char *, const char *, ...)
+ __attribute__ ((format (scanf,2,3)));
+extern int vsscanf(const char *, const char *, va_list);
+long simple_strtol(const char *cp,char **endp,unsigned int base);
+long long simple_strtoll(const char *cp,char **endp,unsigned int base);
+
+#endif /* __LIB_H__ */
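
ASSERT() compiles away when NDEBUG is defined; otherwise a failed predicate
prints the diagnostics and halts via the null write. A small sketch
(illustrative, not from the patch):

    #include <xeno/lib.h>

    static int checked_div(int a, int b)
    {
        ASSERT(b != 0); /* debug build: print and crash if b == 0 */
        return a / b;
    }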
diff --git a/xen/include/xeno/list.h b/xen/include/xeno/list.h
new file mode 100644
index 0000000000..4124a9a037
--- /dev/null
+++ b/xen/include/xeno/list.h
@@ -0,0 +1,160 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) do { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_add(struct list_head * new,
+ struct list_head * prev,
+ struct list_head * next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static __inline__ void list_add(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static __inline__ void list_add_tail(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_del(struct list_head * prev,
+ struct list_head * next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty() on entry does not return true after this; the
+ * entry is left in an undefined state.
+ */
+static __inline__ void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static __inline__ void list_del_init(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static __inline__ int list_empty(struct list_head *head)
+{
+ return head->next == head;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static __inline__ void list_splice(struct list_head *list, struct list_head *head)
+{
+ struct list_head *first = list->next;
+
+ if (first != list) {
+ struct list_head *last = list->prev;
+ struct list_head *at = head->next;
+
+ first->prev = head;
+ head->next = first;
+
+ last->next = at;
+ at->prev = last;
+ }
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+#endif
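
The intended usage pattern, for illustration (struct frob and the function
names are invented): embed a struct list_head in the payload, then recover
the payload from a node with list_entry().

    #include <xeno/list.h>

    struct frob {
        int value;
        struct list_head link;   /* embedded list node */
    };

    static LIST_HEAD(frob_list);

    static void add_frob(struct frob *f)
    {
        list_add_tail(&f->link, &frob_list);   /* FIFO order */
    }

    static int sum_frobs(void)
    {
        struct list_head *pos;
        int sum = 0;
        list_for_each(pos, &frob_list) {
            struct frob *f = list_entry(pos, struct frob, link);
            sum += f->value;
        }
        return sum;
    }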
diff --git a/xen/include/xeno/major.h b/xen/include/xeno/major.h
new file mode 100644
index 0000000000..b30f88baf8
--- /dev/null
+++ b/xen/include/xeno/major.h
@@ -0,0 +1,199 @@
+#ifndef _LINUX_MAJOR_H
+#define _LINUX_MAJOR_H
+
+#include <xeno/kdev_t.h>
+
+/*
+ * This file has definitions for major device numbers.
+ * For the device number assignments, see Documentation/devices.txt.
+ */
+
+/* limits */
+
+/*
+ * Important: Don't change this to 256. Major number 255 is and must be
+ * reserved for future expansion into a larger dev_t space.
+ */
+#define MAX_CHRDEV 255
+#define MAX_BLKDEV 255
+
+#define UNNAMED_MAJOR 0
+#define MEM_MAJOR 1
+#define RAMDISK_MAJOR 1
+#define FLOPPY_MAJOR 2
+#define PTY_MASTER_MAJOR 2
+#define IDE0_MAJOR 3
+#define PTY_SLAVE_MAJOR 3
+#define HD_MAJOR IDE0_MAJOR
+#define TTY_MAJOR 4
+#define TTYAUX_MAJOR 5
+#define LP_MAJOR 6
+#define VCS_MAJOR 7
+#define LOOP_MAJOR 7
+#define SCSI_DISK0_MAJOR 8
+#define SCSI_TAPE_MAJOR 9
+#define MD_MAJOR 9
+#define MISC_MAJOR 10
+#define SCSI_CDROM_MAJOR 11
+#define QIC02_TAPE_MAJOR 12
+#define XT_DISK_MAJOR 13
+#define SOUND_MAJOR 14
+#define CDU31A_CDROM_MAJOR 15
+#define JOYSTICK_MAJOR 15
+#define GOLDSTAR_CDROM_MAJOR 16
+#define OPTICS_CDROM_MAJOR 17
+#define SANYO_CDROM_MAJOR 18
+#define CYCLADES_MAJOR 19
+#define CYCLADESAUX_MAJOR 20
+#define MITSUMI_X_CDROM_MAJOR 20
+#define MFM_ACORN_MAJOR 21 /* ARM Linux /dev/mfm */
+#define SCSI_GENERIC_MAJOR 21
+#define Z8530_MAJOR 34
+#define DIGI_MAJOR 23
+#define IDE1_MAJOR 22
+#define DIGICU_MAJOR 22
+#define MITSUMI_CDROM_MAJOR 23
+#define CDU535_CDROM_MAJOR 24
+#define STL_SERIALMAJOR 24
+#define MATSUSHITA_CDROM_MAJOR 25
+#define STL_CALLOUTMAJOR 25
+#define MATSUSHITA_CDROM2_MAJOR 26
+#define QIC117_TAPE_MAJOR 27
+#define MATSUSHITA_CDROM3_MAJOR 27
+#define MATSUSHITA_CDROM4_MAJOR 28
+#define STL_SIOMEMMAJOR 28
+#define ACSI_MAJOR 28
+#define AZTECH_CDROM_MAJOR 29
+#define GRAPHDEV_MAJOR 29 /* SparcLinux & Linux/68k /dev/fb */
+#define SHMIQ_MAJOR 85 /* Linux/mips, SGI /dev/shmiq */
+#define CM206_CDROM_MAJOR 32
+#define IDE2_MAJOR 33
+#define IDE3_MAJOR 34
+#define XPRAM_MAJOR 35 /* expanded storage on S/390 = "slow ram" */
+ /* proposed by Peter */
+#define NETLINK_MAJOR 36
+#define PS2ESDI_MAJOR 36
+#define IDETAPE_MAJOR 37
+#define Z2RAM_MAJOR 37
+#define APBLOCK_MAJOR 38 /* AP1000 Block device */
+#define DDV_MAJOR 39 /* AP1000 DDV block device */
+#define NBD_MAJOR 43 /* Network block device */
+#define RISCOM8_NORMAL_MAJOR 48
+#define DAC960_MAJOR 48 /* 48..55 */
+#define RISCOM8_CALLOUT_MAJOR 49
+#define MKISS_MAJOR 55
+#define DSP56K_MAJOR 55 /* DSP56001 processor device */
+
+#define IDE4_MAJOR 56
+#define IDE5_MAJOR 57
+
+#define LVM_BLK_MAJOR 58 /* Logical Volume Manager */
+
+#define SCSI_DISK1_MAJOR 65
+#define SCSI_DISK2_MAJOR 66
+#define SCSI_DISK3_MAJOR 67
+#define SCSI_DISK4_MAJOR 68
+#define SCSI_DISK5_MAJOR 69
+#define SCSI_DISK6_MAJOR 70
+#define SCSI_DISK7_MAJOR 71
+
+
+#define COMPAQ_SMART2_MAJOR 72
+#define COMPAQ_SMART2_MAJOR1 73
+#define COMPAQ_SMART2_MAJOR2 74
+#define COMPAQ_SMART2_MAJOR3 75
+#define COMPAQ_SMART2_MAJOR4 76
+#define COMPAQ_SMART2_MAJOR5 77
+#define COMPAQ_SMART2_MAJOR6 78
+#define COMPAQ_SMART2_MAJOR7 79
+
+#define SPECIALIX_NORMAL_MAJOR 75
+#define SPECIALIX_CALLOUT_MAJOR 76
+
+#define COMPAQ_CISS_MAJOR 104
+#define COMPAQ_CISS_MAJOR1 105
+#define COMPAQ_CISS_MAJOR2 106
+#define COMPAQ_CISS_MAJOR3 107
+#define COMPAQ_CISS_MAJOR4 108
+#define COMPAQ_CISS_MAJOR5 109
+#define COMPAQ_CISS_MAJOR6 110
+#define COMPAQ_CISS_MAJOR7 111
+
+#define ATARAID_MAJOR 114
+
+#define DASD_MAJOR 94 /* Official assignations from Peter */
+
+#define MDISK_MAJOR 95 /* Official assignations from Peter */
+
+#define I2O_MAJOR 80 /* 80->87 */
+
+#define IDE6_MAJOR 88
+#define IDE7_MAJOR 89
+#define IDE8_MAJOR 90
+#define IDE9_MAJOR 91
+
+#define UBD_MAJOR 98
+
+#define AURORA_MAJOR 79
+
+#define JSFD_MAJOR 99
+
+#define PHONE_MAJOR 100
+
+#define LVM_CHAR_MAJOR 109 /* Logical Volume Manager */
+
+#define UMEM_MAJOR 116 /* http://www.umem.com/ Battery Backed RAM */
+
+#define RTF_MAJOR 150
+#define RAW_MAJOR 162
+
+#define USB_ACM_MAJOR 166
+#define USB_ACM_AUX_MAJOR 167
+#define USB_CHAR_MAJOR 180
+
+#define UNIX98_PTY_MASTER_MAJOR 128
+#define UNIX98_PTY_MAJOR_COUNT 8
+#define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT)
+
+#define VXVM_MAJOR 199 /* VERITAS volume i/o driver */
+#define VXSPEC_MAJOR 200 /* VERITAS volume config driver */
+#define VXDMP_MAJOR 201 /* VERITAS volume multipath driver */
+
+#define MSR_MAJOR 202
+#define CPUID_MAJOR 203
+
+#define OSST_MAJOR 206 /* OnStream-SCx0 SCSI tape */
+
+#define IBM_TTY3270_MAJOR 227 /* Official allocations now */
+#define IBM_FS3270_MAJOR 228
+
+/*
+ * Tests for SCSI devices.
+ */
+
+#define SCSI_DISK_MAJOR(M) ((M) == SCSI_DISK0_MAJOR || \
+ ((M) >= SCSI_DISK1_MAJOR && (M) <= SCSI_DISK7_MAJOR))
+
+#define SCSI_BLK_MAJOR(M) \
+ (SCSI_DISK_MAJOR(M) \
+ || (M) == SCSI_CDROM_MAJOR)
+
+static __inline__ int scsi_blk_major(int m) {
+ return SCSI_BLK_MAJOR(m);
+}
+
+/*
+ * Tests for IDE devices
+ */
+#define IDE_DISK_MAJOR(M) ((M) == IDE0_MAJOR || (M) == IDE1_MAJOR || \
+ (M) == IDE2_MAJOR || (M) == IDE3_MAJOR || \
+ (M) == IDE4_MAJOR || (M) == IDE5_MAJOR || \
+ (M) == IDE6_MAJOR || (M) == IDE7_MAJOR || \
+ (M) == IDE8_MAJOR || (M) == IDE9_MAJOR)
+
+static __inline__ int ide_blk_major(int m)
+{
+ return IDE_DISK_MAJOR(m);
+}
+
+#endif
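
A sketch of how the classification helpers at the end combine (illustrative
only; the function is invented):

    #include <xeno/major.h>

    static int is_disk_major(kdev_t dev)
    {
        int ma = MAJOR(dev);
        return ide_blk_major(ma) || scsi_blk_major(ma);
    }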
diff --git a/xen/include/xeno/mii.h b/xen/include/xeno/mii.h
new file mode 100644
index 0000000000..943913583d
--- /dev/null
+++ b/xen/include/xeno/mii.h
@@ -0,0 +1,165 @@
+/*
+ * linux/mii.h: definitions for MII-compatible transceivers
+ * Originally drivers/net/sunhme.h.
+ *
+ * Copyright (C) 1996, 1999, 2001 David S. Miller (davem@redhat.com)
+ */
+
+#ifndef __LINUX_MII_H__
+#define __LINUX_MII_H__
+
+#include <linux/types.h>
+
+/* Generic MII registers. */
+
+#define MII_BMCR 0x00 /* Basic mode control register */
+#define MII_BMSR 0x01 /* Basic mode status register */
+#define MII_PHYSID1 0x02 /* PHYS ID 1 */
+#define MII_PHYSID2 0x03 /* PHYS ID 2 */
+#define MII_ADVERTISE 0x04 /* Advertisement control reg */
+#define MII_LPA 0x05 /* Link partner ability reg */
+#define MII_EXPANSION 0x06 /* Expansion register */
+#define MII_DCOUNTER 0x12 /* Disconnect counter */
+#define MII_FCSCOUNTER 0x13 /* False carrier counter */
+#define MII_NWAYTEST 0x14 /* N-way auto-neg test reg */
+#define MII_RERRCOUNTER 0x15 /* Receive error counter */
+#define MII_SREVISION 0x16 /* Silicon revision */
+#define MII_RESV1 0x17 /* Reserved... */
+#define MII_LBRERROR 0x18 /* Lpback, rx, bypass error */
+#define MII_PHYADDR 0x19 /* PHY address */
+#define MII_RESV2 0x1a /* Reserved... */
+#define MII_TPISTATUS 0x1b /* TPI status for 10mbps */
+#define MII_NCONFIG 0x1c /* Network interface config */
+
+/* Basic mode control register. */
+#define BMCR_RESV 0x007f /* Unused... */
+#define BMCR_CTST 0x0080 /* Collision test */
+#define BMCR_FULLDPLX 0x0100 /* Full duplex */
+#define BMCR_ANRESTART 0x0200 /* Auto negotiation restart */
+#define BMCR_ISOLATE 0x0400 /* Disconnect DP83840 from MII */
+#define BMCR_PDOWN 0x0800 /* Powerdown the DP83840 */
+#define BMCR_ANENABLE 0x1000 /* Enable auto negotiation */
+#define BMCR_SPEED100 0x2000 /* Select 100Mbps */
+#define BMCR_LOOPBACK 0x4000 /* TXD loopback bits */
+#define BMCR_RESET 0x8000 /* Reset the DP83840 */
+
+/* Basic mode status register. */
+#define BMSR_ERCAP 0x0001 /* Ext-reg capability */
+#define BMSR_JCD 0x0002 /* Jabber detected */
+#define BMSR_LSTATUS 0x0004 /* Link status */
+#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */
+#define BMSR_RFAULT 0x0010 /* Remote fault detected */
+#define BMSR_ANEGCOMPLETE 0x0020 /* Auto-negotiation complete */
+#define BMSR_RESV 0x07c0 /* Unused... */
+#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */
+#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */
+#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */
+#define BMSR_100FULL 0x4000 /* Can do 100mbps, full-duplex */
+#define BMSR_100BASE4 0x8000 /* Can do 100mbps, 4k packets */
+
+/* Advertisement control register. */
+#define ADVERTISE_SLCT 0x001f /* Selector bits */
+#define ADVERTISE_CSMA 0x0001 /* Only selector supported */
+#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */
+#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */
+#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */
+#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */
+#define ADVERTISE_100BASE4 0x0200 /* Try for 100mbps 4k packets */
+#define ADVERTISE_RESV 0x1c00 /* Unused... */
+#define ADVERTISE_RFAULT 0x2000 /* Say we can detect faults */
+#define ADVERTISE_LPACK 0x4000 /* Ack link partner's response */
+#define ADVERTISE_NPAGE 0x8000 /* Next page bit */
+
+#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \
+ ADVERTISE_100HALF | ADVERTISE_100FULL)
+
+/* Link partner ability register. */
+#define LPA_SLCT 0x001f /* Same as advertise selector */
+#define LPA_10HALF 0x0020 /* Can do 10mbps half-duplex */
+#define LPA_10FULL 0x0040 /* Can do 10mbps full-duplex */
+#define LPA_100HALF 0x0080 /* Can do 100mbps half-duplex */
+#define LPA_100FULL 0x0100 /* Can do 100mbps full-duplex */
+#define LPA_100BASE4 0x0200 /* Can do 100mbps 4k packets */
+#define LPA_RESV 0x1c00 /* Unused... */
+#define LPA_RFAULT 0x2000 /* Link partner faulted */
+#define LPA_LPACK 0x4000 /* Link partner acked us */
+#define LPA_NPAGE 0x8000 /* Next page bit */
+
+#define LPA_DUPLEX (LPA_10FULL | LPA_100FULL)
+#define LPA_100 (LPA_100FULL | LPA_100HALF | LPA_100BASE4)
+
+/* Expansion register for auto-negotiation. */
+#define EXPANSION_NWAY 0x0001 /* Can do N-way auto-nego */
+#define EXPANSION_LCWP 0x0002 /* Got new RX page code word */
+#define EXPANSION_ENABLENPAGE 0x0004 /* This enables npage words */
+#define EXPANSION_NPCAPABLE 0x0008 /* Link partner supports npage */
+#define EXPANSION_MFAULTS 0x0010 /* Multiple faults detected */
+#define EXPANSION_RESV 0xffe0 /* Unused... */
+
+/* N-way test register. */
+#define NWAYTEST_RESV1 0x00ff /* Unused... */
+#define NWAYTEST_LOOPBACK 0x0100 /* Enable loopback for N-way */
+#define NWAYTEST_RESV2 0xfe00 /* Unused... */
+
+/* This structure is used in all SIOCxMIIxxx ioctl calls */
+struct mii_ioctl_data {
+ u16 phy_id;
+ u16 reg_num;
+ u16 val_in;
+ u16 val_out;
+};
+
+
+/**
+ * mii_nway_result
+ * @negotiated: value of MII ANAR and'd with ANLPAR
+ *
+ * Given a set of MII abilities, this checks each bit and returns the
+ * currently supported media, in the priority order defined by
+ * IEEE 802.3u. We use LPA_xxx constants but note this is not the
+ * value of LPA solely, as described above.
+ *
+ * The one exception to IEEE 802.3u is that 100baseT4 is placed
+ * between 100T-full and 100T-half. If your phy does not support
+ * 100T4 this is fine. If your phy places 100T4 elsewhere in the
+ * priority order, you will need to roll your own function.
+ */
+static inline unsigned int mii_nway_result (unsigned int negotiated)
+{
+ unsigned int ret;
+
+ if (negotiated & LPA_100FULL)
+ ret = LPA_100FULL;
+ else if (negotiated & LPA_100BASE4)
+ ret = LPA_100BASE4;
+ else if (negotiated & LPA_100HALF)
+ ret = LPA_100HALF;
+ else if (negotiated & LPA_10FULL)
+ ret = LPA_10FULL;
+ else
+ ret = LPA_10HALF;
+
+ return ret;
+}
+
+/**
+ * mii_duplex
+ * @duplex_lock: Non-zero if duplex is locked at full
+ * @negotiated: value of MII ANAR and'd with ANLPAR
+ *
+ * A small helper function for a common case. Returns one
+ * if the media is operating or locked at full duplex, and
+ * returns zero otherwise.
+ */
+static inline unsigned int mii_duplex (unsigned int duplex_lock,
+ unsigned int negotiated)
+{
+ if (duplex_lock)
+ return 1;
+ if (mii_nway_result(negotiated) & LPA_DUPLEX)
+ return 1;
+ return 0;
+}
+
+
+#endif /* __LINUX_MII_H__ */
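
The usual negotiation readout, sketched under the assumption of a
driver-supplied register accessor (mdio_read() below is hypothetical and not
part of this header):

    #include <xeno/mii.h>

    extern int mdio_read(int phy_id, int reg); /* hypothetical accessor */

    static unsigned int phy_best_mode(int phy_id)
    {
        unsigned int advertise  = mdio_read(phy_id, MII_ADVERTISE);
        unsigned int lpa        = mdio_read(phy_id, MII_LPA);
        unsigned int negotiated = advertise & lpa;   /* ANAR & ANLPAR */
        return mii_nway_result(negotiated);          /* e.g. LPA_100FULL */
    }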
diff --git a/xen/include/xeno/mm.h b/xen/include/xeno/mm.h
new file mode 100644
index 0000000000..6605f1ed58
--- /dev/null
+++ b/xen/include/xeno/mm.h
@@ -0,0 +1,142 @@
+
+#ifndef __XENO_MM_H__
+#define __XENO_MM_H__
+
+#include <xeno/config.h>
+#include <asm/atomic.h>
+#include <asm/desc.h>
+#include <xeno/list.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+#include <xeno/spinlock.h>
+
+/* XXX KAF: These may die eventually, but so many refs in slab.c :((( */
+
+/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low four bits) */
+#define __GFP_DMA 0x01
+
+/* Action modifiers - doesn't change the zoning */
+#define __GFP_WAIT 0x10 /* Can wait and reschedule? */
+#define __GFP_HIGH 0x20 /* Should access emergency pools? */
+#define __GFP_IO 0x40 /* Can start low memory physical IO? */
+#define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? */
+#define __GFP_FS 0x100 /* Can call down to low-level FS? */
+
+#define GFP_ATOMIC (__GFP_HIGH)
+#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+
+/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
+ platforms, used as appropriate on others */
+
+#define GFP_DMA __GFP_DMA
+
+
+/******************************************************************************
+ * The following is for page_alloc.c.
+ */
+
+void init_page_allocator(unsigned long min, unsigned long max);
+unsigned long __get_free_pages(int mask, int order);
+void __free_pages(unsigned long p, int order);
+#define get_free_page(_m) (__get_free_pages((_m),0))
+#define __get_free_page(_m) (__get_free_pages((_m),0))
+#define free_pages(_p,_o) (__free_pages(_p,_o))
+#define free_page(_p) (__free_pages(_p,0))
+
+
+/******************************************************************************
+ * The following is the array of page info. One entry per page owned
+ * by the hypervisor, indexed from `mem_map', just like Linux.
+ *
+ * 12.11.02. We no longer use struct page or mem_map; these are replaced
+ * by struct pfn_info and frame_table respectively. Boris Dragovic
+ */
+
+/*
+ * This is still fatter than I'd like. Do we need the count?
+ * Do we need the flags? The list at least seems req'd by slab.c.
+ */
+typedef struct pfn_info {
+ struct list_head list; /* ->mapping has some page lists. */
+ unsigned long flags; /* atomic flags. */
+ unsigned long tot_count; /* Total domain usage count. */
+ unsigned long type_count; /* pagetable/dir, or domain-writeable refs. */
+} frame_table_t;
+
+/*
+ * We use a high bit to indicate that a page is pinned.
+ * We do not use the top bit as that would mean that we'd get confused with
+ * -ve error numbers in some places in common/memory.c.
+ */
+#define REFCNT_PIN_BIT 0x40000000UL
+
+#define get_page_tot(p) ((p)->tot_count++)
+#define put_page_tot(p) (--(p)->tot_count)
+#define page_tot_count(p) ((p)->tot_count)
+#define set_page_tot_count(p,v) ((p)->tot_count = v)
+
+#define get_page_type(p) ((p)->type_count++)
+#define put_page_type(p) (--(p)->type_count)
+#define page_type_count(p) ((p)->type_count)
+#define set_page_type_count(p,v) ((p)->type_count = v)
+
+#define PG_domain_mask 0x00ffffff /* owning domain (24 bits) */
+/* hypervisor flags (domain == 0) */
+#define PG_slab 24
+/* domain flags (domain != 0) */
+/*
+ * NB. The following three flags are MUTUALLY EXCLUSIVE!
+ * At most one can be true at any point, and 'type_count' counts how many
+ * references exist of the current type. A change in type can only occur
+ * when type_count == 0.
+ */
+#define PG_type_mask (15<<24) /* bits 24-27 */
+#define PGT_none (0<<24) /* no special uses of this page */
+#define PGT_l1_page_table (1<<24) /* using this page as an L1 page table? */
+#define PGT_l2_page_table (2<<24) /* using this page as an L2 page table? */
+#define PGT_l3_page_table (3<<24) /* using this page as an L3 page table? */
+#define PGT_l4_page_table (4<<24) /* using this page as an L4 page table? */
+#define PGT_gdt_page (5<<24) /* using this page in a GDT? */
+#define PGT_ldt_page (6<<24) /* using this page in an LDT? */
+#define PGT_writeable_page (7<<24) /* has writable mappings of this page? */
+#define PGT_net_rx_buf (8<<24) /* this page has been pirated by the net code. */
+
+#define PageSlab(page) test_bit(PG_slab, &(page)->flags)
+#define PageSetSlab(page) set_bit(PG_slab, &(page)->flags)
+#define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags)
+
+#define SHARE_PFN_WITH_DOMAIN(_pfn, _dom) \
+ do { \
+ (_pfn)->flags = (_dom) | PGT_writeable_page; \
+ (_pfn)->tot_count = (_pfn)->type_count = 1; \
+ } while ( 0 )
+
+#define UNSHARE_PFN(_pfn) \
+ (_pfn)->flags = (_pfn)->type_count = (_pfn)->tot_count = 0
+
+/* The array of struct pfn_info,
+ * free pfn list and number of free pfns in the free list
+ */
+extern frame_table_t * frame_table;
+extern unsigned long frame_table_size;
+extern struct list_head free_list;
+extern spinlock_t free_list_lock;
+extern unsigned int free_pfns;
+extern unsigned long max_page;
+void init_frametable(unsigned long nr_pages);
+
+/*
+ * The MPT (machine->physical mapping table) is an array of word-sized
+ * values, indexed on machine frame number. It is expected that guest OSes
+ * will use it to store a "physical" frame number to give the appearance of
+ * contiguous (or near contiguous) physical memory.
+ */
+#undef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START)
+
+/* Part of the domain API. */
+int do_process_page_updates(page_update_request_t *updates, int count);
+
+#define DEFAULT_GDT_ENTRIES ((FIRST_DOMAIN_GDT_ENTRY*8)-1)
+#define DEFAULT_GDT_ADDRESS ((unsigned long)gdt_table)
+
+#endif /* __XENO_MM_H__ */
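
A sketch of the type-exclusion rule stated in the comments above (the helper
is invented for illustration): a frame may take on a new PGT_* type only
once no references of its current type remain.

    #include <xeno/mm.h>

    static int can_retype_as_writeable(frame_table_t *page)
    {
        unsigned long type = page->flags & PG_type_mask;
        if (type == PGT_writeable_page)
            return 1;                      /* already the right type */
        return page_type_count(page) == 0; /* no refs of another type */
    }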
diff --git a/xen/include/xeno/module.h b/xen/include/xeno/module.h
new file mode 100644
index 0000000000..5e8ce698d6
--- /dev/null
+++ b/xen/include/xeno/module.h
@@ -0,0 +1,417 @@
+/*
+ * Dynamic loading of modules into the kernel.
+ *
+ * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996
+ */
+
+#ifndef _LINUX_MODULE_H
+#define _LINUX_MODULE_H
+
+#include <linux/config.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+#ifdef __GENKSYMS__
+# define _set_ver(sym) sym
+# undef MODVERSIONS
+# define MODVERSIONS
+#else /* ! __GENKSYMS__ */
+# if !defined(MODVERSIONS) && defined(EXPORT_SYMTAB)
+# define _set_ver(sym) sym
+# include <linux/modversions.h>
+# endif
+#endif /* __GENKSYMS__ */
+
+#include <asm/atomic.h>
+
+/* Don't need to bring in all of uaccess.h just for this decl. */
+struct exception_table_entry;
+
+/* Used by get_kernel_syms, which is obsolete. */
+struct kernel_sym
+{
+ unsigned long value;
+ char name[60]; /* should have been 64-sizeof(long); oh well */
+};
+
+struct module_symbol
+{
+ unsigned long value;
+ const char *name;
+};
+
+struct module_ref
+{
+ struct module *dep; /* "parent" pointer */
+ struct module *ref; /* "child" pointer */
+ struct module_ref *next_ref;
+};
+
+/* TBD */
+struct module_persist;
+
+struct module
+{
+ unsigned long size_of_struct; /* == sizeof(module) */
+ struct module *next;
+ const char *name;
+ unsigned long size;
+
+ union
+ {
+ atomic_t usecount;
+ long pad;
+ } uc; /* Needs to keep its size - so says rth */
+
+ unsigned long flags; /* AUTOCLEAN et al */
+
+ unsigned nsyms;
+ unsigned ndeps;
+
+ struct module_symbol *syms;
+ struct module_ref *deps;
+ struct module_ref *refs;
+ int (*init)(void);
+ void (*cleanup)(void);
+ const struct exception_table_entry *ex_table_start;
+ const struct exception_table_entry *ex_table_end;
+#ifdef __alpha__
+ unsigned long gp;
+#endif
+ /* Members past this point are extensions to the basic
+ module support and are optional. Use mod_member_present()
+ to examine them. */
+ const struct module_persist *persist_start;
+ const struct module_persist *persist_end;
+ int (*can_unload)(void);
+ int runsize; /* In modutils, not currently used */
+ const char *kallsyms_start; /* All symbols for kernel debugging */
+ const char *kallsyms_end;
+ const char *archdata_start; /* arch specific data for module */
+ const char *archdata_end;
+ const char *kernel_data; /* Reserved for kernel internal use */
+};
+
+struct module_info
+{
+ unsigned long addr;
+ unsigned long size;
+ unsigned long flags;
+ long usecount;
+};
+
+/* Bits of module.flags. */
+
+#define MOD_UNINITIALIZED 0
+#define MOD_RUNNING 1
+#define MOD_DELETED 2
+#define MOD_AUTOCLEAN 4
+#define MOD_VISITED 8
+#define MOD_USED_ONCE 16
+#define MOD_JUST_FREED 32
+#define MOD_INITIALIZING 64
+
+/* Values for query_module's which. */
+
+#define QM_MODULES 1
+#define QM_DEPS 2
+#define QM_REFS 3
+#define QM_SYMBOLS 4
+#define QM_INFO 5
+
+/* Can the module be queried? */
+#define MOD_CAN_QUERY(mod) (((mod)->flags & (MOD_RUNNING | MOD_INITIALIZING)) && !((mod)->flags & MOD_DELETED))
+
+/* When struct module is extended, we must test whether the new member
+ is present in the header received from insmod before we can use it.
+   This macro returns true if the member is present. */
+
+#define mod_member_present(mod,member) \
+ ((unsigned long)(&((struct module *)0L)->member + 1) \
+ <= (mod)->size_of_struct)
+
+/*
+ * Ditto for archdata. Assumes mod->archdata_start and mod->archdata_end
+ * are validated elsewhere.
+ */
+#define mod_archdata_member_present(mod, type, member) \
+ (((unsigned long)(&((type *)0L)->member) + \
+ sizeof(((type *)0L)->member)) <= \
+ ((mod)->archdata_end - (mod)->archdata_start))
+
+
+/* Check if an address p with number of entries n is within the body of module m */
+#define mod_bound(p, n, m) ((unsigned long)(p) >= ((unsigned long)(m) + ((m)->size_of_struct)) && \
+ (unsigned long)((p)+(n)) <= (unsigned long)(m) + (m)->size)
+
+/* Backwards compatibility definition. */
+
+#define GET_USE_COUNT(module) (atomic_read(&(module)->uc.usecount))
+
+/* Poke the use count of a module. */
+
+#define __MOD_INC_USE_COUNT(mod) \
+ (atomic_inc(&(mod)->uc.usecount), (mod)->flags |= MOD_VISITED|MOD_USED_ONCE)
+#define __MOD_DEC_USE_COUNT(mod) \
+ (atomic_dec(&(mod)->uc.usecount), (mod)->flags |= MOD_VISITED)
+#define __MOD_IN_USE(mod) \
+ (mod_member_present((mod), can_unload) && (mod)->can_unload \
+ ? (mod)->can_unload() : atomic_read(&(mod)->uc.usecount))
+
+/* Indirect stringification. */
+
+#define __MODULE_STRING_1(x) #x
+#define __MODULE_STRING(x) __MODULE_STRING_1(x)
+
+/* Generic inter module communication.
+ *
+ * NOTE: This interface is intended for small amounts of data that are
+ * passed between two objects and either or both of the objects
+ * might be compiled as modules. Do not overuse this interface.
+ *
+ * If more than two objects need to communicate then you probably
+ * need a specific interface instead of abusing this generic
+ * interface. If both objects are *always* built into the kernel
+ * then a global extern variable is good enough, you do not need
+ * this interface.
+ *
+ * Keith Owens <kaos@ocs.com.au> 28 Oct 2000.
+ */
+
+#ifdef __KERNEL__
+#define HAVE_INTER_MODULE
+extern void inter_module_register(const char *, struct module *, const void *);
+extern void inter_module_unregister(const char *);
+extern const void *inter_module_get(const char *);
+extern const void *inter_module_get_request(const char *, const char *);
+extern void inter_module_put(const char *);
+
+struct inter_module_entry {
+ struct list_head list;
+ const char *im_name;
+ struct module *owner;
+ const void *userdata;
+};
+
+#if 0
+extern int try_inc_mod_count(struct module *mod);
+#else
+static inline int try_inc_mod_count(struct module * mod)
+{
+ if ( mod ) __MOD_INC_USE_COUNT(mod);
+ return 1;
+}
+#endif
+#endif /* __KERNEL__ */
+
+#if defined(MODULE) && !defined(__GENKSYMS__)
+
+/* Embedded module documentation macros. */
+
+/* For documentation purposes only. */
+
+#define MODULE_AUTHOR(name) \
+const char __module_author[] __attribute__((section(".modinfo"))) = \
+"author=" name
+
+#define MODULE_DESCRIPTION(desc) \
+const char __module_description[] __attribute__((section(".modinfo"))) = \
+"description=" desc
+
+/* Could potentially be used by kmod... */
+
+#define MODULE_SUPPORTED_DEVICE(dev) \
+const char __module_device[] __attribute__((section(".modinfo"))) = \
+"device=" dev
+
+/* Used to verify parameters given to the module. The TYPE arg should
+ be a string in the following format:
+ [min[-max]]{b,h,i,l,s}
+ The MIN and MAX specifiers delimit the length of the array. If MAX
+ is omitted, it defaults to MIN; if both are omitted, the default is 1.
+ The final character is a type specifier:
+ b byte
+ h short
+ i int
+ l long
+ s string
+*/
+
+#define MODULE_PARM(var,type) \
+const char __module_parm_##var[] \
+__attribute__((section(".modinfo"))) = \
+"parm_" __MODULE_STRING(var) "=" type
+
+#define MODULE_PARM_DESC(var,desc) \
+const char __module_parm_desc_##var[] \
+__attribute__((section(".modinfo"))) = \
+"parm_desc_" __MODULE_STRING(var) "=" desc
+
+/*
+ * MODULE_DEVICE_TABLE exports information about devices
+ * currently supported by this module. A device type, such as PCI,
+ * is a C-like identifier passed as the first arg to this macro.
+ * The second macro arg is the variable containing the device
+ * information being made public.
+ *
+ * The following is a list of known device types (arg 1),
+ * and the C types which are to be passed as arg 2.
+ * pci - struct pci_device_id - List of PCI ids supported by this module
+ * isapnp - struct isapnp_device_id - List of ISA PnP ids supported by this module
+ * usb - struct usb_device_id - List of USB ids supported by this module
+ */
+#define MODULE_GENERIC_TABLE(gtype,name) \
+static const unsigned long __module_##gtype##_size \
+ __attribute__ ((unused)) = sizeof(struct gtype##_id); \
+static const struct gtype##_id * __module_##gtype##_table \
+ __attribute__ ((unused)) = name
+
+/*
+ * The following license idents are currently accepted as indicating free
+ * software modules
+ *
+ * "GPL" [GNU Public License v2 or later]
+ * "GPL and additional rights" [GNU Public License v2 rights and more]
+ * "Dual BSD/GPL" [GNU Public License v2 or BSD license choice]
+ * "Dual MPL/GPL" [GNU Public License v2 or Mozilla license choice]
+ *
+ * The following other idents are available
+ *
+ * "Proprietary" [Non free products]
+ *
+ * There are dual licensed components, but when running with Linux it is the
+ * GPL that is relevant so this is a non issue. Similarly LGPL linked with GPL
+ * is a GPL combined work.
+ *
+ * This exists for several reasons
+ * 1. So modinfo can show license info for users wanting to vet their setup
+ * is free
+ * 2. So the community can ignore bug reports including proprietary modules
+ * 3. So vendors can do likewise based on their own policies
+ */
+
+#define MODULE_LICENSE(license) \
+static const char __module_license[] __attribute__((section(".modinfo"))) = \
+"license=" license
+
+/* Define the module variable, and usage macros. */
+extern struct module __this_module;
+
+#define THIS_MODULE (&__this_module)
+#define MOD_INC_USE_COUNT __MOD_INC_USE_COUNT(THIS_MODULE)
+#define MOD_DEC_USE_COUNT __MOD_DEC_USE_COUNT(THIS_MODULE)
+#define MOD_IN_USE __MOD_IN_USE(THIS_MODULE)
+
+#if 0
+#include <linux/version.h>
+static const char __module_kernel_version[] __attribute__((section(".modinfo"))) =
+"kernel_version=" UTS_RELEASE;
+#ifdef MODVERSIONS
+static const char __module_using_checksums[] __attribute__((section(".modinfo"))) =
+"using_checksums=1";
+#endif
+#endif
+
+#else /* MODULE */
+
+#define MODULE_AUTHOR(name)
+#define MODULE_LICENSE(license)
+#define MODULE_DESCRIPTION(desc)
+#define MODULE_SUPPORTED_DEVICE(name)
+#define MODULE_PARM(var,type)
+#define MODULE_PARM_DESC(var,desc)
+
+/* Create a dummy reference to the table to suppress gcc unused warnings. Put
+ * the reference in the .data.exit section which is discarded when code is built
+ * in, so the reference does not bloat the running kernel. Note: it cannot be
+ * const; other exit data may be writable.
+ */
+#define MODULE_GENERIC_TABLE(gtype,name) \
+static const struct gtype##_id * __module_##gtype##_table \
+ __attribute__ ((unused, __section__(".data.exit"))) = name
+
+#ifndef __GENKSYMS__
+
+#define THIS_MODULE NULL
+#define MOD_INC_USE_COUNT do { } while (0)
+#define MOD_DEC_USE_COUNT do { } while (0)
+#define MOD_IN_USE 1
+
+extern struct module *module_list;
+
+#endif /* !__GENKSYMS__ */
+
+#endif /* MODULE */
+
+#define MODULE_DEVICE_TABLE(type,name) \
+ MODULE_GENERIC_TABLE(type##_device,name)
+
+/* Export a symbol either from the kernel or a module.
+
+ In the kernel, the symbol is added to the kernel's global symbol table.
+
+ In a module, it controls which variables are exported. If no
+ variables are explicitly exported, the action is controled by the
+ insmod -[xX] flags. Otherwise, only the variables listed are exported.
+ This obviates the need for the old register_symtab() function. */
+
+#if defined(__GENKSYMS__)
+
+/* We want the EXPORT_SYMBOL tag left intact for recognition. */
+
+#elif !defined(CONFIG_MODULES)
+
+#define __EXPORT_SYMBOL(sym,str)
+#define EXPORT_SYMBOL(var)
+#define EXPORT_SYMBOL_NOVERS(var)
+#define EXPORT_SYMBOL_GPL(var)
+
+#elif !defined(EXPORT_SYMTAB)
+
+#define __EXPORT_SYMBOL(sym,str) error this_object_must_be_defined_as_export_objs_in_the_Makefile
+#define EXPORT_SYMBOL(var) error this_object_must_be_defined_as_export_objs_in_the_Makefile
+#define EXPORT_SYMBOL_NOVERS(var) error this_object_must_be_defined_as_export_objs_in_the_Makefile
+#define EXPORT_SYMBOL_GPL(var) error this_object_must_be_defined_as_export_objs_in_the_Makefile
+
+#else
+
+#define __EXPORT_SYMBOL(sym, str) \
+const char __kstrtab_##sym[] \
+__attribute__((section(".kstrtab"))) = str; \
+const struct module_symbol __ksymtab_##sym \
+__attribute__((section("__ksymtab"))) = \
+{ (unsigned long)&sym, __kstrtab_##sym }
+
+#define __EXPORT_SYMBOL_GPL(sym, str) \
+const char __kstrtab_##sym[] \
+__attribute__((section(".kstrtab"))) = "GPLONLY_" str; \
+const struct module_symbol __ksymtab_##sym \
+__attribute__((section("__ksymtab"))) = \
+{ (unsigned long)&sym, __kstrtab_##sym }
+
+#if defined(MODVERSIONS) || !defined(CONFIG_MODVERSIONS)
+#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var))
+#define EXPORT_SYMBOL_GPL(var) __EXPORT_SYMBOL_GPL(var, __MODULE_STRING(var))
+#else
+#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(__VERSIONED_SYMBOL(var)))
+#define EXPORT_SYMBOL_GPL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(__VERSIONED_SYMBOL(var)))
+#endif
+
+#define EXPORT_SYMBOL_NOVERS(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var))
+
+#endif /* __GENKSYMS__ */
+
+#ifdef MODULE
+/* Force a module to export no symbols. */
+#define EXPORT_NO_SYMBOLS __asm__(".section __ksymtab\n.previous")
+#else
+#define EXPORT_NO_SYMBOLS
+#endif /* MODULE */
+
+#ifdef CONFIG_MODULES
+#define SET_MODULE_OWNER(some_struct) do { (some_struct)->owner = THIS_MODULE; } while (0)
+#else
+#define SET_MODULE_OWNER(some_struct) do { } while (0)
+#endif
+
+#endif /* _LINUX_MODULE_H */
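
The mod_member_present() test written out as a function, essentially
__MOD_IN_USE unrolled (a sketch, not from the patch): an older insmod may
hand over a struct module shorter than ours, so the optional can_unload
member must be size-checked before use.

    static int module_in_use(struct module *mod)
    {
        if (mod_member_present(mod, can_unload) && mod->can_unload)
            return mod->can_unload();
        return atomic_read(&mod->uc.usecount);
    }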
diff --git a/xen/include/xeno/multiboot.h b/xen/include/xeno/multiboot.h
new file mode 100644
index 0000000000..a61117bbbd
--- /dev/null
+++ b/xen/include/xeno/multiboot.h
@@ -0,0 +1,81 @@
+/* multiboot.h - the header for Multiboot */
+/* Copyright (C) 1999, 2001 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef __ELF__
+#error "Build on a 32-bit ELF system"
+#endif
+
+/* The magic number passed by a Multiboot-compliant boot loader. */
+#define MULTIBOOT_BOOTLOADER_MAGIC 0x2BADB002
+
+/* The symbol table for a.out. */
+typedef struct aout_symbol_table
+{
+ unsigned long tabsize;
+ unsigned long strsize;
+ unsigned long addr;
+ unsigned long reserved;
+} aout_symbol_table_t;
+
+/* The section header table for ELF. */
+typedef struct elf_section_header_table
+{
+ unsigned long num;
+ unsigned long size;
+ unsigned long addr;
+ unsigned long shndx;
+} elf_section_header_table_t;
+
+/* The Multiboot information. */
+typedef struct multiboot_info
+{
+ unsigned long flags;
+ unsigned long mem_lower;
+ unsigned long mem_upper;
+ unsigned long boot_device;
+ unsigned long cmdline;
+ unsigned long mods_count;
+ unsigned long mods_addr;
+ union
+ {
+ aout_symbol_table_t aout_sym;
+ elf_section_header_table_t elf_sec;
+ } u;
+ unsigned long mmap_length;
+ unsigned long mmap_addr;
+} multiboot_info_t;
+
+/* The module structure. */
+typedef struct module
+{
+ unsigned long mod_start;
+ unsigned long mod_end;
+ unsigned long string;
+ unsigned long reserved;
+} module_t;
+
+/* The memory map. Be careful: entry offsets are counted from
+   base_addr_low (offset 0), so the size member does not count itself. */
+typedef struct memory_map
+{
+ unsigned long size;
+ unsigned long base_addr_low;
+ unsigned long base_addr_high;
+ unsigned long length_low;
+ unsigned long length_high;
+ unsigned long type;
+} memory_map_t;
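
The map is therefore walked with a stride of size + sizeof(size). A sketch
of the standard traversal (illustrative; it assumes mbi->flags already marks
the memory map as valid):

    static void walk_mmap(multiboot_info_t *mbi)
    {
        memory_map_t *map = (memory_map_t *)mbi->mmap_addr;
        while ((unsigned long)map < mbi->mmap_addr + mbi->mmap_length) {
            /* use map->base_addr_*, map->length_*, map->type here */
            map = (memory_map_t *)((unsigned long)map
                                   + map->size + sizeof(map->size));
        }
    }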
diff --git a/xen/include/xeno/netdevice.h b/xen/include/xeno/netdevice.h
new file mode 100644
index 0000000000..0d7c4c5606
--- /dev/null
+++ b/xen/include/xeno/netdevice.h
@@ -0,0 +1,604 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Definitions for the Interfaces handler.
+ *
+ * Version: @(#)dev.h 1.0.10 08/12/93
+ *
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Corey Minyard <wf-rch!minyard@relay.EU.net>
+ * Donald J. Becker, <becker@cesdis.gsfc.nasa.gov>
+ * Alan Cox, <Alan.Cox@linux.org>
+ * Bjorn Ekwall. <bj0rn@blox.se>
+ * Pekka Riikonen <priikone@poseidon.pspt.fi>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Moved to /usr/include/linux for NET3
+ */
+#ifndef _LINUX_NETDEVICE_H
+#define _LINUX_NETDEVICE_H
+
+#include <xeno/if.h>
+#include <xeno/if_ether.h>
+#include <xeno/if_packet.h>
+#include <xeno/sched.h>
+#include <xeno/interrupt.h>
+
+#include <asm/atomic.h>
+#include <asm/cache.h>
+#include <asm/byteorder.h>
+
+#ifdef __KERNEL__
+#include <xeno/config.h>
+
+struct vlan_group;
+
+/* Backlog congestion levels */
+#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */
+#define NET_RX_DROP 1 /* packet dropped */
+
+#endif
+
+#define MAX_ADDR_LEN 8 /* Largest hardware address length */
+
+/*
+ * Network device statistics. Akin to the 2.0 ether stats but
+ * with byte counters.
+ */
+
+struct net_device_stats
+{
+ unsigned long rx_packets; /* total packets received */
+ unsigned long tx_packets; /* total packets transmitted */
+ unsigned long rx_bytes; /* total bytes received */
+ unsigned long tx_bytes; /* total bytes transmitted */
+ unsigned long rx_errors; /* bad packets received */
+ unsigned long tx_errors; /* packet transmit problems */
+ unsigned long rx_dropped; /* no space in linux buffers */
+ unsigned long tx_dropped; /* no space available in linux */
+ unsigned long multicast; /* multicast packets received */
+ unsigned long collisions;
+
+ /* detailed rx_errors: */
+ unsigned long rx_length_errors;
+ unsigned long rx_over_errors; /* receiver ring buff overflow */
+ unsigned long rx_crc_errors; /* recved pkt with crc error */
+ unsigned long rx_frame_errors; /* recv'd frame alignment error */
+ unsigned long rx_fifo_errors; /* recv'r fifo overrun */
+ unsigned long rx_missed_errors; /* receiver missed packet */
+
+ /* detailed tx_errors */
+ unsigned long tx_aborted_errors;
+ unsigned long tx_carrier_errors;
+ unsigned long tx_fifo_errors;
+ unsigned long tx_heartbeat_errors;
+ unsigned long tx_window_errors;
+
+ /* for cslip etc */
+ unsigned long rx_compressed;
+ unsigned long tx_compressed;
+};
+
+
+/* Media selection options. */
+enum {
+ IF_PORT_UNKNOWN = 0,
+ IF_PORT_10BASE2,
+ IF_PORT_10BASET,
+ IF_PORT_AUI,
+ IF_PORT_100BASET,
+ IF_PORT_100BASETX,
+ IF_PORT_100BASEFX
+};
+
+#ifdef __KERNEL__
+
+extern const char *if_port_text[];
+
+#include <xeno/cache.h>
+#include <xeno/skbuff.h>
+
+struct neighbour;
+struct neigh_parms;
+struct sk_buff;
+
+struct netif_rx_stats
+{
+ unsigned total;
+ unsigned dropped;
+ unsigned time_squeeze;
+ unsigned throttled;
+ unsigned fastroute_hit;
+ unsigned fastroute_success;
+ unsigned fastroute_defer;
+ unsigned fastroute_deferred_out;
+ unsigned fastroute_latency_reduction;
+ unsigned cpu_collision;
+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
+
+extern struct netif_rx_stats netdev_rx_stat[];
+
+
+/*
+ * We tag multicasts with these structures.
+ */
+
+struct dev_mc_list
+{
+ struct dev_mc_list *next;
+ __u8 dmi_addr[MAX_ADDR_LEN];
+ unsigned char dmi_addrlen;
+ int dmi_users;
+ int dmi_gusers;
+};
+
+struct hh_cache
+{
+ struct hh_cache *hh_next; /* Next entry */
+ atomic_t hh_refcnt; /* number of users */
+ unsigned short hh_type; /* protocol identifier, e.g. ETH_P_IP
+ * NOTE: For VLANs, this will be the
+ * encapsulated type. --BLG
+ */
+ int hh_len; /* length of header */
+ int (*hh_output)(struct sk_buff *skb);
+ rwlock_t hh_lock;
+ /* cached hardware header; allow for machine alignment needs. */
+ unsigned long hh_data[16/sizeof(unsigned long)];
+};
+
+/* These flag bits are private to the generic network queueing
+ * layer, they may not be explicitly referenced by any other
+ * code.
+ */
+
+enum netdev_state_t
+{
+ __LINK_STATE_XOFF=0,
+ __LINK_STATE_START,
+ __LINK_STATE_PRESENT,
+ __LINK_STATE_NOCARRIER
+};
+
+
+/*
+ * The DEVICE structure.
+ * Actually, this whole structure is a big mistake. It mixes I/O
+ * data with strictly "high-level" data, and it has to know about
+ * almost every data structure used in the INET module.
+ *
+ * FIXME: cleanup struct net_device such that network protocol info
+ * moves out.
+ */
+
+struct net_device
+{
+ /*
+ * This is the first field of the "visible" part of this structure
+ * (i.e. as seen by users in the "Space.c" file). It is the name
+ * the interface.
+ */
+ char name[IFNAMSIZ];
+
+ /*
+ * I/O specific fields
+ * FIXME: Merge these and struct ifmap into one
+ */
+ unsigned long rmem_end; /* shmem "recv" end */
+ unsigned long rmem_start; /* shmem "recv" start */
+ unsigned long mem_end; /* shared mem end */
+ unsigned long mem_start; /* shared mem start */
+ unsigned long base_addr; /* device I/O address */
+ unsigned int irq; /* device IRQ number */
+
+ /*
+ * Some hardware also needs these fields, but they are not
+ * part of the usual set specified in Space.c.
+ */
+
+ unsigned char if_port; /* Selectable AUI, TP,..*/
+ unsigned char dma; /* DMA channel */
+
+ unsigned long state;
+
+ struct net_device *next;
+
+ /* The device initialization function. Called only once. */
+ int (*init)(struct net_device *dev);
+
+ /* ------- Fields preinitialized in Space.c finish here ------- */
+
+ struct net_device *next_sched;
+
+ /* Interface index. Unique device identifier */
+ int ifindex;
+ int iflink;
+
+
+ struct net_device_stats* (*get_stats)(struct net_device *dev);
+ struct iw_statistics* (*get_wireless_stats)(struct net_device *dev);
+
+ /*
+ * This marks the end of the "visible" part of the structure. All
+ * fields hereafter are internal to the system, and may change at
+ * will (read: may be cleaned up at will).
+ */
+
+ /* These may be needed for future network-power-down code. */
+ unsigned long trans_start; /* Time (in jiffies) of last Tx */
+ unsigned long last_rx; /* Time of last Rx */
+
+ unsigned short flags; /* interface flags (a la BSD) */
+ unsigned short gflags;
+ unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */
+ unsigned short unused_alignment_fixer; /* Because we need priv_flags,
+ * and we want to be 32-bit aligned.
+ */
+
+ unsigned mtu; /* interface MTU value */
+ unsigned short type; /* interface hardware type */
+ unsigned short hard_header_len; /* hardware hdr length */
+ void *priv; /* pointer to private data */
+
+ struct net_device *master; /* Pointer to master device of a group,
+ * which this device is member of.
+ */
+
+ /* Interface address info. */
+ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast addr */
+ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */
+ unsigned char addr_len; /* hardware address length */
+
+ struct dev_mc_list *mc_list; /* Multicast mac addresses */
+ int mc_count; /* Number of installed mcasts */
+ int promiscuity;
+ int allmulti;
+
+ int watchdog_timeo;
+ struct timer_list watchdog_timer;
+
+ /* Protocol specific pointers */
+
+ void *atalk_ptr; /* AppleTalk link */
+ void *ip_ptr; /* IPv4 specific data */
+ void *dn_ptr; /* DECnet specific data */
+ void *ip6_ptr; /* IPv6 specific data */
+ void *ec_ptr; /* Econet specific data */
+
+ /* hard_start_xmit synchronizer */
+ spinlock_t xmit_lock;
+ /* cpu id of the processor currently inside hard_start_xmit,
+ or -1 if nobody is.
+ */
+ int xmit_lock_owner;
+ /* device queue lock */
+ spinlock_t queue_lock;
+ /* Number of references to this device */
+ atomic_t refcnt;
+ /* The flag marking that the device is unregistered but still held by a user */
+ int deadbeaf;
+
+ /* Net device features */
+ int features;
+#define NETIF_F_SG 1 /* Scatter/gather IO. */
+#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */
+#define NETIF_F_NO_CSUM 4 /* Does not require checksum, e.g. loopback. */
+#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */
+#define NETIF_F_DYNALLOC 16 /* Self-destructible device. */
+#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */
+#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */
+#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */
+#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
+#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
+#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
+
+ /* Called after device is detached from network. */
+ void (*uninit)(struct net_device *dev);
+ /* Called after last user reference disappears. */
+ void (*destructor)(struct net_device *dev);
+
+ /* Pointers to interface service routines. */
+ int (*open)(struct net_device *dev);
+ int (*stop)(struct net_device *dev);
+ int (*hard_start_xmit) (struct sk_buff *skb,
+ struct net_device *dev);
+#if 0
+ int (*poll) (struct net_device *dev, int *quota); /* XXX IAP */
+#endif
+ int (*hard_header) (struct sk_buff *skb,
+ struct net_device *dev,
+ unsigned short type,
+ void *daddr,
+ void *saddr,
+ unsigned len);
+ int (*rebuild_header)(struct sk_buff *skb);
+#define HAVE_MULTICAST
+ void (*set_multicast_list)(struct net_device *dev);
+#define HAVE_SET_MAC_ADDR
+ int (*set_mac_address)(struct net_device *dev,
+ void *addr);
+#define HAVE_PRIVATE_IOCTL
+ int (*do_ioctl)(struct net_device *dev,
+ struct ifreq *ifr, int cmd);
+#define HAVE_SET_CONFIG
+ int (*set_config)(struct net_device *dev,
+ struct ifmap *map);
+#define HAVE_HEADER_CACHE
+ int (*hard_header_cache)(struct neighbour *neigh,
+ struct hh_cache *hh);
+ void (*header_cache_update)(struct hh_cache *hh,
+ struct net_device *dev,
+ unsigned char * haddr);
+#define HAVE_CHANGE_MTU
+ int (*change_mtu)(struct net_device *dev, int new_mtu);
+
+#define HAVE_TX_TIMEOUT
+ void (*tx_timeout) (struct net_device *dev);
+
+ void (*vlan_rx_register)(struct net_device *dev,
+ struct vlan_group *grp);
+ void (*vlan_rx_add_vid)(struct net_device *dev,
+ unsigned short vid);
+ void (*vlan_rx_kill_vid)(struct net_device *dev,
+ unsigned short vid);
+
+ int (*hard_header_parse)(struct sk_buff *skb,
+ unsigned char *haddr);
+ int (*neigh_setup)(struct net_device *dev, struct neigh_parms *);
+// int (*accept_fastpath)(struct net_device *, struct dst_entry*);
+
+ /* open/release and usage marking */
+ struct module *owner;
+
+ /* bridge stuff */
+ struct net_bridge_port *br_port;
+};
+
+
+struct packet_type
+{
+ unsigned short type; /* This is really htons(ether_type). */
+ struct net_device *dev; /* NULL is wildcarded here */
+ int (*func) (struct sk_buff *, struct net_device *,
+ struct packet_type *);
+ void *data; /* Private to the packet type */
+ struct packet_type *next;
+};
+
+
+#include <xeno/interrupt.h>
+
+extern struct net_device *dev_base; /* All devices */
+extern rwlock_t dev_base_lock; /* Device list lock */
+
+extern int netdev_boot_setup_add(char *name, struct ifmap *map);
+extern int netdev_boot_setup_check(struct net_device *dev);
+extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr);
+extern void dev_add_pack(struct packet_type *pt);
+extern void dev_remove_pack(struct packet_type *pt);
+extern int dev_get(const char *name);
+extern struct net_device *dev_get_by_name(const char *name);
+extern struct net_device *__dev_get_by_name(const char *name);
+extern struct net_device *dev_alloc(const char *name, int *err);
+extern int dev_alloc_name(struct net_device *dev, const char *name);
+extern int dev_open(struct net_device *dev);
+extern int dev_close(struct net_device *dev);
+extern int register_netdevice(struct net_device *dev);
+extern int unregister_netdevice(struct net_device *dev);
+extern void dev_shutdown(struct net_device *dev);
+extern void dev_activate(struct net_device *dev);
+extern void dev_deactivate(struct net_device *dev);
+extern void dev_init_scheduler(struct net_device *dev);
+extern int dev_new_index(void);
+extern struct net_device *dev_get_by_index(int ifindex);
+extern struct net_device *__dev_get_by_index(int ifindex);
+extern int dev_restart(struct net_device *dev);
+
+typedef int gifconf_func_t(struct net_device * dev, char * bufptr, int len);
+extern int register_gifconf(unsigned int family, gifconf_func_t * gifconf);
+static inline int unregister_gifconf(unsigned int family)
+{
+ return register_gifconf(family, 0);
+}
+
+extern struct tasklet_struct net_tx_tasklet;
+
+extern struct list_head net_schedule_list;
+extern spinlock_t net_schedule_list_lock;
+
+#define HAVE_NETIF_QUEUE
+
+static inline void __netif_schedule(struct net_device *dev)
+{
+ tasklet_schedule(&net_tx_tasklet);
+}
+
+static inline void netif_schedule(struct net_device *dev)
+{
+ if (!test_bit(__LINK_STATE_XOFF, &dev->state))
+ __netif_schedule(dev);
+}
+
+static inline void netif_start_queue(struct net_device *dev)
+{
+ clear_bit(__LINK_STATE_XOFF, &dev->state);
+}
+
+static inline void netif_wake_queue(struct net_device *dev)
+{
+ if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state))
+ __netif_schedule(dev);
+}
+
+static inline void netif_stop_queue(struct net_device *dev)
+{
+ set_bit(__LINK_STATE_XOFF, &dev->state);
+}
+
+static inline int netif_queue_stopped(struct net_device *dev)
+{
+ return test_bit(__LINK_STATE_XOFF, &dev->state);
+}
+
+static inline int netif_running(struct net_device *dev)
+{
+ return test_bit(__LINK_STATE_START, &dev->state);
+}
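
Together these inlines give drivers a flow-control protocol around the
__LINK_STATE_XOFF bit: stop the queue when the hardware TX ring fills, wake it
from the completion path. A sketch of the usual pattern, with TX_RING_FULL()
and TX_RING_HAS_ROOM() standing in for driver-specific ring checks:

    static int my_start_xmit(struct sk_buff *skb, struct net_device *dev)
    {
        /* ... post skb to the hardware TX ring ... */
        if (TX_RING_FULL(dev))
            netif_stop_queue(dev);        /* sets __LINK_STATE_XOFF */
        return 0;
    }

    static void my_tx_complete(struct net_device *dev)
    {
        /* ... reap finished descriptors ... */
        if (TX_RING_HAS_ROOM(dev))
            netif_wake_queue(dev);        /* clears XOFF, reschedules TX */
    }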
+
+
+/*
+ * Xen does not need deferred skb freeing, as all destructor hook functions
+ * are IRQ safe. Linux needed more care for some destructors...
+ */
+#define dev_kfree_skb_irq(_skb) dev_kfree_skb(_skb)
+#define dev_kfree_skb_any(_skb) dev_kfree_skb(_skb)
+
+extern void net_call_rx_atomic(void (*fn)(void));
+extern int netif_rx(struct sk_buff *skb);
+extern int dev_ioctl(unsigned int cmd, void *);
+extern int dev_change_flags(struct net_device *, unsigned);
+extern void dev_init(void);
+
+extern int netdev_nit;
+
+/* Post a buffer to the network code from _non-interrupt_ context.
+ * See net/core/dev.c for the netif_rx description.
+ */
+static inline int netif_rx_ni(struct sk_buff *skb)
+{
+ int err = netif_rx(skb);
+ if (softirq_pending(smp_processor_id()))
+ do_softirq();
+ return err;
+}
+
+extern int netdev_finish_unregister(struct net_device *dev);
+
+static inline void dev_put(struct net_device *dev)
+{
+ if (atomic_dec_and_test(&dev->refcnt))
+ netdev_finish_unregister(dev);
+}
+
+#define __dev_put(dev) atomic_dec(&(dev)->refcnt)
+#define dev_hold(dev) atomic_inc(&(dev)->refcnt)
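
The rule is the usual one: take a reference while a pointer to the device may
be kept across a blocking or asynchronous operation, and drop it when done;
the final dev_put() is what lets netdev_finish_unregister() run. For instance
(assuming dev_get_by_name() returns the device with a reference held, per the
lookup functions above):

    struct net_device *dev = dev_get_by_name("eth0");
    if (dev != NULL) {
        /* ... use dev ... */
        dev_put(dev);   /* may complete a pending unregister */
    }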
+
+/* Carrier loss detection, dial on demand. The functions netif_carrier_on
+ * and _off may be called from IRQ context, but it is the caller
+ * who is responsible for serialization of these calls.
+ */
+
+static inline int netif_carrier_ok(struct net_device *dev)
+{
+ return !test_bit(__LINK_STATE_NOCARRIER, &dev->state);
+}
+
+extern void __netdev_watchdog_up(struct net_device *dev);
+
+static inline void netif_carrier_on(struct net_device *dev)
+{
+ clear_bit(__LINK_STATE_NOCARRIER, &dev->state);
+ if (netif_running(dev))
+ __netdev_watchdog_up(dev);
+}
+
+static inline void netif_carrier_off(struct net_device *dev)
+{
+ set_bit(__LINK_STATE_NOCARRIER, &dev->state);
+}
+
+/* Hot-plugging. */
+static inline int netif_device_present(struct net_device *dev)
+{
+ return test_bit(__LINK_STATE_PRESENT, &dev->state);
+}
+
+static inline void netif_device_detach(struct net_device *dev)
+{
+ if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
+ netif_running(dev)) {
+ netif_stop_queue(dev);
+ }
+}
+
+static inline void netif_device_attach(struct net_device *dev)
+{
+ if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
+ netif_running(dev)) {
+ netif_wake_queue(dev);
+ __netdev_watchdog_up(dev);
+ }
+}
+
+/*
+ * Network interface message level settings
+ */
+#define HAVE_NETIF_MSG 1
+
+enum {
+ NETIF_MSG_DRV = 0x0001,
+ NETIF_MSG_PROBE = 0x0002,
+ NETIF_MSG_LINK = 0x0004,
+ NETIF_MSG_TIMER = 0x0008,
+ NETIF_MSG_IFDOWN = 0x0010,
+ NETIF_MSG_IFUP = 0x0020,
+ NETIF_MSG_RX_ERR = 0x0040,
+ NETIF_MSG_TX_ERR = 0x0080,
+ NETIF_MSG_TX_QUEUED = 0x0100,
+ NETIF_MSG_INTR = 0x0200,
+ NETIF_MSG_TX_DONE = 0x0400,
+ NETIF_MSG_RX_STATUS = 0x0800,
+ NETIF_MSG_PKTDATA = 0x1000,
+};
+
+#define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV)
+#define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE)
+#define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK)
+#define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER)
+#define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN)
+#define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP)
+#define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR)
+#define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR)
+#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED)
+#define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR)
+#define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE)
+#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS)
+#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA)
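
A driver keeps an OR of the NETIF_MSG_* flags in a msg_enable field of its
private state and gates its printk()s on the macros above. A sketch, assuming
a hypothetical per-device struct hung off the usual dev->priv pointer:

    struct my_priv {
        u32 msg_enable;   /* OR of NETIF_MSG_* flags */
        /* ... */
    };

    static void my_link_change(struct net_device *dev, int up)
    {
        struct my_priv *p = (struct my_priv *)dev->priv;

        if (netif_msg_link(p))
            printk(KERN_INFO "%s: link %s\n", dev->name, up ? "up" : "down");
    }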
+
+/* These functions live elsewhere (mostly in drivers/net/net_init.c) but are related */
+
+extern void ether_setup(struct net_device *dev);
+extern void fddi_setup(struct net_device *dev);
+extern void tr_setup(struct net_device *dev);
+extern void fc_setup(struct net_device *dev);
+extern void fc_freedev(struct net_device *dev);
+/* Support for loadable net-drivers */
+extern int register_netdev(struct net_device *dev);
+extern void unregister_netdev(struct net_device *dev);
+/* Functions used for multicast support */
+extern void dev_mc_upload(struct net_device *dev);
+extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
+extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
+extern void dev_mc_discard(struct net_device *dev);
+extern void dev_set_promiscuity(struct net_device *dev, int inc);
+extern void dev_set_allmulti(struct net_device *dev, int inc);
+extern void netdev_state_change(struct net_device *dev);
+/* Load a device via the kmod */
+extern void dev_load(const char *name);
+extern void dev_mcast_init(void);
+extern int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev));
+extern void netdev_unregister_fc(int bit);
+extern unsigned long netdev_fc_xoff;
+extern int netdev_set_master(struct net_device *dev, struct net_device *master);
+extern struct sk_buff * skb_checksum_help(struct sk_buff *skb);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_DEV_H */
diff --git a/xen/include/xeno/notifier.h b/xen/include/xeno/notifier.h
new file mode 100644
index 0000000000..0db9736c11
--- /dev/null
+++ b/xen/include/xeno/notifier.h
@@ -0,0 +1,64 @@
+/*
+ * Routines to manage notifier chains for passing status changes to any
+ * interested routines. We need this instead of hard coded call lists so
+ * that modules can poke their nose into the innards. The network devices
+ * needed them so here they are for the rest of you.
+ *
+ * Alan Cox <Alan.Cox@linux.org>
+ */
+
+#ifndef _LINUX_NOTIFIER_H
+#define _LINUX_NOTIFIER_H
+#include <linux/errno.h>
+
+struct notifier_block
+{
+ int (*notifier_call)(struct notifier_block *self, unsigned long, void *);
+ struct notifier_block *next;
+ int priority;
+};
+
+
+#ifdef __KERNEL__
+
+extern int notifier_chain_register(struct notifier_block **list, struct notifier_block *n);
+extern int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n);
+extern int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v);
+
+#define NOTIFY_DONE 0x0000 /* Don't care */
+#define NOTIFY_OK 0x0001 /* Suits me */
+#define NOTIFY_STOP_MASK 0x8000 /* Don't call further */
+#define NOTIFY_BAD (NOTIFY_STOP_MASK|0x0002) /* Bad/Veto action */
+
+/*
+ * Declared notifiers so far. I can imagine quite a few more chains
+ * over time (eg laptop power reset chains, reboot chain (to clean
+ * device units up), device [un]mount chain, module load/unload chain,
+ * low memory chain, screenblank chain (for plug in modular screenblankers)
+ * VC switch chains (for loadable kernel svgalib VC switch helpers) etc...
+ */
+
+/* netdevice notifier chain */
+#define NETDEV_UP 0x0001 /* For now you can't veto a device up/down */
+#define NETDEV_DOWN 0x0002
+#define NETDEV_REBOOT 0x0003 /* Tell a protocol stack a network interface
+ detected a hardware crash and restarted
+ - we can use this eg to kick tcp sessions
+ once done */
+#define NETDEV_CHANGE 0x0004 /* Notify device state change */
+#define NETDEV_REGISTER 0x0005
+#define NETDEV_UNREGISTER 0x0006
+#define NETDEV_CHANGEMTU 0x0007
+#define NETDEV_CHANGEADDR 0x0008
+#define NETDEV_GOING_DOWN 0x0009
+#define NETDEV_CHANGENAME 0x000A
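
A subsystem watches interface state by chaining a notifier_block onto the
netdevice chain and switching on the event codes above; the callback returns
one of the NOTIFY_* values. A minimal sketch, with netdev_chain standing in
for the real chain head (which lives in the net core, not this header):

    static int my_netdev_event(struct notifier_block *self,
                               unsigned long event, void *ptr)
    {
        struct net_device *dev = (struct net_device *)ptr;

        if (event == NETDEV_UP)
            printk("%s is up\n", dev->name);
        return NOTIFY_DONE;
    }

    static struct notifier_block my_nb = {
        notifier_call: my_netdev_event,
        priority:      0,
    };

    /* notifier_chain_register(&netdev_chain, &my_nb); */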
+
+#define SYS_DOWN 0x0001 /* Notify of system down */
+#define SYS_RESTART SYS_DOWN
+#define SYS_HALT 0x0002 /* Notify of system halt */
+#define SYS_POWER_OFF 0x0003 /* Notify of system power off */
+
+#define NETLINK_URELEASE 0x0001 /* Unicast netlink socket released */
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_NOTIFIER_H */
diff --git a/xen/include/xeno/pci.h b/xen/include/xeno/pci.h
new file mode 100644
index 0000000000..33e612491e
--- /dev/null
+++ b/xen/include/xeno/pci.h
@@ -0,0 +1,807 @@
+/*
+ * $Id: pci.h,v 1.87 1998/10/11 15:13:12 mj Exp $
+ *
+ * PCI defines and function prototypes
+ * Copyright 1994, Drew Eckhardt
+ * Copyright 1997--1999 Martin Mares <mj@ucw.cz>
+ *
+ * For more information, please consult the following manuals (look at
+ * http://www.pcisig.com/ for how to get them):
+ *
+ * PCI BIOS Specification
+ * PCI Local Bus Specification
+ * PCI to PCI Bridge Specification
+ * PCI System Design Guide
+ */
+
+#ifndef LINUX_PCI_H
+#define LINUX_PCI_H
+
+/*
+ * Under PCI, each device has 256 bytes of configuration address space,
+ * of which the first 64 bytes are standardized as follows:
+ */
+#define PCI_VENDOR_ID 0x00 /* 16 bits */
+#define PCI_DEVICE_ID 0x02 /* 16 bits */
+#define PCI_COMMAND 0x04 /* 16 bits */
+#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */
+#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */
+#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */
+#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */
+#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */
+#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */
+#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */
+#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */
+#define PCI_COMMAND_SERR 0x100 /* Enable SERR */
+#define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */
+
+#define PCI_STATUS 0x06 /* 16 bits */
+#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */
+#define PCI_STATUS_66MHZ 0x20 /* Support 66 MHz PCI 2.1 bus */
+#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */
+#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast back-to-back */
+#define PCI_STATUS_PARITY 0x100 /* Detected parity error */
+#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */
+#define PCI_STATUS_DEVSEL_FAST 0x000
+#define PCI_STATUS_DEVSEL_MEDIUM 0x200
+#define PCI_STATUS_DEVSEL_SLOW 0x400
+#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */
+#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of target abort */
+#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */
+#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */
+#define PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */
+
+#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8
+ revision */
+#define PCI_REVISION_ID 0x08 /* Revision ID */
+#define PCI_CLASS_PROG 0x09 /* Reg. Level Programming Interface */
+#define PCI_CLASS_DEVICE 0x0a /* Device class */
+
+#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */
+#define PCI_LATENCY_TIMER 0x0d /* 8 bits */
+#define PCI_HEADER_TYPE 0x0e /* 8 bits */
+#define PCI_HEADER_TYPE_NORMAL 0
+#define PCI_HEADER_TYPE_BRIDGE 1
+#define PCI_HEADER_TYPE_CARDBUS 2
+
+#define PCI_BIST 0x0f /* 8 bits */
+#define PCI_BIST_CODE_MASK 0x0f /* Return result */
+#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */
+#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */
+
+/*
+ * Base addresses specify locations in memory or I/O space.
+ * Decoded size can be determined by writing a value of
+ * 0xffffffff to the register, and reading it back. Only the
+ * bits that read back as 1 are decoded.
+ */
+#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */
+#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */
+#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */
+#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */
+#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */
+#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */
+#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */
+#define PCI_BASE_ADDRESS_SPACE_IO 0x01
+#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00
+#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06
+#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */
+#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */
+#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */
+#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? */
+#define PCI_BASE_ADDRESS_MEM_MASK (~0x0fUL)
+#define PCI_BASE_ADDRESS_IO_MASK (~0x03UL)
+/* bit 1 is reserved if address_space = 1 */
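
The sizing protocol described above, in code: write all-ones, read back,
restore, then mask off the flag bits; the size is the two's complement of what
remains. A sketch using the config accessors declared later in this header:

    u32 saved, mask, size;

    pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &saved);
    pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, 0xffffffff);
    pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &mask);
    pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, saved);    /* restore */

    if (saved & PCI_BASE_ADDRESS_SPACE_IO)
        mask &= PCI_BASE_ADDRESS_IO_MASK;
    else
        mask &= PCI_BASE_ADDRESS_MEM_MASK;
    size = ~mask + 1;   /* only the 1-bits are decoded */

So a device that answers 0xfffff000 in a memory BAR decodes 4KB.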
+
+/* Header type 0 (normal devices) */
+#define PCI_CARDBUS_CIS 0x28
+#define PCI_SUBSYSTEM_VENDOR_ID 0x2c
+#define PCI_SUBSYSTEM_ID 0x2e
+#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */
+#define PCI_ROM_ADDRESS_ENABLE 0x01
+#define PCI_ROM_ADDRESS_MASK (~0x7ffUL)
+
+#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */
+
+/* 0x35-0x3b are reserved */
+#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */
+#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */
+#define PCI_MIN_GNT 0x3e /* 8 bits */
+#define PCI_MAX_LAT 0x3f /* 8 bits */
+
+/* Header type 1 (PCI-to-PCI bridges) */
+#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */
+#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */
+#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */
+#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */
+#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */
+#define PCI_IO_LIMIT 0x1d
+#define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */
+#define PCI_IO_RANGE_TYPE_16 0x00
+#define PCI_IO_RANGE_TYPE_32 0x01
+#define PCI_IO_RANGE_MASK (~0x0fUL)
+#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */
+#define PCI_MEMORY_BASE 0x20 /* Memory range behind */
+#define PCI_MEMORY_LIMIT 0x22
+#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
+#define PCI_MEMORY_RANGE_MASK (~0x0fUL)
+#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */
+#define PCI_PREF_MEMORY_LIMIT 0x26
+#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL
+#define PCI_PREF_RANGE_TYPE_32 0x00
+#define PCI_PREF_RANGE_TYPE_64 0x01
+#define PCI_PREF_RANGE_MASK (~0x0fUL)
+#define PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */
+#define PCI_PREF_LIMIT_UPPER32 0x2c
+#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */
+#define PCI_IO_LIMIT_UPPER16 0x32
+/* 0x34 same as for htype 0 */
+/* 0x35-0x3b are reserved */
+#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */
+/* 0x3c-0x3d are same as for htype 0 */
+#define PCI_BRIDGE_CONTROL 0x3e
+#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */
+#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */
+#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */
+#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */
+#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */
+#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */
+#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */
+
+/* Header type 2 (CardBus bridges) */
+#define PCI_CB_CAPABILITY_LIST 0x14
+/* 0x15 reserved */
+#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */
+#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */
+#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */
+#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */
+#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */
+#define PCI_CB_MEMORY_BASE_0 0x1c
+#define PCI_CB_MEMORY_LIMIT_0 0x20
+#define PCI_CB_MEMORY_BASE_1 0x24
+#define PCI_CB_MEMORY_LIMIT_1 0x28
+#define PCI_CB_IO_BASE_0 0x2c
+#define PCI_CB_IO_BASE_0_HI 0x2e
+#define PCI_CB_IO_LIMIT_0 0x30
+#define PCI_CB_IO_LIMIT_0_HI 0x32
+#define PCI_CB_IO_BASE_1 0x34
+#define PCI_CB_IO_BASE_1_HI 0x36
+#define PCI_CB_IO_LIMIT_1 0x38
+#define PCI_CB_IO_LIMIT_1_HI 0x3a
+#define PCI_CB_IO_RANGE_MASK (~0x03UL)
+/* 0x3c-0x3d are same as for htype 0 */
+#define PCI_CB_BRIDGE_CONTROL 0x3e
+#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */
+#define PCI_CB_BRIDGE_CTL_SERR 0x02
+#define PCI_CB_BRIDGE_CTL_ISA 0x04
+#define PCI_CB_BRIDGE_CTL_VGA 0x08
+#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20
+#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */
+#define PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */
+#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */
+#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200
+#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400
+#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40
+#define PCI_CB_SUBSYSTEM_ID 0x42
+#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */
+/* 0x48-0x7f reserved */
+
+/* Capability lists */
+
+#define PCI_CAP_LIST_ID 0 /* Capability ID */
+#define PCI_CAP_ID_PM 0x01 /* Power Management */
+#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */
+#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */
+#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */
+#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */
+#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */
+#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */
+#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */
+#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */
+#define PCI_CAP_SIZEOF 4
+
+/* Power Management Registers */
+
+#define PCI_PM_PMC 2 /* PM Capabilities Register */
+#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */
+#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */
+#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */
+#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */
+#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxiliary power support mask */
+#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */
+#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */
+#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */
+#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */
+#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */
+#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */
+#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */
+#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */
+#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */
+#define PCI_PM_CTRL 4 /* PM control and status register */
+#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */
+#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */
+#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */
+#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */
+#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */
+#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions (??) */
+#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */
+#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */
+#define PCI_PM_DATA_REGISTER 7 /* (??) */
+#define PCI_PM_SIZEOF 8
+
+/* AGP registers */
+
+#define PCI_AGP_VERSION 2 /* BCD version number */
+#define PCI_AGP_RFU 3 /* Rest of capability flags */
+#define PCI_AGP_STATUS 4 /* Status register */
+#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */
+#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */
+#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing supported */
+#define PCI_AGP_STATUS_FW 0x0010 /* FW transfers supported */
+#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported */
+#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported */
+#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported */
+#define PCI_AGP_COMMAND 8 /* Control register */
+#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */
+#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */
+#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */
+#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */
+#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */
+#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */
+#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */
+#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */
+#define PCI_AGP_SIZEOF 12
+
+/* Slot Identification */
+
+#define PCI_SID_ESR 2 /* Expansion Slot Register */
+#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */
+#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */
+#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */
+
+/* Message Signalled Interrupts registers */
+
+#define PCI_MSI_FLAGS 2 /* Various flags */
+#define PCI_MSI_FLAGS_64BIT 0x80 /* 64-bit addresses allowed */
+#define PCI_MSI_FLAGS_QSIZE 0x70 /* Message queue size configured */
+#define PCI_MSI_FLAGS_QMASK 0x0e /* Maximum queue size available */
+#define PCI_MSI_FLAGS_ENABLE 0x01 /* MSI feature enabled */
+#define PCI_MSI_RFU 3 /* Rest of capability flags */
+#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */
+#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
+#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */
+#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */
+
+/* CompactPCI Hotswap Register */
+
+#define PCI_CHSWP_CSR 2 /* Control and Status Register */
+#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */
+#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */
+#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */
+#define PCI_CHSWP_LOO 0x08 /* LED On / Off */
+#define PCI_CHSWP_PI 0x30 /* Programming Interface */
+#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */
+#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */
+
+/* PCI-X registers */
+
+#define PCI_X_CMD 2 /* Modes & Features */
+#define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */
+#define PCI_X_CMD_ERO 0x0002 /* Enable Relaxed Ordering */
+#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */
+#define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */
+#define PCI_X_DEVFN 4 /* A copy of devfn. */
+#define PCI_X_BUSNR 5 /* Bus segment number */
+#define PCI_X_STATUS 6 /* PCI-X capabilities */
+#define PCI_X_STATUS_64BIT 0x0001 /* 64-bit device */
+#define PCI_X_STATUS_133MHZ 0x0002 /* 133 MHz capable */
+#define PCI_X_STATUS_SPL_DISC 0x0004 /* Split Completion Discarded */
+#define PCI_X_STATUS_UNX_SPL 0x0008 /* Unexpected Split Completion */
+#define PCI_X_STATUS_COMPLEX 0x0010 /* Device Complexity */
+#define PCI_X_STATUS_MAX_READ 0x0060 /* Designed Maximum Memory Read Count */
+#define PCI_X_STATUS_MAX_SPLIT 0x0380 /* Design Max Outstanding Split Trans */
+#define PCI_X_STATUS_MAX_CUM 0x1c00 /* Designed Max Cumulative Read Size */
+#define PCI_X_STATUS_SPL_ERR 0x2000 /* Rcvd Split Completion Error Msg */
+
+/* Include the ID list */
+
+#include <linux/pci_ids.h>
+
+/*
+ * The PCI interface treats multi-function devices as independent
+ * devices. The slot/function address of each device is encoded
+ * in a single byte as follows:
+ *
+ * 7:3 = slot
+ * 2:0 = function
+ */
+#define PCI_DEVFN(slot,func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
+#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
+#define PCI_FUNC(devfn) ((devfn) & 0x07)
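
A worked example, slot 3 / function 1:

    unsigned int devfn = PCI_DEVFN(3, 1);  /* ((3 & 0x1f) << 3) | (1 & 0x07) = 0x19 */

    PCI_SLOT(devfn);   /* (0x19 >> 3) & 0x1f = 3 */
    PCI_FUNC(devfn);   /*  0x19 & 0x07       = 1 */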
+
+/* Ioctls for /proc/bus/pci/X/Y nodes. */
+#define PCIIOC_BASE ('P' << 24 | 'C' << 16 | 'I' << 8)
+#define PCIIOC_CONTROLLER (PCIIOC_BASE | 0x00) /* Get controller for PCI device. */
+#define PCIIOC_MMAP_IS_IO (PCIIOC_BASE | 0x01) /* Set mmap state to I/O space. */
+#define PCIIOC_MMAP_IS_MEM (PCIIOC_BASE | 0x02) /* Set mmap state to MEM space. */
+#define PCIIOC_WRITE_COMBINE (PCIIOC_BASE | 0x03) /* Enable/disable write-combining. */
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/config.h>
+#include <linux/ioport.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+
+/* File state for mmap()s on /proc/bus/pci/X/Y */
+enum pci_mmap_state {
+ pci_mmap_io,
+ pci_mmap_mem
+};
+
+/* This defines the direction arg to the DMA mapping routines. */
+#define PCI_DMA_BIDIRECTIONAL 0
+#define PCI_DMA_TODEVICE 1
+#define PCI_DMA_FROMDEVICE 2
+#define PCI_DMA_NONE 3
+
+#define DEVICE_COUNT_COMPATIBLE 4
+#define DEVICE_COUNT_IRQ 2
+#define DEVICE_COUNT_DMA 2
+#define DEVICE_COUNT_RESOURCE 12
+
+#define PCI_ANY_ID (~0)
+
+#define pci_present pcibios_present
+
+
+#define pci_for_each_dev_reverse(dev) \
+ for(dev = pci_dev_g(pci_devices.prev); dev != pci_dev_g(&pci_devices); dev = pci_dev_g(dev->global_list.prev))
+
+#define pci_for_each_bus(bus) \
+ for(bus = pci_bus_b(pci_root_buses.next); bus != pci_bus_b(&pci_root_buses); bus = pci_bus_b(bus->node.next))
+
+/*
+ * The pci_dev structure is used to describe both PCI and ISAPnP devices.
+ */
+struct pci_dev {
+ struct list_head global_list; /* node in list of all PCI devices */
+ struct list_head bus_list; /* node in per-bus list */
+ struct pci_bus *bus; /* bus this device is on */
+ struct pci_bus *subordinate; /* bus this device bridges to */
+
+ void *sysdata; /* hook for sys-specific extension */
+ struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */
+
+ unsigned int devfn; /* encoded device & function index */
+ unsigned short vendor;
+ unsigned short device;
+ unsigned short subsystem_vendor;
+ unsigned short subsystem_device;
+ unsigned int class; /* 3 bytes: (base,sub,prog-if) */
+ u8 hdr_type; /* PCI header type (`multi' flag masked out) */
+ u8 rom_base_reg; /* which config register controls the ROM */
+
+ struct pci_driver *driver; /* which driver has allocated this device */
+ void *driver_data; /* data private to the driver */
+ u64 dma_mask; /* Mask of the bits of bus address this
+ device implements. Normally this is
+ 0xffffffff. You only need to change
+ this if your device has broken DMA
+ or supports 64-bit transfers. */
+
+ u32 current_state; /* Current operating state. In ACPI-speak,
+ this is D0-D3, D0 being fully functional,
+ and D3 being off. */
+
+ /* device is compatible with these IDs */
+ unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE];
+ unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE];
+
+ /*
+ * Instead of touching interrupt line and base address registers
+ * directly, use the values stored here. They might be different!
+ */
+ unsigned int irq;
+ struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
+ struct resource dma_resource[DEVICE_COUNT_DMA];
+ struct resource irq_resource[DEVICE_COUNT_IRQ];
+
+ char name[90]; /* device name */
+ char slot_name[8]; /* slot name */
+ int active; /* ISAPnP: device is active */
+ int ro; /* ISAPnP: read only */
+ unsigned short regs; /* ISAPnP: supported registers */
+
+ /* These fields are used by common fixups */
+ unsigned short transparent:1; /* Transparent PCI bridge */
+
+ int (*prepare)(struct pci_dev *dev); /* ISAPnP hooks */
+ int (*activate)(struct pci_dev *dev);
+ int (*deactivate)(struct pci_dev *dev);
+};
+
+#define pci_dev_g(n) list_entry(n, struct pci_dev, global_list)
+#define pci_dev_b(n) list_entry(n, struct pci_dev, bus_list)
+
+/*
+ * For PCI devices, the region numbers are assigned this way:
+ *
+ * 0-5 standard PCI regions
+ * 6 expansion ROM
+ * 7-10 bridges: address space assigned to buses behind the bridge
+ */
+
+#define PCI_ROM_RESOURCE 6
+#define PCI_BRIDGE_RESOURCES 7
+#define PCI_NUM_RESOURCES 11
+
+#define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */
+
+struct pci_bus {
+ struct list_head node; /* node in list of buses */
+ struct pci_bus *parent; /* parent bus this bridge is on */
+ struct list_head children; /* list of child buses */
+ struct list_head devices; /* list of devices on this bus */
+ struct pci_dev *self; /* bridge device as seen by parent */
+ struct resource *resource[4]; /* address space routed to this bus */
+
+ struct pci_ops *ops; /* configuration access functions */
+ void *sysdata; /* hook for sys-specific extension */
+ struct proc_dir_entry *procdir; /* directory entry in /proc/bus/pci */
+
+ unsigned char number; /* bus number */
+ unsigned char primary; /* number of primary bridge */
+ unsigned char secondary; /* number of secondary bridge */
+ unsigned char subordinate; /* max number of subordinate buses */
+
+ char name[48];
+ unsigned short vendor;
+ unsigned short device;
+ unsigned int serial; /* serial number */
+ unsigned char pnpver; /* Plug & Play version */
+ unsigned char productver; /* product version */
+ unsigned char checksum; /* if zero - checksum passed */
+ unsigned char pad1;
+};
+
+#define pci_bus_b(n) list_entry(n, struct pci_bus, node)
+
+extern struct list_head pci_root_buses; /* list of all known PCI buses */
+extern struct list_head pci_devices; /* list of all devices */
+
+extern struct proc_dir_entry *proc_bus_pci_dir;
+/*
+ * Error values that may be returned by PCI functions.
+ */
+#define PCIBIOS_SUCCESSFUL 0x00
+#define PCIBIOS_FUNC_NOT_SUPPORTED 0x81
+#define PCIBIOS_BAD_VENDOR_ID 0x83
+#define PCIBIOS_DEVICE_NOT_FOUND 0x86
+#define PCIBIOS_BAD_REGISTER_NUMBER 0x87
+#define PCIBIOS_SET_FAILED 0x88
+#define PCIBIOS_BUFFER_TOO_SMALL 0x89
+
+/* Low-level architecture-dependent routines */
+
+struct pci_ops {
+ int (*read_byte)(struct pci_dev *, int where, u8 *val);
+ int (*read_word)(struct pci_dev *, int where, u16 *val);
+ int (*read_dword)(struct pci_dev *, int where, u32 *val);
+ int (*write_byte)(struct pci_dev *, int where, u8 val);
+ int (*write_word)(struct pci_dev *, int where, u16 val);
+ int (*write_dword)(struct pci_dev *, int where, u32 val);
+};
+
+struct pbus_set_ranges_data
+{
+ unsigned long io_start, io_end;
+ unsigned long mem_start, mem_end;
+ unsigned long prefetch_start, prefetch_end;
+};
+
+struct pci_device_id {
+ unsigned int vendor, device; /* Vendor and device ID or PCI_ANY_ID */
+ unsigned int subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */
+ unsigned int class, class_mask; /* (class,subclass,prog-if) triplet */
+ unsigned long driver_data; /* Data private to the driver */
+};
+
+struct pci_driver {
+ struct list_head node;
+ char *name;
+ const struct pci_device_id *id_table; /* NULL if wants all devices */
+ int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */
+ void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */
+ int (*save_state) (struct pci_dev *dev, u32 state); /* Save Device Context */
+ int (*suspend) (struct pci_dev *dev, u32 state); /* Device suspended */
+ int (*resume) (struct pci_dev *dev); /* Device woken up */
+ int (*enable_wake) (struct pci_dev *dev, u32 state, int enable); /* Enable wake event */
+};
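
A driver fills in a match table and one of these, then registers it; probe()
runs for every device whose IDs match. A minimal sketch in the 2.4
GNU-initializer style, with hypothetical my_probe/my_remove (see the
pci_set_drvdata() example further below) and an NCR controller as the match:

    static struct pci_device_id my_ids[] = {
        { PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810,
          PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
        { 0, }   /* terminator */
    };

    static struct pci_driver my_driver = {
        name:     "mydrv",
        id_table: my_ids,
        probe:    my_probe,
        remove:   my_remove,
    };

    /* registered with pci_module_init(&my_driver) -- see the helper below */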
+
+
+/* these external functions are only available when PCI support is enabled */
+#ifdef CONFIG_PCI
+
+#define pci_for_each_dev(dev) \
+ for(dev = pci_dev_g(pci_devices.next); dev != pci_dev_g(&pci_devices); dev = pci_dev_g(dev->global_list.next))
+
+void pcibios_init(void);
+void pcibios_fixup_bus(struct pci_bus *);
+int pcibios_enable_device(struct pci_dev *, int mask);
+char *pcibios_setup (char *str);
+
+/* Used only when drivers/pci/setup.c is used */
+void pcibios_align_resource(void *, struct resource *,
+ unsigned long, unsigned long);
+void pcibios_update_resource(struct pci_dev *, struct resource *,
+ struct resource *, int);
+void pcibios_update_irq(struct pci_dev *, int irq);
+void pcibios_fixup_pbus_ranges(struct pci_bus *, struct pbus_set_ranges_data *);
+
+/* Backward compatibility, don't use in new code! */
+
+int pcibios_present(void);
+int pcibios_read_config_byte (unsigned char bus, unsigned char dev_fn,
+ unsigned char where, unsigned char *val);
+int pcibios_read_config_word (unsigned char bus, unsigned char dev_fn,
+ unsigned char where, unsigned short *val);
+int pcibios_read_config_dword (unsigned char bus, unsigned char dev_fn,
+ unsigned char where, unsigned int *val);
+int pcibios_write_config_byte (unsigned char bus, unsigned char dev_fn,
+ unsigned char where, unsigned char val);
+int pcibios_write_config_word (unsigned char bus, unsigned char dev_fn,
+ unsigned char where, unsigned short val);
+int pcibios_write_config_dword (unsigned char bus, unsigned char dev_fn,
+ unsigned char where, unsigned int val);
+int pcibios_find_class (unsigned int class_code, unsigned short index, unsigned char *bus, unsigned char *dev_fn);
+int pcibios_find_device (unsigned short vendor, unsigned short dev_id,
+ unsigned short index, unsigned char *bus,
+ unsigned char *dev_fn);
+
+/* Generic PCI functions used internally */
+
+void pci_init(void);
+int pci_bus_exists(const struct list_head *list, int nr);
+struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata);
+struct pci_bus *pci_alloc_primary_bus(int bus);
+struct pci_dev *pci_scan_slot(struct pci_dev *temp);
+int pci_proc_attach_device(struct pci_dev *dev);
+int pci_proc_detach_device(struct pci_dev *dev);
+int pci_proc_attach_bus(struct pci_bus *bus);
+int pci_proc_detach_bus(struct pci_bus *bus);
+void pci_name_device(struct pci_dev *dev);
+char *pci_class_name(u32 class);
+void pci_read_bridge_bases(struct pci_bus *child);
+struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res);
+int pci_setup_device(struct pci_dev *dev);
+int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
+
+/* Generic PCI functions exported to card drivers */
+
+struct pci_dev *pci_find_device (unsigned int vendor, unsigned int device, const struct pci_dev *from);
+struct pci_dev *pci_find_subsys (unsigned int vendor, unsigned int device,
+ unsigned int ss_vendor, unsigned int ss_device,
+ const struct pci_dev *from);
+struct pci_dev *pci_find_class (unsigned int class, const struct pci_dev *from);
+struct pci_dev *pci_find_slot (unsigned int bus, unsigned int devfn);
+int pci_find_capability (struct pci_dev *dev, int cap);
+
+int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val);
+int pci_read_config_word(struct pci_dev *dev, int where, u16 *val);
+int pci_read_config_dword(struct pci_dev *dev, int where, u32 *val);
+int pci_write_config_byte(struct pci_dev *dev, int where, u8 val);
+int pci_write_config_word(struct pci_dev *dev, int where, u16 val);
+int pci_write_config_dword(struct pci_dev *dev, int where, u32 val);
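
These wrappers route through the owning bus's pci_ops and return PCIBIOS_*
codes. A typical use pairs them with pci_find_capability(): locate the
power-management capability, then read its control/status register:

    int pos;
    u16 ctrl, state;

    pos = pci_find_capability(dev, PCI_CAP_ID_PM);
    if (pos != 0 &&
        pci_read_config_word(dev, pos + PCI_PM_CTRL, &ctrl) == PCIBIOS_SUCCESSFUL)
        state = ctrl & PCI_PM_CTRL_STATE_MASK;   /* current D-state, D0..D3 */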
+
+int pci_enable_device(struct pci_dev *dev);
+int pci_enable_device_bars(struct pci_dev *dev, int mask);
+void pci_disable_device(struct pci_dev *dev);
+void pci_set_master(struct pci_dev *dev);
+#define HAVE_PCI_SET_MWI
+int pci_set_mwi(struct pci_dev *dev);
+void pci_clear_mwi(struct pci_dev *dev);
+int pdev_set_mwi(struct pci_dev *dev);
+int pci_set_dma_mask(struct pci_dev *dev, u64 mask);
+int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask);
+int pci_assign_resource(struct pci_dev *dev, int i);
+
+/* Power management related routines */
+int pci_save_state(struct pci_dev *dev, u32 *buffer);
+int pci_restore_state(struct pci_dev *dev, u32 *buffer);
+int pci_set_power_state(struct pci_dev *dev, int state);
+int pci_enable_wake(struct pci_dev *dev, u32 state, int enable);
+
+/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
+
+int pci_claim_resource(struct pci_dev *, int);
+void pci_assign_unassigned_resources(void);
+void pdev_enable_device(struct pci_dev *);
+void pdev_sort_resources(struct pci_dev *, struct resource_list *);
+unsigned long pci_bridge_check_io(struct pci_dev *);
+void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
+ int (*)(struct pci_dev *, u8, u8));
+#define HAVE_PCI_REQ_REGIONS 2
+int pci_request_regions(struct pci_dev *, char *);
+void pci_release_regions(struct pci_dev *);
+int pci_request_region(struct pci_dev *, int, char *);
+void pci_release_region(struct pci_dev *, int);
+
+/* New-style probing supporting hot-pluggable devices */
+int pci_register_driver(struct pci_driver *);
+void pci_unregister_driver(struct pci_driver *);
+void pci_insert_device(struct pci_dev *, struct pci_bus *);
+void pci_remove_device(struct pci_dev *);
+struct pci_driver *pci_dev_driver(const struct pci_dev *);
+const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev);
+void pci_announce_device_to_drivers(struct pci_dev *);
+unsigned int pci_do_scan_bus(struct pci_bus *bus);
+struct pci_bus * pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr);
+
+#if 0
+/* kmem_cache style wrapper around pci_alloc_consistent() */
+struct pci_pool *pci_pool_create (const char *name, struct pci_dev *dev,
+ size_t size, size_t align, size_t allocation, int flags);
+void pci_pool_destroy (struct pci_pool *pool);
+
+void *pci_pool_alloc (struct pci_pool *pool, int flags, dma_addr_t *handle);
+void pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t addr);
+#endif
+
+#endif /* CONFIG_PCI */
+
+/* Include architecture-dependent settings and functions */
+
+#include <asm/pci.h>
+
+/*
+ * If the system does not have PCI, clearly these return errors. Define
+ * these as simple inline functions to avoid hair in drivers.
+ */
+
+#ifndef CONFIG_PCI
+static inline int pcibios_present(void) { return 0; }
+static inline int pcibios_find_class (unsigned int class_code, unsigned short index, unsigned char *bus, unsigned char *dev_fn)
+{ return PCIBIOS_DEVICE_NOT_FOUND; }
+
+#define _PCI_NOP(o,s,t) \
+ static inline int pcibios_##o##_config_##s (u8 bus, u8 dfn, u8 where, t val) \
+ { return PCIBIOS_FUNC_NOT_SUPPORTED; } \
+ static inline int pci_##o##_config_##s (struct pci_dev *dev, int where, t val) \
+ { return PCIBIOS_FUNC_NOT_SUPPORTED; }
+#define _PCI_NOP_ALL(o,x) _PCI_NOP(o,byte,u8 x) \
+ _PCI_NOP(o,word,u16 x) \
+ _PCI_NOP(o,dword,u32 x)
+_PCI_NOP_ALL(read, *)
+_PCI_NOP_ALL(write,)
+
+static inline struct pci_dev *pci_find_device(unsigned int vendor, unsigned int device, const struct pci_dev *from)
+{ return NULL; }
+
+static inline struct pci_dev *pci_find_class(unsigned int class, const struct pci_dev *from)
+{ return NULL; }
+
+static inline struct pci_dev *pci_find_slot(unsigned int bus, unsigned int devfn)
+{ return NULL; }
+
+static inline struct pci_dev *pci_find_subsys(unsigned int vendor, unsigned int device,
+unsigned int ss_vendor, unsigned int ss_device, const struct pci_dev *from)
+{ return NULL; }
+
+static inline void pci_set_master(struct pci_dev *dev) { }
+static inline int pci_enable_device_bars(struct pci_dev *dev, int mask) { return -EBUSY; }
+static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; }
+static inline void pci_disable_device(struct pci_dev *dev) { }
+static inline int pci_module_init(struct pci_driver *drv) { return -ENODEV; }
+static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; }
+static inline int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; }
+static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY;}
+static inline int pci_register_driver(struct pci_driver *drv) { return 0;}
+static inline void pci_unregister_driver(struct pci_driver *drv) { }
+static inline int scsi_to_pci_dma_dir(unsigned char scsi_dir) { return scsi_dir; }
+static inline int pci_find_capability (struct pci_dev *dev, int cap) {return 0; }
+static inline const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev) { return NULL; }
+
+/* Power management related routines */
+static inline int pci_save_state(struct pci_dev *dev, u32 *buffer) { return 0; }
+static inline int pci_restore_state(struct pci_dev *dev, u32 *buffer) { return 0; }
+static inline int pci_set_power_state(struct pci_dev *dev, int state) { return 0; }
+static inline int pci_enable_wake(struct pci_dev *dev, u32 state, int enable) { return 0; }
+
+#define pci_for_each_dev(dev) \
+ for(dev = NULL; 0; )
+
+#else
+
+/*
+ * a helper function that ensures correct pci_driver
+ * setup and cleanup for commonly-encountered hotplug/modular cases
+ *
+ * This MUST stay in a header, as it checks for -DMODULE
+ */
+static inline int pci_module_init(struct pci_driver *drv)
+{
+ int rc = pci_register_driver (drv);
+
+ if (rc > 0)
+ return 0;
+
+ /* iff CONFIG_HOTPLUG and built into kernel, we should
+ * leave the driver around for future hotplug events.
+ * For the module case, a hotplug daemon of some sort
+ * should load a module in response to an insert event. */
+#if defined(CONFIG_HOTPLUG) && !defined(MODULE)
+ if (rc == 0)
+ return 0;
+#else
+ if (rc == 0)
+ rc = -ENODEV;
+#endif
+
+ /* if we get here, we need to clean up pci driver instance
+ * and return some sort of error */
+ pci_unregister_driver (drv);
+
+ return rc;
+}
+
+#endif /* !CONFIG_PCI */
+
+/* these helpers provide future and backwards compatibility
+ * for accessing popular PCI BAR info */
+#define pci_resource_start(dev,bar) ((dev)->resource[(bar)].start)
+#define pci_resource_end(dev,bar) ((dev)->resource[(bar)].end)
+#define pci_resource_flags(dev,bar) ((dev)->resource[(bar)].flags)
+#define pci_resource_len(dev,bar) \
+ ((pci_resource_start((dev),(bar)) == 0 && \
+ pci_resource_end((dev),(bar)) == \
+ pci_resource_start((dev),(bar))) ? 0 : \
+ \
+ (pci_resource_end((dev),(bar)) - \
+ pci_resource_start((dev),(bar)) + 1))
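
Claiming and mapping a BAR then looks like the sketch below, assuming a memory
BAR and the ioremap() this tree provides (xen/arch/i386/ioremap.c):

    void *regs = NULL;

    if (pci_request_region(dev, 0, "mydrv") == 0) {
        if (!(pci_resource_flags(dev, 0) & PCI_BASE_ADDRESS_SPACE_IO))
            regs = ioremap(pci_resource_start(dev, 0),
                           pci_resource_len(dev, 0));
        /* ... program the device through regs, or use I/O ports ... */
    }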
+
+/* Similar to the helpers above, these manipulate per-pci_dev
+ * driver-specific data. Currently stored as pci_dev::driver_data,
+ * a void pointer, but it is not present on older kernels.
+ */
+static inline void *pci_get_drvdata (struct pci_dev *pdev)
+{
+ return pdev->driver_data;
+}
+
+static inline void pci_set_drvdata (struct pci_dev *pdev, void *data)
+{
+ pdev->driver_data = data;
+}
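
The usual pattern ties the driver's private state to the device in probe() and
retrieves it in remove(). A sketch, with struct my_priv hypothetical:

    static int my_probe(struct pci_dev *pdev, const struct pci_device_id *id)
    {
        struct my_priv *priv = kmalloc(sizeof(*priv), GFP_KERNEL);

        if (priv == NULL)
            return -ENOMEM;
        pci_set_drvdata(pdev, priv);
        return 0;
    }

    static void my_remove(struct pci_dev *pdev)
    {
        struct my_priv *priv = pci_get_drvdata(pdev);

        kfree(priv);
        pci_set_drvdata(pdev, NULL);
    }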
+
+/*
+ * The world is not perfect and supplies us with broken PCI devices.
+ * For at least a part of these bugs we need a work-around, so both
+ * generic (drivers/pci/quirks.c) and per-architecture code can define
+ * fixup hooks to be called for particular buggy devices.
+ */
+
+struct pci_fixup {
+ int pass;
+ u16 vendor, device; /* You can use PCI_ANY_ID here of course */
+ void (*hook)(struct pci_dev *dev);
+};
+
+extern struct pci_fixup pcibios_fixups[];
+
+#define PCI_FIXUP_HEADER 1 /* Called immediately after reading configuration header */
+#define PCI_FIXUP_FINAL 2 /* Final phase of device fixups */
+
+void pci_fixup_device(int pass, struct pci_dev *dev);
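
An architecture's fixup table is just an array of these, terminated by a
zeroed entry, with one hook per buggy device and a pass saying when to run it.
A sketch with a hypothetical quirk (PCI_ANY_ID matches everything, as the
comment above says):

    static void quirk_example(struct pci_dev *dev)
    {
        /* e.g. repair a bogus resource or class code here */
    }

    struct pci_fixup pcibios_fixups[] = {
        { PCI_FIXUP_HEADER, PCI_VENDOR_ID_COMPAQ, PCI_ANY_ID, quirk_example },
        { 0 }
    };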
+
+extern int pci_pci_problems;
+#define PCIPCI_FAIL 1
+#define PCIPCI_TRITON 2
+#define PCIPCI_NATOMA 4
+#define PCIPCI_VIAETBF 8
+#define PCIPCI_VSFX 16
+#define PCIPCI_ALIMAGIK 32
+
+#endif /* __KERNEL__ */
+#endif /* LINUX_PCI_H */
diff --git a/xen/include/xeno/pci_ids.h b/xen/include/xeno/pci_ids.h
new file mode 100644
index 0000000000..3220beb90c
--- /dev/null
+++ b/xen/include/xeno/pci_ids.h
@@ -0,0 +1,1856 @@
+/*
+ * PCI Class, Vendor and Device IDs
+ *
+ * Please keep sorted.
+ */
+
+/* Device classes and subclasses */
+
+#define PCI_CLASS_NOT_DEFINED 0x0000
+#define PCI_CLASS_NOT_DEFINED_VGA 0x0001
+
+#define PCI_BASE_CLASS_STORAGE 0x01
+#define PCI_CLASS_STORAGE_SCSI 0x0100
+#define PCI_CLASS_STORAGE_IDE 0x0101
+#define PCI_CLASS_STORAGE_FLOPPY 0x0102
+#define PCI_CLASS_STORAGE_IPI 0x0103
+#define PCI_CLASS_STORAGE_RAID 0x0104
+#define PCI_CLASS_STORAGE_OTHER 0x0180
+
+#define PCI_BASE_CLASS_NETWORK 0x02
+#define PCI_CLASS_NETWORK_ETHERNET 0x0200
+#define PCI_CLASS_NETWORK_TOKEN_RING 0x0201
+#define PCI_CLASS_NETWORK_FDDI 0x0202
+#define PCI_CLASS_NETWORK_ATM 0x0203
+#define PCI_CLASS_NETWORK_OTHER 0x0280
+
+#define PCI_BASE_CLASS_DISPLAY 0x03
+#define PCI_CLASS_DISPLAY_VGA 0x0300
+#define PCI_CLASS_DISPLAY_XGA 0x0301
+#define PCI_CLASS_DISPLAY_3D 0x0302
+#define PCI_CLASS_DISPLAY_OTHER 0x0380
+
+#define PCI_BASE_CLASS_MULTIMEDIA 0x04
+#define PCI_CLASS_MULTIMEDIA_VIDEO 0x0400
+#define PCI_CLASS_MULTIMEDIA_AUDIO 0x0401
+#define PCI_CLASS_MULTIMEDIA_PHONE 0x0402
+#define PCI_CLASS_MULTIMEDIA_OTHER 0x0480
+
+#define PCI_BASE_CLASS_MEMORY 0x05
+#define PCI_CLASS_MEMORY_RAM 0x0500
+#define PCI_CLASS_MEMORY_FLASH 0x0501
+#define PCI_CLASS_MEMORY_OTHER 0x0580
+
+#define PCI_BASE_CLASS_BRIDGE 0x06
+#define PCI_CLASS_BRIDGE_HOST 0x0600
+#define PCI_CLASS_BRIDGE_ISA 0x0601
+#define PCI_CLASS_BRIDGE_EISA 0x0602
+#define PCI_CLASS_BRIDGE_MC 0x0603
+#define PCI_CLASS_BRIDGE_PCI 0x0604
+#define PCI_CLASS_BRIDGE_PCMCIA 0x0605
+#define PCI_CLASS_BRIDGE_NUBUS 0x0606
+#define PCI_CLASS_BRIDGE_CARDBUS 0x0607
+#define PCI_CLASS_BRIDGE_RACEWAY 0x0608
+#define PCI_CLASS_BRIDGE_OTHER 0x0680
+
+#define PCI_BASE_CLASS_COMMUNICATION 0x07
+#define PCI_CLASS_COMMUNICATION_SERIAL 0x0700
+#define PCI_CLASS_COMMUNICATION_PARALLEL 0x0701
+#define PCI_CLASS_COMMUNICATION_MULTISERIAL 0x0702
+#define PCI_CLASS_COMMUNICATION_MODEM 0x0703
+#define PCI_CLASS_COMMUNICATION_OTHER 0x0780
+
+#define PCI_BASE_CLASS_SYSTEM 0x08
+#define PCI_CLASS_SYSTEM_PIC 0x0800
+#define PCI_CLASS_SYSTEM_DMA 0x0801
+#define PCI_CLASS_SYSTEM_TIMER 0x0802
+#define PCI_CLASS_SYSTEM_RTC 0x0803
+#define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804
+#define PCI_CLASS_SYSTEM_OTHER 0x0880
+
+#define PCI_BASE_CLASS_INPUT 0x09
+#define PCI_CLASS_INPUT_KEYBOARD 0x0900
+#define PCI_CLASS_INPUT_PEN 0x0901
+#define PCI_CLASS_INPUT_MOUSE 0x0902
+#define PCI_CLASS_INPUT_SCANNER 0x0903
+#define PCI_CLASS_INPUT_GAMEPORT 0x0904
+#define PCI_CLASS_INPUT_OTHER 0x0980
+
+#define PCI_BASE_CLASS_DOCKING 0x0a
+#define PCI_CLASS_DOCKING_GENERIC 0x0a00
+#define PCI_CLASS_DOCKING_OTHER 0x0a80
+
+#define PCI_BASE_CLASS_PROCESSOR 0x0b
+#define PCI_CLASS_PROCESSOR_386 0x0b00
+#define PCI_CLASS_PROCESSOR_486 0x0b01
+#define PCI_CLASS_PROCESSOR_PENTIUM 0x0b02
+#define PCI_CLASS_PROCESSOR_ALPHA 0x0b10
+#define PCI_CLASS_PROCESSOR_POWERPC 0x0b20
+#define PCI_CLASS_PROCESSOR_MIPS 0x0b30
+#define PCI_CLASS_PROCESSOR_CO 0x0b40
+
+#define PCI_BASE_CLASS_SERIAL 0x0c
+#define PCI_CLASS_SERIAL_FIREWIRE 0x0c00
+#define PCI_CLASS_SERIAL_ACCESS 0x0c01
+#define PCI_CLASS_SERIAL_SSA 0x0c02
+#define PCI_CLASS_SERIAL_USB 0x0c03
+#define PCI_CLASS_SERIAL_FIBER 0x0c04
+#define PCI_CLASS_SERIAL_SMBUS 0x0c05
+
+#define PCI_BASE_CLASS_INTELLIGENT 0x0e
+#define PCI_CLASS_INTELLIGENT_I2O 0x0e00
+
+#define PCI_BASE_CLASS_SATELLITE 0x0f
+#define PCI_CLASS_SATELLITE_TV 0x0f00
+#define PCI_CLASS_SATELLITE_AUDIO 0x0f01
+#define PCI_CLASS_SATELLITE_VOICE 0x0f03
+#define PCI_CLASS_SATELLITE_DATA 0x0f04
+
+#define PCI_BASE_CLASS_CRYPT 0x10
+#define PCI_CLASS_CRYPT_NETWORK 0x1000
+#define PCI_CLASS_CRYPT_ENTERTAINMENT 0x1001
+#define PCI_CLASS_CRYPT_OTHER 0x1080
+
+#define PCI_BASE_CLASS_SIGNAL_PROCESSING 0x11
+#define PCI_CLASS_SP_DPIO 0x1100
+#define PCI_CLASS_SP_OTHER 0x1180
+
+#define PCI_CLASS_OTHERS 0xff
+
+/* Vendors and devices. Sort key: vendor first, device next. */
+
+#define PCI_VENDOR_ID_DYNALINK 0x0675
+#define PCI_DEVICE_ID_DYNALINK_IS64PH 0x1702
+
+#define PCI_VENDOR_ID_BERKOM 0x0871
+#define PCI_DEVICE_ID_BERKOM_A1T 0xffa1
+#define PCI_DEVICE_ID_BERKOM_T_CONCEPT 0xffa2
+#define PCI_DEVICE_ID_BERKOM_A4T 0xffa4
+#define PCI_DEVICE_ID_BERKOM_SCITEL_QUADRO 0xffa8
+
+#define PCI_VENDOR_ID_COMPAQ 0x0e11
+#define PCI_DEVICE_ID_COMPAQ_TOKENRING 0x0508
+#define PCI_DEVICE_ID_COMPAQ_1280 0x3033
+#define PCI_DEVICE_ID_COMPAQ_TRIFLEX 0x4000
+#define PCI_DEVICE_ID_COMPAQ_6010 0x6010
+#define PCI_DEVICE_ID_COMPAQ_TACHYON 0xa0fc
+#define PCI_DEVICE_ID_COMPAQ_SMART2P 0xae10
+#define PCI_DEVICE_ID_COMPAQ_NETEL100 0xae32
+#define PCI_DEVICE_ID_COMPAQ_TRIFLEX_IDE 0xae33
+#define PCI_DEVICE_ID_COMPAQ_NETEL10 0xae34
+#define PCI_DEVICE_ID_COMPAQ_NETFLEX3I 0xae35
+#define PCI_DEVICE_ID_COMPAQ_NETEL100D 0xae40
+#define PCI_DEVICE_ID_COMPAQ_NETEL100PI 0xae43
+#define PCI_DEVICE_ID_COMPAQ_NETEL100I 0xb011
+#define PCI_DEVICE_ID_COMPAQ_CISS 0xb060
+#define PCI_DEVICE_ID_COMPAQ_CISSB 0xb178
+#define PCI_DEVICE_ID_COMPAQ_CISSC 0x0046
+#define PCI_DEVICE_ID_COMPAQ_THUNDER 0xf130
+#define PCI_DEVICE_ID_COMPAQ_NETFLEX3B 0xf150
+
+#define PCI_VENDOR_ID_NCR 0x1000
+#define PCI_VENDOR_ID_LSI_LOGIC 0x1000
+#define PCI_DEVICE_ID_NCR_53C810 0x0001
+#define PCI_DEVICE_ID_NCR_53C820 0x0002
+#define PCI_DEVICE_ID_NCR_53C825 0x0003
+#define PCI_DEVICE_ID_NCR_53C815 0x0004
+#define PCI_DEVICE_ID_LSI_53C810AP 0x0005
+#define PCI_DEVICE_ID_NCR_53C860 0x0006
+#define PCI_DEVICE_ID_LSI_53C1510 0x000a
+#define PCI_DEVICE_ID_NCR_53C896 0x000b
+#define PCI_DEVICE_ID_NCR_53C895 0x000c
+#define PCI_DEVICE_ID_NCR_53C885 0x000d
+#define PCI_DEVICE_ID_NCR_53C875 0x000f
+#define PCI_DEVICE_ID_NCR_53C1510 0x0010
+#define PCI_DEVICE_ID_LSI_53C895A 0x0012
+#define PCI_DEVICE_ID_LSI_53C875A 0x0013
+#define PCI_DEVICE_ID_LSI_53C1010_33 0x0020
+#define PCI_DEVICE_ID_LSI_53C1010_66 0x0021
+#define PCI_DEVICE_ID_LSI_53C1030 0x0030
+#define PCI_DEVICE_ID_LSI_53C1035 0x0040
+#define PCI_DEVICE_ID_NCR_53C875J 0x008f
+#define PCI_DEVICE_ID_LSI_FC909 0x0621
+#define PCI_DEVICE_ID_LSI_FC929 0x0622
+#define PCI_DEVICE_ID_LSI_FC929_LAN 0x0623
+#define PCI_DEVICE_ID_LSI_FC919 0x0624
+#define PCI_DEVICE_ID_LSI_FC919_LAN 0x0625
+#define PCI_DEVICE_ID_LSI_FC929X 0x0626
+#define PCI_DEVICE_ID_LSI_FC919X 0x0628
+#define PCI_DEVICE_ID_NCR_YELLOWFIN 0x0701
+#define PCI_DEVICE_ID_LSI_61C102 0x0901
+#define PCI_DEVICE_ID_LSI_63C815 0x1000
+
+#define PCI_VENDOR_ID_ATI 0x1002
+/* Mach64 */
+#define PCI_DEVICE_ID_ATI_68800 0x4158
+#define PCI_DEVICE_ID_ATI_215CT222 0x4354
+#define PCI_DEVICE_ID_ATI_210888CX 0x4358
+#define PCI_DEVICE_ID_ATI_215ET222 0x4554
+/* Mach64 / Rage */
+#define PCI_DEVICE_ID_ATI_215GB 0x4742
+#define PCI_DEVICE_ID_ATI_215GD 0x4744
+#define PCI_DEVICE_ID_ATI_215GI 0x4749
+#define PCI_DEVICE_ID_ATI_215GP 0x4750
+#define PCI_DEVICE_ID_ATI_215GQ 0x4751
+#define PCI_DEVICE_ID_ATI_215XL 0x4752
+#define PCI_DEVICE_ID_ATI_215GT 0x4754
+#define PCI_DEVICE_ID_ATI_215GTB 0x4755
+#define PCI_DEVICE_ID_ATI_215_IV 0x4756
+#define PCI_DEVICE_ID_ATI_215_IW 0x4757
+#define PCI_DEVICE_ID_ATI_215_IZ 0x475A
+#define PCI_DEVICE_ID_ATI_210888GX 0x4758
+#define PCI_DEVICE_ID_ATI_215_LB 0x4c42
+#define PCI_DEVICE_ID_ATI_215_LD 0x4c44
+#define PCI_DEVICE_ID_ATI_215_LG 0x4c47
+#define PCI_DEVICE_ID_ATI_215_LI 0x4c49
+#define PCI_DEVICE_ID_ATI_215_LM 0x4c4D
+#define PCI_DEVICE_ID_ATI_215_LN 0x4c4E
+#define PCI_DEVICE_ID_ATI_215_LR 0x4c52
+#define PCI_DEVICE_ID_ATI_215_LS 0x4c53
+#define PCI_DEVICE_ID_ATI_264_LT 0x4c54
+/* Mach64 VT */
+#define PCI_DEVICE_ID_ATI_264VT 0x5654
+#define PCI_DEVICE_ID_ATI_264VU 0x5655
+#define PCI_DEVICE_ID_ATI_264VV 0x5656
+/* Rage128 Pro GL */
+#define PCI_DEVICE_ID_ATI_Rage128_PA 0x5041
+#define PCI_DEVICE_ID_ATI_Rage128_PB 0x5042
+#define PCI_DEVICE_ID_ATI_Rage128_PC 0x5043
+#define PCI_DEVICE_ID_ATI_Rage128_PD 0x5044
+#define PCI_DEVICE_ID_ATI_Rage128_PE 0x5045
+#define PCI_DEVICE_ID_ATI_RAGE128_PF 0x5046
+/* Rage128 Pro VR */
+#define PCI_DEVICE_ID_ATI_RAGE128_PG 0x5047
+#define PCI_DEVICE_ID_ATI_RAGE128_PH 0x5048
+#define PCI_DEVICE_ID_ATI_RAGE128_PI 0x5049
+#define PCI_DEVICE_ID_ATI_RAGE128_PJ 0x504A
+#define PCI_DEVICE_ID_ATI_RAGE128_PK 0x504B
+#define PCI_DEVICE_ID_ATI_RAGE128_PL 0x504C
+#define PCI_DEVICE_ID_ATI_RAGE128_PM 0x504D
+#define PCI_DEVICE_ID_ATI_RAGE128_PN 0x504E
+#define PCI_DEVICE_ID_ATI_RAGE128_PO 0x504F
+#define PCI_DEVICE_ID_ATI_RAGE128_PP 0x5050
+#define PCI_DEVICE_ID_ATI_RAGE128_PQ 0x5051
+#define PCI_DEVICE_ID_ATI_RAGE128_PR 0x5052
+#define PCI_DEVICE_ID_ATI_RAGE128_TR 0x5452
+#define PCI_DEVICE_ID_ATI_RAGE128_PS 0x5053
+#define PCI_DEVICE_ID_ATI_RAGE128_PT 0x5054
+#define PCI_DEVICE_ID_ATI_RAGE128_PU 0x5055
+#define PCI_DEVICE_ID_ATI_RAGE128_PV 0x5056
+#define PCI_DEVICE_ID_ATI_RAGE128_PW 0x5057
+#define PCI_DEVICE_ID_ATI_RAGE128_PX 0x5058
+/* Rage128 GL */
+#define PCI_DEVICE_ID_ATI_RAGE128_RE 0x5245
+#define PCI_DEVICE_ID_ATI_RAGE128_RF 0x5246
+#define PCI_DEVICE_ID_ATI_RAGE128_RG 0x534b
+#define PCI_DEVICE_ID_ATI_RAGE128_RH 0x534c
+#define PCI_DEVICE_ID_ATI_RAGE128_RI 0x534d
+/* Rage128 VR */
+#define PCI_DEVICE_ID_ATI_RAGE128_RK 0x524b
+#define PCI_DEVICE_ID_ATI_RAGE128_RL 0x524c
+#define PCI_DEVICE_ID_ATI_RAGE128_RM 0x5345
+#define PCI_DEVICE_ID_ATI_RAGE128_RN 0x5346
+#define PCI_DEVICE_ID_ATI_RAGE128_RO 0x5347
+/* Rage128 M3 */
+#define PCI_DEVICE_ID_ATI_RAGE128_LE 0x4c45
+#define PCI_DEVICE_ID_ATI_RAGE128_LF 0x4c46
+/* Rage128 Pro Ultra */
+#define PCI_DEVICE_ID_ATI_RAGE128_U1 0x5446
+#define PCI_DEVICE_ID_ATI_RAGE128_U2 0x544C
+#define PCI_DEVICE_ID_ATI_RAGE128_U3 0x5452
+/* Radeon M4 */
+#define PCI_DEVICE_ID_ATI_RADEON_LE 0x4d45
+#define PCI_DEVICE_ID_ATI_RADEON_LF 0x4d46
+/* Radeon NV-100 */
+#define PCI_DEVICE_ID_ATI_RADEON_N1 0x5159
+#define PCI_DEVICE_ID_ATI_RADEON_N2 0x515a
+/* Radeon */
+#define PCI_DEVICE_ID_ATI_RADEON_RA 0x5144
+#define PCI_DEVICE_ID_ATI_RADEON_RB 0x5145
+#define PCI_DEVICE_ID_ATI_RADEON_RC 0x5146
+#define PCI_DEVICE_ID_ATI_RADEON_RD 0x5147
+/* RadeonIGP */
+#define PCI_DEVICE_ID_ATI_RADEON_IGP 0xCAB0
+
+#define PCI_VENDOR_ID_VLSI 0x1004
+#define PCI_DEVICE_ID_VLSI_82C592 0x0005
+#define PCI_DEVICE_ID_VLSI_82C593 0x0006
+#define PCI_DEVICE_ID_VLSI_82C594 0x0007
+#define PCI_DEVICE_ID_VLSI_82C597 0x0009
+#define PCI_DEVICE_ID_VLSI_82C541 0x000c
+#define PCI_DEVICE_ID_VLSI_82C543 0x000d
+#define PCI_DEVICE_ID_VLSI_82C532 0x0101
+#define PCI_DEVICE_ID_VLSI_82C534 0x0102
+#define PCI_DEVICE_ID_VLSI_82C535 0x0104
+#define PCI_DEVICE_ID_VLSI_82C147 0x0105
+#define PCI_DEVICE_ID_VLSI_VAS96011 0x0702
+
+#define PCI_VENDOR_ID_ADL 0x1005
+#define PCI_DEVICE_ID_ADL_2301 0x2301
+
+#define PCI_VENDOR_ID_NS 0x100b
+#define PCI_DEVICE_ID_NS_87415 0x0002
+#define PCI_DEVICE_ID_NS_87560_LIO 0x000e
+#define PCI_DEVICE_ID_NS_87560_USB 0x0012
+#define PCI_DEVICE_ID_NS_83815 0x0020
+#define PCI_DEVICE_ID_NS_83820 0x0022
+#define PCI_DEVICE_ID_NS_SCx200_BRIDGE 0x0500
+#define PCI_DEVICE_ID_NS_SCx200_SMI 0x0501
+#define PCI_DEVICE_ID_NS_SCx200_IDE 0x0502
+#define PCI_DEVICE_ID_NS_SCx200_AUDIO 0x0503
+#define PCI_DEVICE_ID_NS_SCx200_VIDEO 0x0504
+#define PCI_DEVICE_ID_NS_SCx200_XBUS 0x0505
+#define PCI_DEVICE_ID_NS_87410 0xd001
+
+#define PCI_VENDOR_ID_TSENG 0x100c
+#define PCI_DEVICE_ID_TSENG_W32P_2 0x3202
+#define PCI_DEVICE_ID_TSENG_W32P_b 0x3205
+#define PCI_DEVICE_ID_TSENG_W32P_c 0x3206
+#define PCI_DEVICE_ID_TSENG_W32P_d 0x3207
+#define PCI_DEVICE_ID_TSENG_ET6000 0x3208
+
+#define PCI_VENDOR_ID_WEITEK 0x100e
+#define PCI_DEVICE_ID_WEITEK_P9000 0x9001
+#define PCI_DEVICE_ID_WEITEK_P9100 0x9100
+
+#define PCI_VENDOR_ID_DEC 0x1011
+#define PCI_DEVICE_ID_DEC_BRD 0x0001
+#define PCI_DEVICE_ID_DEC_TULIP 0x0002
+#define PCI_DEVICE_ID_DEC_TGA 0x0004
+#define PCI_DEVICE_ID_DEC_TULIP_FAST 0x0009
+#define PCI_DEVICE_ID_DEC_TGA2 0x000D
+#define PCI_DEVICE_ID_DEC_FDDI 0x000F
+#define PCI_DEVICE_ID_DEC_TULIP_PLUS 0x0014
+#define PCI_DEVICE_ID_DEC_21142 0x0019
+#define PCI_DEVICE_ID_DEC_21052 0x0021
+#define PCI_DEVICE_ID_DEC_21150 0x0022
+#define PCI_DEVICE_ID_DEC_21152 0x0024
+#define PCI_DEVICE_ID_DEC_21153 0x0025
+#define PCI_DEVICE_ID_DEC_21154 0x0026
+#define PCI_DEVICE_ID_DEC_21285 0x1065
+#define PCI_DEVICE_ID_COMPAQ_42XX 0x0046
+
+#define PCI_VENDOR_ID_CIRRUS 0x1013
+#define PCI_DEVICE_ID_CIRRUS_7548 0x0038
+#define PCI_DEVICE_ID_CIRRUS_5430 0x00a0
+#define PCI_DEVICE_ID_CIRRUS_5434_4 0x00a4
+#define PCI_DEVICE_ID_CIRRUS_5434_8 0x00a8
+#define PCI_DEVICE_ID_CIRRUS_5436 0x00ac
+#define PCI_DEVICE_ID_CIRRUS_5446 0x00b8
+#define PCI_DEVICE_ID_CIRRUS_5480 0x00bc
+#define PCI_DEVICE_ID_CIRRUS_5462 0x00d0
+#define PCI_DEVICE_ID_CIRRUS_5464 0x00d4
+#define PCI_DEVICE_ID_CIRRUS_5465 0x00d6
+#define PCI_DEVICE_ID_CIRRUS_6729 0x1100
+#define PCI_DEVICE_ID_CIRRUS_6832 0x1110
+#define PCI_DEVICE_ID_CIRRUS_7542 0x1200
+#define PCI_DEVICE_ID_CIRRUS_7543 0x1202
+#define PCI_DEVICE_ID_CIRRUS_7541 0x1204
+
+#define PCI_VENDOR_ID_IBM 0x1014
+#define PCI_DEVICE_ID_IBM_FIRE_CORAL 0x000a
+#define PCI_DEVICE_ID_IBM_TR 0x0018
+#define PCI_DEVICE_ID_IBM_82G2675 0x001d
+#define PCI_DEVICE_ID_IBM_MCA 0x0020
+#define PCI_DEVICE_ID_IBM_82351 0x0022
+#define PCI_DEVICE_ID_IBM_PYTHON 0x002d
+#define PCI_DEVICE_ID_IBM_SERVERAID 0x002e
+#define PCI_DEVICE_ID_IBM_TR_WAKE 0x003e
+#define PCI_DEVICE_ID_IBM_MPIC 0x0046
+#define PCI_DEVICE_ID_IBM_3780IDSP 0x007d
+#define PCI_DEVICE_ID_IBM_CHUKAR 0x0096
+#define PCI_DEVICE_ID_IBM_CPC710_PCI64 0x00fc
+#define PCI_DEVICE_ID_IBM_CPC710_PCI32 0x0105
+#define PCI_DEVICE_ID_IBM_405GP 0x0156
+#define PCI_DEVICE_ID_IBM_SERVERAIDI960 0x01bd
+#define PCI_DEVICE_ID_IBM_MPIC_2 0xffff
+
+#define PCI_VENDOR_ID_COMPEX2 0x101a // pci.ids says "AT&T GIS (NCR)"
+#define PCI_DEVICE_ID_COMPEX2_100VG 0x0005
+
+#define PCI_VENDOR_ID_WD 0x101c
+#define PCI_DEVICE_ID_WD_7197 0x3296
+#define PCI_DEVICE_ID_WD_90C 0xc24a
+
+#define PCI_VENDOR_ID_AMI 0x101e
+#define PCI_DEVICE_ID_AMI_MEGARAID3 0x1960
+#define PCI_DEVICE_ID_AMI_MEGARAID 0x9010
+#define PCI_DEVICE_ID_AMI_MEGARAID2 0x9060
+
+#define PCI_VENDOR_ID_AMD 0x1022
+#define PCI_DEVICE_ID_AMD_LANCE 0x2000
+#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
+#define PCI_DEVICE_ID_AMD_SCSI 0x2020
+#define PCI_DEVICE_ID_AMD_FE_GATE_7006 0x7006
+#define PCI_DEVICE_ID_AMD_FE_GATE_7007 0x7007
+#define PCI_DEVICE_ID_AMD_FE_GATE_700C 0x700C
+#define PCI_DEVICE_ID_AMD_FE_GATE_700D 0x700D
+#define PCI_DEVICE_ID_AMD_FE_GATE_700E 0x700E
+#define PCI_DEVICE_ID_AMD_FE_GATE_700F 0x700F
+#define PCI_DEVICE_ID_AMD_COBRA_7400 0x7400
+#define PCI_DEVICE_ID_AMD_COBRA_7401 0x7401
+#define PCI_DEVICE_ID_AMD_COBRA_7403 0x7403
+#define PCI_DEVICE_ID_AMD_COBRA_7404 0x7404
+#define PCI_DEVICE_ID_AMD_VIPER_7408 0x7408
+#define PCI_DEVICE_ID_AMD_VIPER_7409 0x7409
+#define PCI_DEVICE_ID_AMD_VIPER_740B 0x740B
+#define PCI_DEVICE_ID_AMD_VIPER_740C 0x740C
+#define PCI_DEVICE_ID_AMD_VIPER_7410 0x7410
+#define PCI_DEVICE_ID_AMD_VIPER_7411 0x7411
+#define PCI_DEVICE_ID_AMD_VIPER_7413 0x7413
+#define PCI_DEVICE_ID_AMD_VIPER_7414 0x7414
+#define PCI_DEVICE_ID_AMD_OPUS_7440 0x7440
+# define PCI_DEVICE_ID_AMD_VIPER_7440 PCI_DEVICE_ID_AMD_OPUS_7440
+#define PCI_DEVICE_ID_AMD_OPUS_7441 0x7441
+# define PCI_DEVICE_ID_AMD_VIPER_7441 PCI_DEVICE_ID_AMD_OPUS_7441
+#define PCI_DEVICE_ID_AMD_OPUS_7443 0x7443
+# define PCI_DEVICE_ID_AMD_VIPER_7443 PCI_DEVICE_ID_AMD_OPUS_7443
+#define PCI_DEVICE_ID_AMD_OPUS_7448 0x7448
+# define PCI_DEVICE_ID_AMD_VIPER_7448 PCI_DEVICE_ID_AMD_OPUS_7448
+#define PCI_DEVICE_ID_AMD_OPUS_7449 0x7449
+# define PCI_DEVICE_ID_AMD_VIPER_7449 PCI_DEVICE_ID_AMD_OPUS_7449
+#define PCI_DEVICE_ID_AMD_8111_LAN 0x7462
+#define PCI_DEVICE_ID_AMD_8111_IDE 0x7469
+#define PCI_DEVICE_ID_AMD_8111_AC97 0x746d
+
+#define PCI_VENDOR_ID_TRIDENT 0x1023
+#define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX 0x2000
+#define PCI_DEVICE_ID_TRIDENT_4DWAVE_NX 0x2001
+#define PCI_DEVICE_ID_TRIDENT_9320 0x9320
+#define PCI_DEVICE_ID_TRIDENT_9388 0x9388
+#define PCI_DEVICE_ID_TRIDENT_9397 0x9397
+#define PCI_DEVICE_ID_TRIDENT_939A 0x939A
+#define PCI_DEVICE_ID_TRIDENT_9520 0x9520
+#define PCI_DEVICE_ID_TRIDENT_9525 0x9525
+#define PCI_DEVICE_ID_TRIDENT_9420 0x9420
+#define PCI_DEVICE_ID_TRIDENT_9440 0x9440
+#define PCI_DEVICE_ID_TRIDENT_9660 0x9660
+#define PCI_DEVICE_ID_TRIDENT_9750 0x9750
+#define PCI_DEVICE_ID_TRIDENT_9850 0x9850
+#define PCI_DEVICE_ID_TRIDENT_9880 0x9880
+#define PCI_DEVICE_ID_TRIDENT_8400 0x8400
+#define PCI_DEVICE_ID_TRIDENT_8420 0x8420
+#define PCI_DEVICE_ID_TRIDENT_8500 0x8500
+
+#define PCI_VENDOR_ID_AI 0x1025
+#define PCI_DEVICE_ID_AI_M1435 0x1435
+
+#define PCI_VENDOR_ID_DELL 0x1028
+
+#define PCI_VENDOR_ID_MATROX 0x102B
+#define PCI_DEVICE_ID_MATROX_MGA_2 0x0518
+#define PCI_DEVICE_ID_MATROX_MIL 0x0519
+#define PCI_DEVICE_ID_MATROX_MYS 0x051A
+#define PCI_DEVICE_ID_MATROX_MIL_2 0x051b
+#define PCI_DEVICE_ID_MATROX_MIL_2_AGP 0x051f
+#define PCI_DEVICE_ID_MATROX_MGA_IMP 0x0d10
+#define PCI_DEVICE_ID_MATROX_G100_MM 0x1000
+#define PCI_DEVICE_ID_MATROX_G100_AGP 0x1001
+#define PCI_DEVICE_ID_MATROX_G200_PCI 0x0520
+#define PCI_DEVICE_ID_MATROX_G200_AGP 0x0521
+#define PCI_DEVICE_ID_MATROX_G400 0x0525
+#define PCI_DEVICE_ID_MATROX_G550 0x2527
+#define PCI_DEVICE_ID_MATROX_VIA 0x4536
+
+#define PCI_VENDOR_ID_CT 0x102c
+#define PCI_DEVICE_ID_CT_65545 0x00d8
+#define PCI_DEVICE_ID_CT_65548 0x00dc
+#define PCI_DEVICE_ID_CT_65550 0x00e0
+#define PCI_DEVICE_ID_CT_65554 0x00e4
+#define PCI_DEVICE_ID_CT_65555 0x00e5
+
+#define PCI_VENDOR_ID_MIRO 0x1031
+#define PCI_DEVICE_ID_MIRO_36050 0x5601
+
+#define PCI_VENDOR_ID_NEC 0x1033
+#define PCI_DEVICE_ID_NEC_PCX2 0x0046
+#define PCI_DEVICE_ID_NEC_NILE4 0x005a
+#define PCI_DEVICE_ID_NEC_VRC5476 0x009b
+#define PCI_DEVICE_ID_NEC_VRC5477_AC97 0x00a6
+
+#define PCI_VENDOR_ID_FD 0x1036
+#define PCI_DEVICE_ID_FD_36C70 0x0000
+
+#define PCI_VENDOR_ID_SI 0x1039
+#define PCI_DEVICE_ID_SI_5591_AGP 0x0001
+#define PCI_DEVICE_ID_SI_6202 0x0002
+#define PCI_DEVICE_ID_SI_503 0x0008
+#define PCI_DEVICE_ID_SI_ACPI 0x0009
+#define PCI_DEVICE_ID_SI_5597_VGA 0x0200
+#define PCI_DEVICE_ID_SI_6205 0x0205
+#define PCI_DEVICE_ID_SI_501 0x0406
+#define PCI_DEVICE_ID_SI_496 0x0496
+#define PCI_DEVICE_ID_SI_300 0x0300
+#define PCI_DEVICE_ID_SI_315H 0x0310
+#define PCI_DEVICE_ID_SI_315 0x0315
+#define PCI_DEVICE_ID_SI_315PRO 0x0325
+#define PCI_DEVICE_ID_SI_530 0x0530
+#define PCI_DEVICE_ID_SI_540 0x0540
+#define PCI_DEVICE_ID_SI_550 0x0550
+#define PCI_DEVICE_ID_SI_540_VGA 0x5300
+#define PCI_DEVICE_ID_SI_550_VGA 0x5315
+#define PCI_DEVICE_ID_SI_601 0x0601
+#define PCI_DEVICE_ID_SI_620 0x0620
+#define PCI_DEVICE_ID_SI_630 0x0630
+#define PCI_DEVICE_ID_SI_635 0x0635
+#define PCI_DEVICE_ID_SI_640 0x0640
+#define PCI_DEVICE_ID_SI_645 0x0645
+#define PCI_DEVICE_ID_SI_646 0x0646
+#define PCI_DEVICE_ID_SI_648 0x0648
+#define PCI_DEVICE_ID_SI_650 0x0650
+#define PCI_DEVICE_ID_SI_651 0x0651
+#define PCI_DEVICE_ID_SI_652 0x0652
+#define PCI_DEVICE_ID_SI_730 0x0730
+#define PCI_DEVICE_ID_SI_630_VGA 0x6300
+#define PCI_DEVICE_ID_SI_730_VGA 0x7300
+#define PCI_DEVICE_ID_SI_735 0x0735
+#define PCI_DEVICE_ID_SI_740 0x0740
+#define PCI_DEVICE_ID_SI_745 0x0745
+#define PCI_DEVICE_ID_SI_746 0x0746
+#define PCI_DEVICE_ID_SI_748 0x0748
+#define PCI_DEVICE_ID_SI_750 0x0750
+#define PCI_DEVICE_ID_SI_751 0x0751
+#define PCI_DEVICE_ID_SI_752 0x0752
+#define PCI_DEVICE_ID_SI_900 0x0900
+#define PCI_DEVICE_ID_SI_5107 0x5107
+#define PCI_DEVICE_ID_SI_5300 0x5300
+#define PCI_DEVICE_ID_SI_5511 0x5511
+#define PCI_DEVICE_ID_SI_5513 0x5513
+#define PCI_DEVICE_ID_SI_5518 0x5518
+#define PCI_DEVICE_ID_SI_5571 0x5571
+#define PCI_DEVICE_ID_SI_5591 0x5591
+#define PCI_DEVICE_ID_SI_5597 0x5597
+#define PCI_DEVICE_ID_SI_5598 0x5598
+#define PCI_DEVICE_ID_SI_5600 0x5600
+#define PCI_DEVICE_ID_SI_6300 0x6300
+#define PCI_DEVICE_ID_SI_6306 0x6306
+#define PCI_DEVICE_ID_SI_6326 0x6326
+#define PCI_DEVICE_ID_SI_7001 0x7001
+#define PCI_DEVICE_ID_SI_7016 0x7016
+
+#define PCI_VENDOR_ID_HP 0x103c
+#define PCI_DEVICE_ID_HP_DONNER_GFX 0x1008
+#define PCI_DEVICE_ID_HP_TACHYON 0x1028
+#define PCI_DEVICE_ID_HP_TACHLITE 0x1029
+#define PCI_DEVICE_ID_HP_J2585A 0x1030
+#define PCI_DEVICE_ID_HP_J2585B 0x1031
+#define PCI_DEVICE_ID_HP_SAS 0x1048
+#define PCI_DEVICE_ID_HP_DIVA1 0x1049
+#define PCI_DEVICE_ID_HP_DIVA2 0x104A
+#define PCI_DEVICE_ID_HP_SP2_0 0x104B
+#define PCI_DEVICE_ID_HP_REO_SBA 0x10f0
+#define PCI_DEVICE_ID_HP_REO_IOC 0x10f1
+#define PCI_DEVICE_ID_HP_ZX1_SBA 0x1229
+#define PCI_DEVICE_ID_HP_ZX1_IOC 0x122a
+#define PCI_DEVICE_ID_HP_ZX1_LBA 0x122e
+
+#define PCI_VENDOR_ID_PCTECH 0x1042
+#define PCI_DEVICE_ID_PCTECH_RZ1000 0x1000
+#define PCI_DEVICE_ID_PCTECH_RZ1001 0x1001
+#define PCI_DEVICE_ID_PCTECH_SAMURAI_0 0x3000
+#define PCI_DEVICE_ID_PCTECH_SAMURAI_1 0x3010
+#define PCI_DEVICE_ID_PCTECH_SAMURAI_IDE 0x3020
+
+#define PCI_VENDOR_ID_ASUSTEK 0x1043
+#define PCI_DEVICE_ID_ASUSTEK_0675 0x0675
+
+#define PCI_VENDOR_ID_DPT 0x1044
+#define PCI_DEVICE_ID_DPT 0xa400
+
+#define PCI_VENDOR_ID_OPTI 0x1045
+#define PCI_DEVICE_ID_OPTI_92C178 0xc178
+#define PCI_DEVICE_ID_OPTI_82C557 0xc557
+#define PCI_DEVICE_ID_OPTI_82C558 0xc558
+#define PCI_DEVICE_ID_OPTI_82C621 0xc621
+#define PCI_DEVICE_ID_OPTI_82C700 0xc700
+#define PCI_DEVICE_ID_OPTI_82C701 0xc701
+#define PCI_DEVICE_ID_OPTI_82C814 0xc814
+#define PCI_DEVICE_ID_OPTI_82C822 0xc822
+#define PCI_DEVICE_ID_OPTI_82C861 0xc861
+#define PCI_DEVICE_ID_OPTI_82C825 0xd568
+
+#define PCI_VENDOR_ID_ELSA 0x1048
+#define PCI_DEVICE_ID_ELSA_MICROLINK 0x1000
+#define PCI_DEVICE_ID_ELSA_QS3000 0x3000
+
+#define PCI_VENDOR_ID_SGS 0x104a
+#define PCI_DEVICE_ID_SGS_2000 0x0008
+#define PCI_DEVICE_ID_SGS_1764 0x0009
+
+#define PCI_VENDOR_ID_BUSLOGIC 0x104B
+#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC 0x0140
+#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER 0x1040
+#define PCI_DEVICE_ID_BUSLOGIC_FLASHPOINT 0x8130
+
+#define PCI_VENDOR_ID_TI 0x104c
+#define PCI_DEVICE_ID_TI_TVP4010 0x3d04
+#define PCI_DEVICE_ID_TI_TVP4020 0x3d07
+#define PCI_DEVICE_ID_TI_1130 0xac12
+#define PCI_DEVICE_ID_TI_1031 0xac13
+#define PCI_DEVICE_ID_TI_1131 0xac15
+#define PCI_DEVICE_ID_TI_1250 0xac16
+#define PCI_DEVICE_ID_TI_1220 0xac17
+#define PCI_DEVICE_ID_TI_1221 0xac19
+#define PCI_DEVICE_ID_TI_1210 0xac1a
+#define PCI_DEVICE_ID_TI_1410 0xac50
+#define PCI_DEVICE_ID_TI_1450 0xac1b
+#define PCI_DEVICE_ID_TI_1225 0xac1c
+#define PCI_DEVICE_ID_TI_1251A 0xac1d
+#define PCI_DEVICE_ID_TI_1211 0xac1e
+#define PCI_DEVICE_ID_TI_1251B 0xac1f
+#define PCI_DEVICE_ID_TI_4410 0xac41
+#define PCI_DEVICE_ID_TI_4451 0xac42
+#define PCI_DEVICE_ID_TI_1420 0xac51
+
+#define PCI_VENDOR_ID_SONY 0x104d
+#define PCI_DEVICE_ID_SONY_CXD3222 0x8039
+
+#define PCI_VENDOR_ID_OAK 0x104e
+#define PCI_DEVICE_ID_OAK_OTI107 0x0107
+
+/* Winbond have two vendor IDs! See 0x10ad as well */
+#define PCI_VENDOR_ID_WINBOND2 0x1050
+#define PCI_DEVICE_ID_WINBOND2_89C940 0x0940
+#define PCI_DEVICE_ID_WINBOND2_89C940F 0x5a5a
+#define PCI_DEVICE_ID_WINBOND2_6692 0x6692
+
+#define PCI_VENDOR_ID_ANIGMA 0x1051
+#define PCI_DEVICE_ID_ANIGMA_MC145575 0x0100
+
+#define PCI_VENDOR_ID_EFAR 0x1055
+#define PCI_DEVICE_ID_EFAR_SLC90E66_1 0x9130
+#define PCI_DEVICE_ID_EFAR_SLC90E66_0 0x9460
+#define PCI_DEVICE_ID_EFAR_SLC90E66_2 0x9462
+#define PCI_DEVICE_ID_EFAR_SLC90E66_3 0x9463
+
+#define PCI_VENDOR_ID_MOTOROLA 0x1057
+#define PCI_VENDOR_ID_MOTOROLA_OOPS 0x1507
+#define PCI_DEVICE_ID_MOTOROLA_MPC105 0x0001
+#define PCI_DEVICE_ID_MOTOROLA_MPC106 0x0002
+#define PCI_DEVICE_ID_MOTOROLA_MPC107 0x0004
+#define PCI_DEVICE_ID_MOTOROLA_RAVEN 0x4801
+#define PCI_DEVICE_ID_MOTOROLA_FALCON 0x4802
+#define PCI_DEVICE_ID_MOTOROLA_HAWK 0x4803
+#define PCI_DEVICE_ID_MOTOROLA_CPX8216 0x4806
+
+#define PCI_VENDOR_ID_PROMISE 0x105a
+#define PCI_DEVICE_ID_PROMISE_20265 0x0d30
+#define PCI_DEVICE_ID_PROMISE_20267 0x4d30
+#define PCI_DEVICE_ID_PROMISE_20246 0x4d33
+#define PCI_DEVICE_ID_PROMISE_20262 0x4d38
+#define PCI_DEVICE_ID_PROMISE_20263 0x0D38
+#define PCI_DEVICE_ID_PROMISE_20268 0x4d68
+#define PCI_DEVICE_ID_PROMISE_20270 0x6268
+#define PCI_DEVICE_ID_PROMISE_20269 0x4d69
+#define PCI_DEVICE_ID_PROMISE_20271 0x6269
+#define PCI_DEVICE_ID_PROMISE_20275 0x1275
+#define PCI_DEVICE_ID_PROMISE_20276 0x5275
+#define PCI_DEVICE_ID_PROMISE_20277 0x7275
+#define PCI_DEVICE_ID_PROMISE_5300 0x5300
+
+#define PCI_VENDOR_ID_N9 0x105d
+#define PCI_DEVICE_ID_N9_I128 0x2309
+#define PCI_DEVICE_ID_N9_I128_2 0x2339
+#define PCI_DEVICE_ID_N9_I128_T2R 0x493d
+
+#define PCI_VENDOR_ID_UMC 0x1060
+#define PCI_DEVICE_ID_UMC_UM8673F 0x0101
+#define PCI_DEVICE_ID_UMC_UM8891A 0x0891
+#define PCI_DEVICE_ID_UMC_UM8886BF 0x673a
+#define PCI_DEVICE_ID_UMC_UM8886A 0x886a
+#define PCI_DEVICE_ID_UMC_UM8881F 0x8881
+#define PCI_DEVICE_ID_UMC_UM8886F 0x8886
+#define PCI_DEVICE_ID_UMC_UM9017F 0x9017
+#define PCI_DEVICE_ID_UMC_UM8886N 0xe886
+#define PCI_DEVICE_ID_UMC_UM8891N 0xe891
+
+#define PCI_VENDOR_ID_X 0x1061
+#define PCI_DEVICE_ID_X_AGX016 0x0001
+
+#define PCI_VENDOR_ID_MYLEX 0x1069
+#define PCI_DEVICE_ID_MYLEX_DAC960_P 0x0001
+#define PCI_DEVICE_ID_MYLEX_DAC960_PD 0x0002
+#define PCI_DEVICE_ID_MYLEX_DAC960_PG 0x0010
+#define PCI_DEVICE_ID_MYLEX_DAC960_LA 0x0020
+#define PCI_DEVICE_ID_MYLEX_DAC960_LP 0x0050
+#define PCI_DEVICE_ID_MYLEX_DAC960_BA 0xBA56
+
+#define PCI_VENDOR_ID_PICOP 0x1066
+#define PCI_DEVICE_ID_PICOP_PT86C52X 0x0001
+#define PCI_DEVICE_ID_PICOP_PT80C524 0x8002
+
+#define PCI_VENDOR_ID_APPLE 0x106b
+#define PCI_DEVICE_ID_APPLE_BANDIT 0x0001
+#define PCI_DEVICE_ID_APPLE_GC 0x0002
+#define PCI_DEVICE_ID_APPLE_HYDRA 0x000e
+#define PCI_DEVICE_ID_APPLE_UNI_N_FW 0x0018
+#define PCI_DEVICE_ID_APPLE_KL_USB 0x0019
+#define PCI_DEVICE_ID_APPLE_UNI_N_AGP 0x0020
+#define PCI_DEVICE_ID_APPLE_UNI_N_GMAC 0x0021
+#define PCI_DEVICE_ID_APPLE_KEYLARGO 0x0022
+#define PCI_DEVICE_ID_APPLE_UNI_N_GMACP 0x0024
+#define PCI_DEVICE_ID_APPLE_KEYLARGO_P 0x0025
+#define PCI_DEVICE_ID_APPLE_KL_USB_P 0x0026
+#define PCI_DEVICE_ID_APPLE_UNI_N_AGP_P 0x0027
+#define PCI_DEVICE_ID_APPLE_UNI_N_AGP15 0x002d
+#define PCI_DEVICE_ID_APPLE_UNI_N_FW2 0x0030
+
+#define PCI_VENDOR_ID_YAMAHA 0x1073
+#define PCI_DEVICE_ID_YAMAHA_724 0x0004
+#define PCI_DEVICE_ID_YAMAHA_724F 0x000d
+#define PCI_DEVICE_ID_YAMAHA_740 0x000a
+#define PCI_DEVICE_ID_YAMAHA_740C 0x000c
+#define PCI_DEVICE_ID_YAMAHA_744 0x0010
+#define PCI_DEVICE_ID_YAMAHA_754 0x0012
+
+#define PCI_VENDOR_ID_NEXGEN 0x1074
+#define PCI_DEVICE_ID_NEXGEN_82C501 0x4e78
+
+#define PCI_VENDOR_ID_QLOGIC 0x1077
+#define PCI_DEVICE_ID_QLOGIC_ISP1020 0x1020
+#define PCI_DEVICE_ID_QLOGIC_ISP1022 0x1022
+#define PCI_DEVICE_ID_QLOGIC_ISP2100 0x2100
+#define PCI_DEVICE_ID_QLOGIC_ISP2200 0x2200
+
+#define PCI_VENDOR_ID_CYRIX 0x1078
+#define PCI_DEVICE_ID_CYRIX_5510 0x0000
+#define PCI_DEVICE_ID_CYRIX_PCI_MASTER 0x0001
+#define PCI_DEVICE_ID_CYRIX_5520 0x0002
+#define PCI_DEVICE_ID_CYRIX_5530_LEGACY 0x0100
+#define PCI_DEVICE_ID_CYRIX_5530_SMI 0x0101
+#define PCI_DEVICE_ID_CYRIX_5530_IDE 0x0102
+#define PCI_DEVICE_ID_CYRIX_5530_AUDIO 0x0103
+#define PCI_DEVICE_ID_CYRIX_5530_VIDEO 0x0104
+
+#define PCI_VENDOR_ID_LEADTEK 0x107d
+#define PCI_DEVICE_ID_LEADTEK_805 0x0000
+
+#define PCI_VENDOR_ID_INTERPHASE 0x107e
+#define PCI_DEVICE_ID_INTERPHASE_5526 0x0004
+#define PCI_DEVICE_ID_INTERPHASE_55x6 0x0005
+#define PCI_DEVICE_ID_INTERPHASE_5575 0x0008
+
+#define PCI_VENDOR_ID_CONTAQ 0x1080
+#define PCI_DEVICE_ID_CONTAQ_82C599 0x0600
+#define PCI_DEVICE_ID_CONTAQ_82C693 0xc693
+
+#define PCI_VENDOR_ID_FOREX 0x1083
+
+#define PCI_VENDOR_ID_OLICOM 0x108d
+#define PCI_DEVICE_ID_OLICOM_OC3136 0x0001
+#define PCI_DEVICE_ID_OLICOM_OC2315 0x0011
+#define PCI_DEVICE_ID_OLICOM_OC2325 0x0012
+#define PCI_DEVICE_ID_OLICOM_OC2183 0x0013
+#define PCI_DEVICE_ID_OLICOM_OC2326 0x0014
+#define PCI_DEVICE_ID_OLICOM_OC6151 0x0021
+
+#define PCI_VENDOR_ID_SUN 0x108e
+#define PCI_DEVICE_ID_SUN_EBUS 0x1000
+#define PCI_DEVICE_ID_SUN_HAPPYMEAL 0x1001
+#define PCI_DEVICE_ID_SUN_RIO_EBUS 0x1100
+#define PCI_DEVICE_ID_SUN_RIO_GEM 0x1101
+#define PCI_DEVICE_ID_SUN_RIO_1394 0x1102
+#define PCI_DEVICE_ID_SUN_RIO_USB 0x1103
+#define PCI_DEVICE_ID_SUN_GEM 0x2bad
+#define PCI_DEVICE_ID_SUN_SIMBA 0x5000
+#define PCI_DEVICE_ID_SUN_PBM 0x8000
+#define PCI_DEVICE_ID_SUN_SCHIZO 0x8001
+#define PCI_DEVICE_ID_SUN_SABRE 0xa000
+#define PCI_DEVICE_ID_SUN_HUMMINGBIRD 0xa001
+
+#define PCI_VENDOR_ID_CMD 0x1095
+#define PCI_DEVICE_ID_CMD_640 0x0640
+#define PCI_DEVICE_ID_CMD_643 0x0643
+#define PCI_DEVICE_ID_CMD_646 0x0646
+#define PCI_DEVICE_ID_CMD_647 0x0647
+#define PCI_DEVICE_ID_CMD_648 0x0648
+#define PCI_DEVICE_ID_CMD_649 0x0649
+#define PCI_DEVICE_ID_CMD_670 0x0670
+#define PCI_DEVICE_ID_CMD_680 0x0680
+
+#define PCI_DEVICE_ID_SII_680 0x0680
+#define PCI_DEVICE_ID_SII_3112 0x3112
+
+#define PCI_VENDOR_ID_VISION 0x1098
+#define PCI_DEVICE_ID_VISION_QD8500 0x0001
+#define PCI_DEVICE_ID_VISION_QD8580 0x0002
+
+#define PCI_VENDOR_ID_BROOKTREE 0x109e
+#define PCI_DEVICE_ID_BROOKTREE_848 0x0350
+#define PCI_DEVICE_ID_BROOKTREE_849A 0x0351
+#define PCI_DEVICE_ID_BROOKTREE_878_1 0x036e
+#define PCI_DEVICE_ID_BROOKTREE_878 0x0878
+#define PCI_DEVICE_ID_BROOKTREE_8474 0x8474
+
+#define PCI_VENDOR_ID_SIERRA 0x10a8
+#define PCI_DEVICE_ID_SIERRA_STB 0x0000
+
+#define PCI_VENDOR_ID_SGI 0x10a9
+#define PCI_DEVICE_ID_SGI_IOC3 0x0003
+
+#define PCI_VENDOR_ID_ACC 0x10aa
+#define PCI_DEVICE_ID_ACC_2056 0x0000
+
+#define PCI_VENDOR_ID_WINBOND 0x10ad
+#define PCI_DEVICE_ID_WINBOND_83769 0x0001
+#define PCI_DEVICE_ID_WINBOND_82C105 0x0105
+#define PCI_DEVICE_ID_WINBOND_83C553 0x0565
+
+#define PCI_VENDOR_ID_DATABOOK 0x10b3
+#define PCI_DEVICE_ID_DATABOOK_87144 0xb106
+
+#define PCI_VENDOR_ID_PLX 0x10b5
+#define PCI_DEVICE_ID_PLX_R685 0x1030
+#define PCI_DEVICE_ID_PLX_ROMULUS 0x106a
+#define PCI_DEVICE_ID_PLX_SPCOM800 0x1076
+#define PCI_DEVICE_ID_PLX_1077 0x1077
+#define PCI_DEVICE_ID_PLX_SPCOM200 0x1103
+#define PCI_DEVICE_ID_PLX_DJINN_ITOO 0x1151
+#define PCI_DEVICE_ID_PLX_R753 0x1152
+#define PCI_DEVICE_ID_PLX_9050 0x9050
+#define PCI_DEVICE_ID_PLX_9060 0x9060
+#define PCI_DEVICE_ID_PLX_9060ES 0x906E
+#define PCI_DEVICE_ID_PLX_9060SD 0x906D
+#define PCI_DEVICE_ID_PLX_9080 0x9080
+#define PCI_DEVICE_ID_PLX_GTEK_SERIAL2 0xa001
+
+#define PCI_VENDOR_ID_MADGE 0x10b6
+#define PCI_DEVICE_ID_MADGE_MK2 0x0002
+#define PCI_DEVICE_ID_MADGE_C155S 0x1001
+
+#define PCI_VENDOR_ID_3COM 0x10b7
+#define PCI_DEVICE_ID_3COM_3C985 0x0001
+#define PCI_DEVICE_ID_3COM_3C339 0x3390
+#define PCI_DEVICE_ID_3COM_3C590 0x5900
+#define PCI_DEVICE_ID_3COM_3C595TX 0x5950
+#define PCI_DEVICE_ID_3COM_3C595T4 0x5951
+#define PCI_DEVICE_ID_3COM_3C595MII 0x5952
+#define PCI_DEVICE_ID_3COM_3C900TPO 0x9000
+#define PCI_DEVICE_ID_3COM_3C900COMBO 0x9001
+#define PCI_DEVICE_ID_3COM_3C905TX 0x9050
+#define PCI_DEVICE_ID_3COM_3C905T4 0x9051
+#define PCI_DEVICE_ID_3COM_3C905B_TX 0x9055
+
+#define PCI_VENDOR_ID_SMC 0x10b8
+#define PCI_DEVICE_ID_SMC_EPIC100 0x0005
+
+#define PCI_VENDOR_ID_AL 0x10b9
+#define PCI_DEVICE_ID_AL_M1445 0x1445
+#define PCI_DEVICE_ID_AL_M1449 0x1449
+#define PCI_DEVICE_ID_AL_M1451 0x1451
+#define PCI_DEVICE_ID_AL_M1461 0x1461
+#define PCI_DEVICE_ID_AL_M1489 0x1489
+#define PCI_DEVICE_ID_AL_M1511 0x1511
+#define PCI_DEVICE_ID_AL_M1513 0x1513
+#define PCI_DEVICE_ID_AL_M1521 0x1521
+#define PCI_DEVICE_ID_AL_M1523 0x1523
+#define PCI_DEVICE_ID_AL_M1531 0x1531
+#define PCI_DEVICE_ID_AL_M1533 0x1533
+#define PCI_DEVICE_ID_AL_M1535 0x1535
+#define PCI_DEVICE_ID_AL_M1541 0x1541
+#define PCI_DEVICE_ID_AL_M1621 0x1621
+#define PCI_DEVICE_ID_AL_M1631 0x1631
+#define PCI_DEVICE_ID_AL_M1641 0x1641
+#define PCI_DEVICE_ID_AL_M1644 0x1644
+#define PCI_DEVICE_ID_AL_M1647 0x1647
+#define PCI_DEVICE_ID_AL_M1651 0x1651
+#define PCI_DEVICE_ID_AL_M1543 0x1543
+#define PCI_DEVICE_ID_AL_M3307 0x3307
+#define PCI_DEVICE_ID_AL_M4803 0x5215
+#define PCI_DEVICE_ID_AL_M5219 0x5219
+#define PCI_DEVICE_ID_AL_M5229 0x5229
+#define PCI_DEVICE_ID_AL_M5237 0x5237
+#define PCI_DEVICE_ID_AL_M5243 0x5243
+#define PCI_DEVICE_ID_AL_M5451 0x5451
+#define PCI_DEVICE_ID_AL_M7101 0x7101
+
+#define PCI_VENDOR_ID_MITSUBISHI 0x10ba
+
+#define PCI_VENDOR_ID_SURECOM 0x10bd
+#define PCI_DEVICE_ID_SURECOM_NE34 0x0e34
+
+#define PCI_VENDOR_ID_NEOMAGIC 0x10c8
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_NM2070 0x0001
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128V 0x0002
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128ZV 0x0003
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_NM2160 0x0004
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICMEDIA_256AV 0x0005
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128ZVPLUS 0x0083
+
+#define PCI_VENDOR_ID_ASP 0x10cd
+#define PCI_DEVICE_ID_ASP_ABP940 0x1200
+#define PCI_DEVICE_ID_ASP_ABP940U 0x1300
+#define PCI_DEVICE_ID_ASP_ABP940UW 0x2300
+
+#define PCI_VENDOR_ID_MACRONIX 0x10d9
+#define PCI_DEVICE_ID_MACRONIX_MX98713 0x0512
+#define PCI_DEVICE_ID_MACRONIX_MX987x5 0x0531
+
+#define PCI_VENDOR_ID_TCONRAD 0x10da
+#define PCI_DEVICE_ID_TCONRAD_TOKENRING 0x0508
+
+#define PCI_VENDOR_ID_CERN 0x10dc
+#define PCI_DEVICE_ID_CERN_SPSB_PMC 0x0001
+#define PCI_DEVICE_ID_CERN_SPSB_PCI 0x0002
+#define PCI_DEVICE_ID_CERN_HIPPI_DST 0x0021
+#define PCI_DEVICE_ID_CERN_HIPPI_SRC 0x0022
+
+#define PCI_VENDOR_ID_NVIDIA 0x10de
+#define PCI_DEVICE_ID_NVIDIA_TNT 0x0020
+#define PCI_DEVICE_ID_NVIDIA_TNT2 0x0028
+#define PCI_DEVICE_ID_NVIDIA_UTNT2 0x0029
+#define PCI_DEVICE_ID_NVIDIA_VTNT2 0x002C
+#define PCI_DEVICE_ID_NVIDIA_UVTNT2 0x002D
+#define PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE 0x0065
+#define PCI_DEVICE_ID_NVIDIA_ITNT2 0x00A0
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE_SDR 0x0100
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE_DDR 0x0101
+#define PCI_DEVICE_ID_NVIDIA_QUADRO 0x0103
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_MX 0x0110
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_MX2 0x0111
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GO 0x0112
+#define PCI_DEVICE_ID_NVIDIA_QUADRO2_MXR 0x0113
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GTS 0x0150
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GTS2 0x0151
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_ULTRA 0x0152
+#define PCI_DEVICE_ID_NVIDIA_QUADRO2_PRO 0x0153
+#define PCI_DEVICE_ID_NVIDIA_IGEFORCE2 0x01a0
+#define PCI_DEVICE_ID_NVIDIA_NFORCE_IDE 0x01bc
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE3 0x0200
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE3_1 0x0201
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE3_2 0x0202
+#define PCI_DEVICE_ID_NVIDIA_QUADRO_DDC 0x0203
+
+#define PCI_VENDOR_ID_IMS 0x10e0
+#define PCI_DEVICE_ID_IMS_8849 0x8849
+#define PCI_DEVICE_ID_IMS_TT128 0x9128
+#define PCI_DEVICE_ID_IMS_TT3D 0x9135
+
+#define PCI_VENDOR_ID_TEKRAM2 0x10e1
+#define PCI_DEVICE_ID_TEKRAM2_690c 0x690c
+
+#define PCI_VENDOR_ID_TUNDRA 0x10e3
+#define PCI_DEVICE_ID_TUNDRA_CA91C042 0x0000
+
+#define PCI_VENDOR_ID_AMCC 0x10e8
+#define PCI_DEVICE_ID_AMCC_MYRINET 0x8043
+#define PCI_DEVICE_ID_AMCC_PARASTATION 0x8062
+#define PCI_DEVICE_ID_AMCC_S5933 0x807d
+#define PCI_DEVICE_ID_AMCC_S5933_HEPC3 0x809c
+
+#define PCI_VENDOR_ID_INTERG 0x10ea
+#define PCI_DEVICE_ID_INTERG_1680 0x1680
+#define PCI_DEVICE_ID_INTERG_1682 0x1682
+#define PCI_DEVICE_ID_INTERG_2000 0x2000
+#define PCI_DEVICE_ID_INTERG_2010 0x2010
+#define PCI_DEVICE_ID_INTERG_5000 0x5000
+#define PCI_DEVICE_ID_INTERG_5050 0x5050
+
+#define PCI_VENDOR_ID_REALTEK 0x10ec
+#define PCI_DEVICE_ID_REALTEK_8029 0x8029
+#define PCI_DEVICE_ID_REALTEK_8129 0x8129
+#define PCI_DEVICE_ID_REALTEK_8139 0x8139
+#define PCI_DEVICE_ID_REALTEK_8169 0x8169
+
+#define PCI_VENDOR_ID_XILINX 0x10ee
+#define PCI_DEVICE_ID_TURBOPAM 0x4020
+
+#define PCI_VENDOR_ID_TRUEVISION 0x10fa
+#define PCI_DEVICE_ID_TRUEVISION_T1000 0x000c
+
+#define PCI_VENDOR_ID_INIT 0x1101
+#define PCI_DEVICE_ID_INIT_320P 0x9100
+#define PCI_DEVICE_ID_INIT_360P 0x9500
+
+#define PCI_VENDOR_ID_CREATIVE 0x1102 // duplicate: ECTIVA
+#define PCI_DEVICE_ID_CREATIVE_EMU10K1 0x0002
+
+#define PCI_VENDOR_ID_ECTIVA 0x1102 // duplicate: CREATIVE
+#define PCI_DEVICE_ID_ECTIVA_EV1938 0x8938
+
+#define PCI_VENDOR_ID_TTI 0x1103
+#define PCI_DEVICE_ID_TTI_HPT343 0x0003
+#define PCI_DEVICE_ID_TTI_HPT366 0x0004
+#define PCI_DEVICE_ID_TTI_HPT372 0x0005
+#define PCI_DEVICE_ID_TTI_HPT302 0x0006
+#define PCI_DEVICE_ID_TTI_HPT371 0x0007
+#define PCI_DEVICE_ID_TTI_HPT374 0x0008
+
+#define PCI_VENDOR_ID_VIA 0x1106
+#define PCI_DEVICE_ID_VIA_8363_0 0x0305
+#define PCI_DEVICE_ID_VIA_8371_0 0x0391
+#define PCI_DEVICE_ID_VIA_8501_0 0x0501
+#define PCI_DEVICE_ID_VIA_82C505 0x0505
+#define PCI_DEVICE_ID_VIA_82C561 0x0561
+#define PCI_DEVICE_ID_VIA_82C586_1 0x0571
+#define PCI_DEVICE_ID_VIA_82C576 0x0576
+#define PCI_DEVICE_ID_VIA_82C585 0x0585
+#define PCI_DEVICE_ID_VIA_82C586_0 0x0586
+#define PCI_DEVICE_ID_VIA_82C595 0x0595
+#define PCI_DEVICE_ID_VIA_82C596 0x0596
+#define PCI_DEVICE_ID_VIA_82C597_0 0x0597
+#define PCI_DEVICE_ID_VIA_82C598_0 0x0598
+#define PCI_DEVICE_ID_VIA_8601_0 0x0601
+#define PCI_DEVICE_ID_VIA_8605_0 0x0605
+#define PCI_DEVICE_ID_VIA_82C680 0x0680
+#define PCI_DEVICE_ID_VIA_82C686 0x0686
+#define PCI_DEVICE_ID_VIA_82C691 0x0691
+#define PCI_DEVICE_ID_VIA_82C693 0x0693
+#define PCI_DEVICE_ID_VIA_82C693_1 0x0698
+#define PCI_DEVICE_ID_VIA_82C926 0x0926
+#define PCI_DEVICE_ID_VIA_82C576_1 0x1571
+#define PCI_DEVICE_ID_VIA_82C595_97 0x1595
+#define PCI_DEVICE_ID_VIA_82C586_2 0x3038
+#define PCI_DEVICE_ID_VIA_82C586_3 0x3040
+#define PCI_DEVICE_ID_VIA_6305 0x3044
+#define PCI_DEVICE_ID_VIA_82C596_3 0x3050
+#define PCI_DEVICE_ID_VIA_82C596B_3 0x3051
+#define PCI_DEVICE_ID_VIA_82C686_4 0x3057
+#define PCI_DEVICE_ID_VIA_82C686_5 0x3058
+#define PCI_DEVICE_ID_VIA_8233_5 0x3059
+#define PCI_DEVICE_ID_VIA_8233_7 0x3065
+#define PCI_DEVICE_ID_VIA_82C686_6 0x3068
+#define PCI_DEVICE_ID_VIA_8233_0 0x3074
+#define PCI_DEVICE_ID_VIA_8633_0 0x3091
+#define PCI_DEVICE_ID_VIA_8367_0 0x3099
+#define PCI_DEVICE_ID_VIA_8622 0x3102
+#define PCI_DEVICE_ID_VIA_8233C_0 0x3109
+#define PCI_DEVICE_ID_VIA_8361 0x3112
+#define PCI_DEVICE_ID_VIA_8233A 0x3147
+#define PCI_DEVICE_ID_VIA_P4X333 0x3168
+#define PCI_DEVICE_ID_VIA_8235 0x3177
+#define PCI_DEVICE_ID_VIA_8377_0 0x3189
+#define PCI_DEVICE_ID_VIA_86C100A 0x6100
+#define PCI_DEVICE_ID_VIA_8231 0x8231
+#define PCI_DEVICE_ID_VIA_8231_4 0x8235
+#define PCI_DEVICE_ID_VIA_8365_1 0x8305
+#define PCI_DEVICE_ID_VIA_8371_1 0x8391
+#define PCI_DEVICE_ID_VIA_8501_1 0x8501
+#define PCI_DEVICE_ID_VIA_82C597_1 0x8597
+#define PCI_DEVICE_ID_VIA_82C598_1 0x8598
+#define PCI_DEVICE_ID_VIA_8601_1 0x8601
+#define PCI_DEVICE_ID_VIA_8505_1 0x8605
+#define PCI_DEVICE_ID_VIA_8633_1 0xB091
+#define PCI_DEVICE_ID_VIA_8367_1 0xB099
+
+#define PCI_VENDOR_ID_SIEMENS 0x110A
+#define PCI_DEVICE_ID_SIEMENS_DSCC4 0x2102
+
+#define PCI_VENDOR_ID_SMC2 0x1113
+#define PCI_DEVICE_ID_SMC2_1211TX 0x1211
+
+#define PCI_VENDOR_ID_VORTEX 0x1119
+#define PCI_DEVICE_ID_VORTEX_GDT60x0 0x0000
+#define PCI_DEVICE_ID_VORTEX_GDT6000B 0x0001
+#define PCI_DEVICE_ID_VORTEX_GDT6x10 0x0002
+#define PCI_DEVICE_ID_VORTEX_GDT6x20 0x0003
+#define PCI_DEVICE_ID_VORTEX_GDT6530 0x0004
+#define PCI_DEVICE_ID_VORTEX_GDT6550 0x0005
+#define PCI_DEVICE_ID_VORTEX_GDT6x17 0x0006
+#define PCI_DEVICE_ID_VORTEX_GDT6x27 0x0007
+#define PCI_DEVICE_ID_VORTEX_GDT6537 0x0008
+#define PCI_DEVICE_ID_VORTEX_GDT6557 0x0009
+#define PCI_DEVICE_ID_VORTEX_GDT6x15 0x000a
+#define PCI_DEVICE_ID_VORTEX_GDT6x25 0x000b
+#define PCI_DEVICE_ID_VORTEX_GDT6535 0x000c
+#define PCI_DEVICE_ID_VORTEX_GDT6555 0x000d
+#define PCI_DEVICE_ID_VORTEX_GDT6x17RP 0x0100
+#define PCI_DEVICE_ID_VORTEX_GDT6x27RP 0x0101
+#define PCI_DEVICE_ID_VORTEX_GDT6537RP 0x0102
+#define PCI_DEVICE_ID_VORTEX_GDT6557RP 0x0103
+#define PCI_DEVICE_ID_VORTEX_GDT6x11RP 0x0104
+#define PCI_DEVICE_ID_VORTEX_GDT6x21RP 0x0105
+#define PCI_DEVICE_ID_VORTEX_GDT6x17RP1 0x0110
+#define PCI_DEVICE_ID_VORTEX_GDT6x27RP1 0x0111
+#define PCI_DEVICE_ID_VORTEX_GDT6537RP1 0x0112
+#define PCI_DEVICE_ID_VORTEX_GDT6557RP1 0x0113
+#define PCI_DEVICE_ID_VORTEX_GDT6x11RP1 0x0114
+#define PCI_DEVICE_ID_VORTEX_GDT6x21RP1 0x0115
+#define PCI_DEVICE_ID_VORTEX_GDT6x17RP2 0x0120
+#define PCI_DEVICE_ID_VORTEX_GDT6x27RP2 0x0121
+#define PCI_DEVICE_ID_VORTEX_GDT6537RP2 0x0122
+#define PCI_DEVICE_ID_VORTEX_GDT6557RP2 0x0123
+#define PCI_DEVICE_ID_VORTEX_GDT6x11RP2 0x0124
+#define PCI_DEVICE_ID_VORTEX_GDT6x21RP2 0x0125
+
+#define PCI_VENDOR_ID_EF 0x111a
+#define PCI_DEVICE_ID_EF_ATM_FPGA 0x0000
+#define PCI_DEVICE_ID_EF_ATM_ASIC 0x0002
+
+#define PCI_VENDOR_ID_IDT 0x111d
+#define PCI_DEVICE_ID_IDT_IDT77201 0x0001
+
+#define PCI_VENDOR_ID_FORE 0x1127
+#define PCI_DEVICE_ID_FORE_PCA200PC 0x0210
+#define PCI_DEVICE_ID_FORE_PCA200E 0x0300
+
+#define PCI_VENDOR_ID_IMAGINGTECH 0x112f
+#define PCI_DEVICE_ID_IMAGINGTECH_ICPCI 0x0000
+
+#define PCI_VENDOR_ID_PHILIPS 0x1131
+#define PCI_DEVICE_ID_PHILIPS_SAA7145 0x7145
+#define PCI_DEVICE_ID_PHILIPS_SAA7146 0x7146
+#define PCI_DEVICE_ID_PHILIPS_SAA9730 0x9730
+
+#define PCI_VENDOR_ID_EICON 0x1133
+#define PCI_DEVICE_ID_EICON_DIVA20PRO 0xe001
+#define PCI_DEVICE_ID_EICON_DIVA20 0xe002
+#define PCI_DEVICE_ID_EICON_DIVA20PRO_U 0xe003
+#define PCI_DEVICE_ID_EICON_DIVA20_U 0xe004
+#define PCI_DEVICE_ID_EICON_DIVA201 0xe005
+#define PCI_DEVICE_ID_EICON_DIVA202 0xe00b
+#define PCI_DEVICE_ID_EICON_MAESTRA 0xe010
+#define PCI_DEVICE_ID_EICON_MAESTRAQ 0xe012
+#define PCI_DEVICE_ID_EICON_MAESTRAQ_U 0xe013
+#define PCI_DEVICE_ID_EICON_MAESTRAP 0xe014
+
+#define PCI_VENDOR_ID_CYCLONE 0x113c
+#define PCI_DEVICE_ID_CYCLONE_SDK 0x0001
+
+#define PCI_VENDOR_ID_ALLIANCE 0x1142
+#define PCI_DEVICE_ID_ALLIANCE_PROMOTIO 0x3210
+#define PCI_DEVICE_ID_ALLIANCE_PROVIDEO 0x6422
+#define PCI_DEVICE_ID_ALLIANCE_AT24 0x6424
+#define PCI_DEVICE_ID_ALLIANCE_AT3D 0x643d
+
+#define PCI_VENDOR_ID_SYSKONNECT 0x1148
+#define PCI_DEVICE_ID_SYSKONNECT_FP 0x4000
+#define PCI_DEVICE_ID_SYSKONNECT_TR 0x4200
+#define PCI_DEVICE_ID_SYSKONNECT_GE 0x4300
+
+#define PCI_VENDOR_ID_VMIC 0x114a
+#define PCI_DEVICE_ID_VMIC_VME 0x7587
+
+#define PCI_VENDOR_ID_DIGI 0x114f
+#define PCI_DEVICE_ID_DIGI_EPC 0x0002
+#define PCI_DEVICE_ID_DIGI_RIGHTSWITCH 0x0003
+#define PCI_DEVICE_ID_DIGI_XEM 0x0004
+#define PCI_DEVICE_ID_DIGI_XR 0x0005
+#define PCI_DEVICE_ID_DIGI_CX 0x0006
+#define PCI_DEVICE_ID_DIGI_XRJ 0x0009
+#define PCI_DEVICE_ID_DIGI_EPCJ 0x000a
+#define PCI_DEVICE_ID_DIGI_XR_920 0x0027
+#define PCI_DEVICE_ID_DIGI_DF_M_IOM2_E 0x0070
+#define PCI_DEVICE_ID_DIGI_DF_M_E 0x0071
+#define PCI_DEVICE_ID_DIGI_DF_M_IOM2_A 0x0072
+#define PCI_DEVICE_ID_DIGI_DF_M_A 0x0073
+
+#define PCI_VENDOR_ID_MUTECH 0x1159
+#define PCI_DEVICE_ID_MUTECH_MV1000 0x0001
+
+#define PCI_VENDOR_ID_XIRCOM 0x115d
+#define PCI_DEVICE_ID_XIRCOM_X3201_ETH 0x0003
+#define PCI_DEVICE_ID_XIRCOM_X3201_MDM 0x0103
+
+#define PCI_VENDOR_ID_RENDITION 0x1163
+#define PCI_DEVICE_ID_RENDITION_VERITE 0x0001
+#define PCI_DEVICE_ID_RENDITION_VERITE2100 0x2000
+
+#define PCI_VENDOR_ID_SERVERWORKS 0x1166
+#define PCI_DEVICE_ID_SERVERWORKS_HE 0x0008
+#define PCI_DEVICE_ID_SERVERWORKS_LE 0x0009
+#define PCI_DEVICE_ID_SERVERWORKS_CIOB30 0x0010
+#define PCI_DEVICE_ID_SERVERWORKS_CMIC_HE 0x0011
+#define PCI_DEVICE_ID_SERVERWORKS_GCNB_LE 0x0017
+#define PCI_DEVICE_ID_SERVERWORKS_OSB4 0x0200
+#define PCI_DEVICE_ID_SERVERWORKS_CSB5 0x0201
+#define PCI_DEVICE_ID_SERVERWORKS_CSB6 0x0203
+#define PCI_DEVICE_ID_SERVERWORKS_OSB4IDE 0x0211
+#define PCI_DEVICE_ID_SERVERWORKS_CSB5IDE 0x0212
+#define PCI_DEVICE_ID_SERVERWORKS_CSB6IDE 0x0213
+#define PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2 0x0217
+#define PCI_DEVICE_ID_SERVERWORKS_OSB4USB 0x0220
+#define PCI_DEVICE_ID_SERVERWORKS_CSB5USB PCI_DEVICE_ID_SERVERWORKS_OSB4USB
+#define PCI_DEVICE_ID_SERVERWORKS_CSB6USB 0x0221
+#define PCI_DEVICE_ID_SERVERWORKS_GCLE 0x0225
+#define PCI_DEVICE_ID_SERVERWORKS_GCLE2 0x0227
+#define PCI_DEVICE_ID_SERVERWORKS_CSB5ISA 0x0230
+
+#define PCI_VENDOR_ID_SBE 0x1176
+#define PCI_DEVICE_ID_SBE_WANXL100 0x0301
+#define PCI_DEVICE_ID_SBE_WANXL200 0x0302
+#define PCI_DEVICE_ID_SBE_WANXL400 0x0104
+
+#define PCI_VENDOR_ID_TOSHIBA 0x1179
+#define PCI_DEVICE_ID_TOSHIBA_601 0x0601
+#define PCI_DEVICE_ID_TOSHIBA_TOPIC95 0x060a
+#define PCI_DEVICE_ID_TOSHIBA_TOPIC97 0x060f
+
+#define PCI_VENDOR_ID_TOSHIBA_2 0x102f
+#define PCI_DEVICE_ID_TOSHIBA_TX3927 0x000a
+#define PCI_DEVICE_ID_TOSHIBA_TC35815CF 0x0030
+#define PCI_DEVICE_ID_TOSHIBA_TX4927 0x0180
+
+#define PCI_VENDOR_ID_RICOH 0x1180
+#define PCI_DEVICE_ID_RICOH_RL5C465 0x0465
+#define PCI_DEVICE_ID_RICOH_RL5C466 0x0466
+#define PCI_DEVICE_ID_RICOH_RL5C475 0x0475
+#define PCI_DEVICE_ID_RICOH_RL5C476 0x0476
+#define PCI_DEVICE_ID_RICOH_RL5C478 0x0478
+
+#define PCI_VENDOR_ID_ARTOP 0x1191
+#define PCI_DEVICE_ID_ARTOP_ATP8400 0x0004
+#define PCI_DEVICE_ID_ARTOP_ATP850UF 0x0005
+#define PCI_DEVICE_ID_ARTOP_ATP860 0x0006
+#define PCI_DEVICE_ID_ARTOP_ATP860R 0x0007
+#define PCI_DEVICE_ID_ARTOP_ATP865 0x0008
+#define PCI_DEVICE_ID_ARTOP_ATP865R 0x0009
+#define PCI_DEVICE_ID_ARTOP_AEC7610 0x8002
+#define PCI_DEVICE_ID_ARTOP_AEC7612UW 0x8010
+#define PCI_DEVICE_ID_ARTOP_AEC7612U 0x8020
+#define PCI_DEVICE_ID_ARTOP_AEC7612S 0x8030
+#define PCI_DEVICE_ID_ARTOP_AEC7612D 0x8040
+#define PCI_DEVICE_ID_ARTOP_AEC7612SUW 0x8050
+#define PCI_DEVICE_ID_ARTOP_8060 0x8060
+
+#define PCI_VENDOR_ID_ZEITNET 0x1193
+#define PCI_DEVICE_ID_ZEITNET_1221 0x0001
+#define PCI_DEVICE_ID_ZEITNET_1225 0x0002
+
+#define PCI_VENDOR_ID_OMEGA 0x119b
+#define PCI_DEVICE_ID_OMEGA_82C092G 0x1221
+
+#define PCI_VENDOR_ID_FUJITSU_ME 0x119e
+#define PCI_DEVICE_ID_FUJITSU_FS155 0x0001
+#define PCI_DEVICE_ID_FUJITSU_FS50 0x0003
+
+#define PCI_SUBVENDOR_ID_KEYSPAN 0x11a9
+#define PCI_SUBDEVICE_ID_KEYSPAN_SX2 0x5334
+
+#define PCI_VENDOR_ID_GALILEO 0x11ab
+#define PCI_DEVICE_ID_GALILEO_GT64011 0x4146
+#define PCI_DEVICE_ID_GALILEO_GT64111 0x4146
+#define PCI_DEVICE_ID_GALILEO_GT96100 0x9652
+#define PCI_DEVICE_ID_GALILEO_GT96100A 0x9653
+
+#define PCI_VENDOR_ID_LITEON 0x11ad
+#define PCI_DEVICE_ID_LITEON_LNE100TX 0x0002
+
+#define PCI_VENDOR_ID_V3 0x11b0
+#define PCI_DEVICE_ID_V3_V960 0x0001
+#define PCI_DEVICE_ID_V3_V350 0x0001
+#define PCI_DEVICE_ID_V3_V961 0x0002
+#define PCI_DEVICE_ID_V3_V351 0x0002
+
+#define PCI_VENDOR_ID_NP 0x11bc
+#define PCI_DEVICE_ID_NP_PCI_FDDI 0x0001
+
+#define PCI_VENDOR_ID_ATT 0x11c1
+#define PCI_DEVICE_ID_ATT_L56XMF 0x0440
+#define PCI_DEVICE_ID_ATT_VENUS_MODEM 0x480
+
+#define PCI_VENDOR_ID_SPECIALIX 0x11cb
+#define PCI_DEVICE_ID_SPECIALIX_IO8 0x2000
+#define PCI_DEVICE_ID_SPECIALIX_XIO 0x4000
+#define PCI_DEVICE_ID_SPECIALIX_RIO 0x8000
+#define PCI_SUBDEVICE_ID_SPECIALIX_SPEED4 0xa004
+
+#define PCI_VENDOR_ID_AURAVISION 0x11d1
+#define PCI_DEVICE_ID_AURAVISION_VXP524 0x01f7
+
+#define PCI_VENDOR_ID_ANALOG_DEVICES 0x11d4
+#define PCI_DEVICE_ID_AD1889JS 0x1889
+
+#define PCI_VENDOR_ID_IKON 0x11d5
+#define PCI_DEVICE_ID_IKON_10115 0x0115
+#define PCI_DEVICE_ID_IKON_10117 0x0117
+
+#define PCI_VENDOR_ID_ZORAN 0x11de
+#define PCI_DEVICE_ID_ZORAN_36057 0x6057
+#define PCI_DEVICE_ID_ZORAN_36120 0x6120
+
+#define PCI_VENDOR_ID_KINETIC 0x11f4
+#define PCI_DEVICE_ID_KINETIC_2915 0x2915
+
+#define PCI_VENDOR_ID_COMPEX 0x11f6
+#define PCI_DEVICE_ID_COMPEX_ENET100VG4 0x0112
+#define PCI_DEVICE_ID_COMPEX_RL2000 0x1401
+
+#define PCI_VENDOR_ID_RP 0x11fe
+#define PCI_DEVICE_ID_RP32INTF 0x0001
+#define PCI_DEVICE_ID_RP8INTF 0x0002
+#define PCI_DEVICE_ID_RP16INTF 0x0003
+#define PCI_DEVICE_ID_RP4QUAD 0x0004
+#define PCI_DEVICE_ID_RP8OCTA 0x0005
+#define PCI_DEVICE_ID_RP8J 0x0006
+#define PCI_DEVICE_ID_RPP4 0x000A
+#define PCI_DEVICE_ID_RPP8 0x000B
+#define PCI_DEVICE_ID_RP8M 0x000C
+
+#define PCI_VENDOR_ID_CYCLADES 0x120e
+#define PCI_DEVICE_ID_CYCLOM_Y_Lo 0x0100
+#define PCI_DEVICE_ID_CYCLOM_Y_Hi 0x0101
+#define PCI_DEVICE_ID_CYCLOM_4Y_Lo 0x0102
+#define PCI_DEVICE_ID_CYCLOM_4Y_Hi 0x0103
+#define PCI_DEVICE_ID_CYCLOM_8Y_Lo 0x0104
+#define PCI_DEVICE_ID_CYCLOM_8Y_Hi 0x0105
+#define PCI_DEVICE_ID_CYCLOM_Z_Lo 0x0200
+#define PCI_DEVICE_ID_CYCLOM_Z_Hi 0x0201
+#define PCI_DEVICE_ID_PC300_RX_2 0x0300
+#define PCI_DEVICE_ID_PC300_RX_1 0x0301
+#define PCI_DEVICE_ID_PC300_TE_2 0x0310
+#define PCI_DEVICE_ID_PC300_TE_1 0x0311
+
+#define PCI_VENDOR_ID_ESSENTIAL 0x120f
+#define PCI_DEVICE_ID_ESSENTIAL_ROADRUNNER 0x0001
+
+#define PCI_VENDOR_ID_O2 0x1217
+#define PCI_DEVICE_ID_O2_6729 0x6729
+#define PCI_DEVICE_ID_O2_6730 0x673a
+#define PCI_DEVICE_ID_O2_6832 0x6832
+#define PCI_DEVICE_ID_O2_6836 0x6836
+
+#define PCI_VENDOR_ID_3DFX 0x121a
+#define PCI_DEVICE_ID_3DFX_VOODOO 0x0001
+#define PCI_DEVICE_ID_3DFX_VOODOO2 0x0002
+#define PCI_DEVICE_ID_3DFX_BANSHEE 0x0003
+#define PCI_DEVICE_ID_3DFX_VOODOO3 0x0005
+
+#define PCI_VENDOR_ID_SIGMADES 0x1236
+#define PCI_DEVICE_ID_SIGMADES_6425 0x6401
+
+#define PCI_VENDOR_ID_CCUBE 0x123f
+
+#define PCI_VENDOR_ID_AVM 0x1244
+#define PCI_DEVICE_ID_AVM_B1 0x0700
+#define PCI_DEVICE_ID_AVM_C4 0x0800
+#define PCI_DEVICE_ID_AVM_A1 0x0a00
+#define PCI_DEVICE_ID_AVM_A1_V2 0x0e00
+#define PCI_DEVICE_ID_AVM_C2 0x1100
+#define PCI_DEVICE_ID_AVM_T1 0x1200
+
+#define PCI_VENDOR_ID_DIPIX 0x1246
+
+#define PCI_VENDOR_ID_STALLION 0x124d
+#define PCI_DEVICE_ID_STALLION_ECHPCI832 0x0000
+#define PCI_DEVICE_ID_STALLION_ECHPCI864 0x0002
+#define PCI_DEVICE_ID_STALLION_EIOPCI 0x0003
+
+#define PCI_VENDOR_ID_OPTIBASE 0x1255
+#define PCI_DEVICE_ID_OPTIBASE_FORGE 0x1110
+#define PCI_DEVICE_ID_OPTIBASE_FUSION 0x1210
+#define PCI_DEVICE_ID_OPTIBASE_VPLEX 0x2110
+#define PCI_DEVICE_ID_OPTIBASE_VPLEXCC 0x2120
+#define PCI_DEVICE_ID_OPTIBASE_VQUEST 0x2130
+
+#define PCI_VENDOR_ID_ESS 0x125d
+#define PCI_DEVICE_ID_ESS_ESS1968 0x1968
+#define PCI_DEVICE_ID_ESS_AUDIOPCI 0x1969
+#define PCI_DEVICE_ID_ESS_ESS1978 0x1978
+
+#define PCI_VENDOR_ID_SATSAGEM 0x1267
+#define PCI_DEVICE_ID_SATSAGEM_NICCY 0x1016
+#define PCI_DEVICE_ID_SATSAGEM_PCR2101 0x5352
+#define PCI_DEVICE_ID_SATSAGEM_TELSATTURBO 0x5a4b
+
+#define PCI_VENDOR_ID_HUGHES 0x1273
+#define PCI_DEVICE_ID_HUGHES_DIRECPC 0x0002
+
+#define PCI_VENDOR_ID_ENSONIQ 0x1274
+#define PCI_DEVICE_ID_ENSONIQ_CT5880 0x5880
+#define PCI_DEVICE_ID_ENSONIQ_ES1370 0x5000
+#define PCI_DEVICE_ID_ENSONIQ_ES1371 0x1371
+
+#define PCI_VENDOR_ID_ROCKWELL 0x127A
+
+#define PCI_VENDOR_ID_ITE 0x1283
+#define PCI_DEVICE_ID_ITE_IT8172G 0x8172
+#define PCI_DEVICE_ID_ITE_IT8172G_AUDIO 0x0801
+#define PCI_DEVICE_ID_ITE_8872 0x8872
+
+#define PCI_DEVICE_ID_ITE_IT8330G_0 0xe886
+
+/* formerly Platform Tech */
+#define PCI_VENDOR_ID_ESS_OLD 0x1285
+#define PCI_DEVICE_ID_ESS_ESS0100 0x0100
+
+#define PCI_VENDOR_ID_ALTEON 0x12ae
+#define PCI_DEVICE_ID_ALTEON_ACENIC 0x0001
+
+#define PCI_VENDOR_ID_USR 0x12B9
+
+#define PCI_SUBVENDOR_ID_CONNECT_TECH 0x12c4
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_232 0x0001
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_232 0x0002
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_232 0x0003
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485 0x0004
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_4_4 0x0005
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485 0x0006
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485_2_2 0x0007
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_485 0x0008
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_2_6 0x0009
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH081101V1 0x000A
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH041101V1 0x000B
+
+#define PCI_VENDOR_ID_PICTUREL 0x12c5
+#define PCI_DEVICE_ID_PICTUREL_PCIVST 0x0081
+
+#define PCI_VENDOR_ID_NVIDIA_SGS 0x12d2
+#define PCI_DEVICE_ID_NVIDIA_SGS_RIVA128 0x0018
+
+#define PCI_SUBVENDOR_ID_CHASE_PCIFAST 0x12E0
+#define PCI_SUBDEVICE_ID_CHASE_PCIFAST4 0x0031
+#define PCI_SUBDEVICE_ID_CHASE_PCIFAST8 0x0021
+#define PCI_SUBDEVICE_ID_CHASE_PCIFAST16 0x0011
+#define PCI_SUBDEVICE_ID_CHASE_PCIFAST16FMC 0x0041
+#define PCI_SUBVENDOR_ID_CHASE_PCIRAS 0x124D
+#define PCI_SUBDEVICE_ID_CHASE_PCIRAS4 0xF001
+#define PCI_SUBDEVICE_ID_CHASE_PCIRAS8 0xF010
+
+#define PCI_VENDOR_ID_AUREAL 0x12eb
+#define PCI_DEVICE_ID_AUREAL_VORTEX_1 0x0001
+#define PCI_DEVICE_ID_AUREAL_VORTEX_2 0x0002
+
+#define PCI_VENDOR_ID_CBOARDS 0x1307
+#define PCI_DEVICE_ID_CBOARDS_DAS1602_16 0x0001
+
+#define PCI_VENDOR_ID_SIIG 0x131f
+#define PCI_DEVICE_ID_SIIG_1S_10x_550 0x1000
+#define PCI_DEVICE_ID_SIIG_1S_10x_650 0x1001
+#define PCI_DEVICE_ID_SIIG_1S_10x_850 0x1002
+#define PCI_DEVICE_ID_SIIG_1S1P_10x_550 0x1010
+#define PCI_DEVICE_ID_SIIG_1S1P_10x_650 0x1011
+#define PCI_DEVICE_ID_SIIG_1S1P_10x_850 0x1012
+#define PCI_DEVICE_ID_SIIG_1P_10x 0x1020
+#define PCI_DEVICE_ID_SIIG_2P_10x 0x1021
+#define PCI_DEVICE_ID_SIIG_2S_10x_550 0x1030
+#define PCI_DEVICE_ID_SIIG_2S_10x_650 0x1031
+#define PCI_DEVICE_ID_SIIG_2S_10x_850 0x1032
+#define PCI_DEVICE_ID_SIIG_2S1P_10x_550 0x1034
+#define PCI_DEVICE_ID_SIIG_2S1P_10x_650 0x1035
+#define PCI_DEVICE_ID_SIIG_2S1P_10x_850 0x1036
+#define PCI_DEVICE_ID_SIIG_4S_10x_550 0x1050
+#define PCI_DEVICE_ID_SIIG_4S_10x_650 0x1051
+#define PCI_DEVICE_ID_SIIG_4S_10x_850 0x1052
+#define PCI_DEVICE_ID_SIIG_1S_20x_550 0x2000
+#define PCI_DEVICE_ID_SIIG_1S_20x_650 0x2001
+#define PCI_DEVICE_ID_SIIG_1S_20x_850 0x2002
+#define PCI_DEVICE_ID_SIIG_1P_20x 0x2020
+#define PCI_DEVICE_ID_SIIG_2P_20x 0x2021
+#define PCI_DEVICE_ID_SIIG_2S_20x_550 0x2030
+#define PCI_DEVICE_ID_SIIG_2S_20x_650 0x2031
+#define PCI_DEVICE_ID_SIIG_2S_20x_850 0x2032
+#define PCI_DEVICE_ID_SIIG_2P1S_20x_550 0x2040
+#define PCI_DEVICE_ID_SIIG_2P1S_20x_650 0x2041
+#define PCI_DEVICE_ID_SIIG_2P1S_20x_850 0x2042
+#define PCI_DEVICE_ID_SIIG_1S1P_20x_550 0x2010
+#define PCI_DEVICE_ID_SIIG_1S1P_20x_650 0x2011
+#define PCI_DEVICE_ID_SIIG_1S1P_20x_850 0x2012
+#define PCI_DEVICE_ID_SIIG_4S_20x_550 0x2050
+#define PCI_DEVICE_ID_SIIG_4S_20x_650 0x2051
+#define PCI_DEVICE_ID_SIIG_4S_20x_850 0x2052
+#define PCI_DEVICE_ID_SIIG_2S1P_20x_550 0x2060
+#define PCI_DEVICE_ID_SIIG_2S1P_20x_650 0x2061
+#define PCI_DEVICE_ID_SIIG_2S1P_20x_850 0x2062
+
+#define PCI_VENDOR_ID_DOMEX 0x134a
+#define PCI_DEVICE_ID_DOMEX_DMX3191D 0x0001
+
+#define PCI_VENDOR_ID_QUATECH 0x135C
+#define PCI_DEVICE_ID_QUATECH_QSC100 0x0010
+#define PCI_DEVICE_ID_QUATECH_DSC100 0x0020
+#define PCI_DEVICE_ID_QUATECH_DSC200 0x0030
+#define PCI_DEVICE_ID_QUATECH_QSC200 0x0040
+#define PCI_DEVICE_ID_QUATECH_ESC100D 0x0050
+#define PCI_DEVICE_ID_QUATECH_ESC100M 0x0060
+
+#define PCI_VENDOR_ID_SEALEVEL 0x135e
+#define PCI_DEVICE_ID_SEALEVEL_U530 0x7101
+#define PCI_DEVICE_ID_SEALEVEL_UCOMM2 0x7201
+#define PCI_DEVICE_ID_SEALEVEL_UCOMM422 0x7402
+#define PCI_DEVICE_ID_SEALEVEL_UCOMM232 0x7202
+#define PCI_DEVICE_ID_SEALEVEL_COMM4 0x7401
+#define PCI_DEVICE_ID_SEALEVEL_COMM8 0x7801
+
+#define PCI_VENDOR_ID_HYPERCOPE 0x1365
+#define PCI_DEVICE_ID_HYPERCOPE_PLX 0x9050
+#define PCI_SUBDEVICE_ID_HYPERCOPE_OLD_ERGO 0x0104
+#define PCI_SUBDEVICE_ID_HYPERCOPE_ERGO 0x0106
+#define PCI_SUBDEVICE_ID_HYPERCOPE_METRO 0x0107
+#define PCI_SUBDEVICE_ID_HYPERCOPE_CHAMP2 0x0108
+#define PCI_SUBDEVICE_ID_HYPERCOPE_PLEXUS 0x0109
+
+#define PCI_VENDOR_ID_KAWASAKI 0x136b
+#define PCI_DEVICE_ID_MCHIP_KL5A72002 0xff01
+
+#define PCI_VENDOR_ID_LMC 0x1376
+#define PCI_DEVICE_ID_LMC_HSSI 0x0003
+#define PCI_DEVICE_ID_LMC_DS3 0x0004
+#define PCI_DEVICE_ID_LMC_SSI 0x0005
+#define PCI_DEVICE_ID_LMC_T1 0x0006
+
+#define PCI_VENDOR_ID_NETGEAR 0x1385
+#define PCI_DEVICE_ID_NETGEAR_GA620 0x620a
+#define PCI_DEVICE_ID_NETGEAR_GA622 0x622a
+
+#define PCI_VENDOR_ID_APPLICOM 0x1389
+#define PCI_DEVICE_ID_APPLICOM_PCIGENERIC 0x0001
+#define PCI_DEVICE_ID_APPLICOM_PCI2000IBS_CAN 0x0002
+#define PCI_DEVICE_ID_APPLICOM_PCI2000PFB 0x0003
+
+#define PCI_VENDOR_ID_MOXA 0x1393
+#define PCI_DEVICE_ID_MOXA_C104 0x1040
+#define PCI_DEVICE_ID_MOXA_C168 0x1680
+#define PCI_DEVICE_ID_MOXA_CP204J 0x2040
+#define PCI_DEVICE_ID_MOXA_C218 0x2180
+#define PCI_DEVICE_ID_MOXA_C320 0x3200
+
+#define PCI_VENDOR_ID_CCD 0x1397
+#define PCI_DEVICE_ID_CCD_2BD0 0x2bd0
+#define PCI_DEVICE_ID_CCD_B000 0xb000
+#define PCI_DEVICE_ID_CCD_B006 0xb006
+#define PCI_DEVICE_ID_CCD_B007 0xb007
+#define PCI_DEVICE_ID_CCD_B008 0xb008
+#define PCI_DEVICE_ID_CCD_B009 0xb009
+#define PCI_DEVICE_ID_CCD_B00A 0xb00a
+#define PCI_DEVICE_ID_CCD_B00B 0xb00b
+#define PCI_DEVICE_ID_CCD_B00C 0xb00c
+#define PCI_DEVICE_ID_CCD_B100 0xb100
+
+#define PCI_VENDOR_ID_3WARE 0x13C1
+#define PCI_DEVICE_ID_3WARE_1000 0x1000
+
+#define PCI_VENDOR_ID_ABOCOM 0x13D1
+#define PCI_DEVICE_ID_ABOCOM_2BD1 0x2BD1
+
+#define PCI_VENDOR_ID_CMEDIA 0x13f6
+#define PCI_DEVICE_ID_CMEDIA_CM8338A 0x0100
+#define PCI_DEVICE_ID_CMEDIA_CM8338B 0x0101
+#define PCI_DEVICE_ID_CMEDIA_CM8738 0x0111
+#define PCI_DEVICE_ID_CMEDIA_CM8738B 0x0112
+
+#define PCI_VENDOR_ID_LAVA 0x1407
+#define PCI_DEVICE_ID_LAVA_DSERIAL 0x0100 /* 2x 16550 */
+#define PCI_DEVICE_ID_LAVA_QUATRO_A 0x0101 /* 2x 16550, half of 4 port */
+#define PCI_DEVICE_ID_LAVA_QUATRO_B 0x0102 /* 2x 16550, half of 4 port */
+#define PCI_DEVICE_ID_LAVA_OCTO_A 0x0180 /* 4x 16550A, half of 8 port */
+#define PCI_DEVICE_ID_LAVA_OCTO_B 0x0181 /* 4x 16550A, half of 8 port */
+#define PCI_DEVICE_ID_LAVA_PORT_PLUS 0x0200 /* 2x 16650 */
+#define PCI_DEVICE_ID_LAVA_QUAD_A 0x0201 /* 2x 16650, half of 4 port */
+#define PCI_DEVICE_ID_LAVA_QUAD_B 0x0202 /* 2x 16650, half of 4 port */
+#define PCI_DEVICE_ID_LAVA_SSERIAL 0x0500 /* 1x 16550 */
+#define PCI_DEVICE_ID_LAVA_PORT_650 0x0600 /* 1x 16650 */
+#define PCI_DEVICE_ID_LAVA_PARALLEL 0x8000
+#define PCI_DEVICE_ID_LAVA_DUAL_PAR_A 0x8002 /* The Lava Dual Parallel is */
+#define PCI_DEVICE_ID_LAVA_DUAL_PAR_B 0x8003 /* two PCI devices on a card */
+#define PCI_DEVICE_ID_LAVA_BOCA_IOPPAR 0x8800
+
+#define PCI_VENDOR_ID_TIMEDIA 0x1409
+#define PCI_DEVICE_ID_TIMEDIA_1889 0x7168
+
+#define PCI_VENDOR_ID_OXSEMI 0x1415
+#define PCI_DEVICE_ID_OXSEMI_12PCI840 0x8403
+#define PCI_DEVICE_ID_OXSEMI_16PCI954 0x9501
+#define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511
+#define PCI_DEVICE_ID_OXSEMI_16PCI954PP 0x9513
+#define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521
+
+#define PCI_VENDOR_ID_AIRONET 0x14b9
+#define PCI_DEVICE_ID_AIRONET_4800_1 0x0001
+#define PCI_DEVICE_ID_AIRONET_4800 0x4500 // values switched? see
+#define PCI_DEVICE_ID_AIRONET_4500 0x4800 // drivers/net/aironet4500_card.c
+
+#define PCI_VENDOR_ID_TITAN 0x14D2
+#define PCI_DEVICE_ID_TITAN_010L 0x8001
+#define PCI_DEVICE_ID_TITAN_100L 0x8010
+#define PCI_DEVICE_ID_TITAN_110L 0x8011
+#define PCI_DEVICE_ID_TITAN_200L 0x8020
+#define PCI_DEVICE_ID_TITAN_210L 0x8021
+#define PCI_DEVICE_ID_TITAN_400L 0x8040
+#define PCI_DEVICE_ID_TITAN_800L 0x8080
+#define PCI_DEVICE_ID_TITAN_100 0xA001
+#define PCI_DEVICE_ID_TITAN_200 0xA005
+#define PCI_DEVICE_ID_TITAN_400 0xA003
+#define PCI_DEVICE_ID_TITAN_800B 0xA004
+
+#define PCI_VENDOR_ID_PANACOM 0x14d4
+#define PCI_DEVICE_ID_PANACOM_QUADMODEM 0x0400
+#define PCI_DEVICE_ID_PANACOM_DUALMODEM 0x0402
+
+#define PCI_VENDOR_ID_AFAVLAB 0x14db
+#define PCI_DEVICE_ID_AFAVLAB_P028 0x2180
+
+#define PCI_VENDOR_ID_BROADCOM 0x14e4
+#define PCI_DEVICE_ID_TIGON3_5700 0x1644
+#define PCI_DEVICE_ID_TIGON3_5701 0x1645
+#define PCI_DEVICE_ID_TIGON3_5702 0x1646
+#define PCI_DEVICE_ID_TIGON3_5703 0x1647
+#define PCI_DEVICE_ID_TIGON3_5704 0x1648
+#define PCI_DEVICE_ID_TIGON3_5702FE 0x164d
+#define PCI_DEVICE_ID_TIGON3_5702X 0x16a6
+#define PCI_DEVICE_ID_TIGON3_5703X 0x16a7
+
+#define PCI_VENDOR_ID_SYBA 0x1592
+#define PCI_DEVICE_ID_SYBA_2P_EPP 0x0782
+#define PCI_DEVICE_ID_SYBA_1P_ECP 0x0783
+
+#define PCI_VENDOR_ID_MORETON 0x15aa
+#define PCI_DEVICE_ID_RASTEL_2PORT 0x2000
+
+#define PCI_VENDOR_ID_ZOLTRIX 0x15b0
+#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0
+
+#define PCI_VENDOR_ID_PDC 0x15e9
+#define PCI_DEVICE_ID_PDC_1841 0x1841
+
+#define PCI_VENDOR_ID_ALTIMA 0x173b
+#define PCI_DEVICE_ID_ALTIMA_AC1000 0x03e8
+#define PCI_DEVICE_ID_ALTIMA_AC9100 0x03ea
+
+#define PCI_VENDOR_ID_SYMPHONY 0x1c1c
+#define PCI_DEVICE_ID_SYMPHONY_101 0x0001
+
+#define PCI_VENDOR_ID_TEKRAM 0x1de1
+#define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29
+
+#define PCI_VENDOR_ID_HINT 0x3388
+#define PCI_DEVICE_ID_HINT_VXPROII_IDE 0x8013
+
+#define PCI_VENDOR_ID_3DLABS 0x3d3d
+#define PCI_DEVICE_ID_3DLABS_300SX 0x0001
+#define PCI_DEVICE_ID_3DLABS_500TX 0x0002
+#define PCI_DEVICE_ID_3DLABS_DELTA 0x0003
+#define PCI_DEVICE_ID_3DLABS_PERMEDIA 0x0004
+#define PCI_DEVICE_ID_3DLABS_MX 0x0006
+#define PCI_DEVICE_ID_3DLABS_PERMEDIA2 0x0007
+#define PCI_DEVICE_ID_3DLABS_GAMMA 0x0008
+#define PCI_DEVICE_ID_3DLABS_PERMEDIA2V 0x0009
+
+#define PCI_VENDOR_ID_AVANCE 0x4005
+#define PCI_DEVICE_ID_AVANCE_ALG2064 0x2064
+#define PCI_DEVICE_ID_AVANCE_2302 0x2302
+
+#define PCI_VENDOR_ID_AKS 0x416c
+#define PCI_DEVICE_ID_AKS_ALADDINCARD 0x0100
+#define PCI_DEVICE_ID_AKS_CPC 0x0200
+
+#define PCI_VENDOR_ID_NETVIN 0x4a14
+#define PCI_DEVICE_ID_NETVIN_NV5000SC 0x5000
+
+#define PCI_VENDOR_ID_S3 0x5333
+#define PCI_DEVICE_ID_S3_PLATO_PXS 0x0551
+#define PCI_DEVICE_ID_S3_ViRGE 0x5631
+#define PCI_DEVICE_ID_S3_TRIO 0x8811
+#define PCI_DEVICE_ID_S3_AURORA64VP 0x8812
+#define PCI_DEVICE_ID_S3_TRIO64UVP 0x8814
+#define PCI_DEVICE_ID_S3_ViRGE_VX 0x883d
+#define PCI_DEVICE_ID_S3_868 0x8880
+#define PCI_DEVICE_ID_S3_928 0x88b0
+#define PCI_DEVICE_ID_S3_864_1 0x88c0
+#define PCI_DEVICE_ID_S3_864_2 0x88c1
+#define PCI_DEVICE_ID_S3_964_1 0x88d0
+#define PCI_DEVICE_ID_S3_964_2 0x88d1
+#define PCI_DEVICE_ID_S3_968 0x88f0
+#define PCI_DEVICE_ID_S3_TRIO64V2 0x8901
+#define PCI_DEVICE_ID_S3_PLATO_PXG 0x8902
+#define PCI_DEVICE_ID_S3_ViRGE_DXGX 0x8a01
+#define PCI_DEVICE_ID_S3_ViRGE_GX2 0x8a10
+#define PCI_DEVICE_ID_S3_ViRGE_MX 0x8c01
+#define PCI_DEVICE_ID_S3_ViRGE_MXP 0x8c02
+#define PCI_DEVICE_ID_S3_ViRGE_MXPMV 0x8c03
+#define PCI_DEVICE_ID_S3_SONICVIBES 0xca00
+
+#define PCI_VENDOR_ID_DUNORD 0x5544
+#define PCI_DEVICE_ID_DUNORD_I3000 0x0001
+#define PCI_VENDOR_ID_GENROCO 0x5555
+#define PCI_DEVICE_ID_GENROCO_HFP832 0x0003
+
+#define PCI_VENDOR_ID_DCI 0x6666
+#define PCI_DEVICE_ID_DCI_PCCOM4 0x0001
+#define PCI_DEVICE_ID_DCI_PCCOM8 0x0002
+
+#define PCI_VENDOR_ID_INTEL 0x8086
+#define PCI_DEVICE_ID_INTEL_21145 0x0039
+#define PCI_DEVICE_ID_INTEL_82375 0x0482
+#define PCI_DEVICE_ID_INTEL_82424 0x0483
+#define PCI_DEVICE_ID_INTEL_82378 0x0484
+#define PCI_DEVICE_ID_INTEL_82430 0x0486
+#define PCI_DEVICE_ID_INTEL_82434 0x04a3
+#define PCI_DEVICE_ID_INTEL_I960 0x0960
+#define PCI_DEVICE_ID_INTEL_I960RM 0x0962
+#define PCI_DEVICE_ID_INTEL_82562ET 0x1031
+
+#define PCI_DEVICE_ID_INTEL_82815_MC 0x1130
+
+#define PCI_DEVICE_ID_INTEL_82559ER 0x1209
+#define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221
+#define PCI_DEVICE_ID_INTEL_82092AA_1 0x1222
+#define PCI_DEVICE_ID_INTEL_7116 0x1223
+#define PCI_DEVICE_ID_INTEL_82596 0x1226
+#define PCI_DEVICE_ID_INTEL_82865 0x1227
+#define PCI_DEVICE_ID_INTEL_82557 0x1229
+#define PCI_DEVICE_ID_INTEL_82437 0x122d
+#define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e
+#define PCI_DEVICE_ID_INTEL_82371FB_1 0x1230
+#define PCI_DEVICE_ID_INTEL_82371MX 0x1234
+#define PCI_DEVICE_ID_INTEL_82437MX 0x1235
+#define PCI_DEVICE_ID_INTEL_82441 0x1237
+#define PCI_DEVICE_ID_INTEL_82380FB 0x124b
+#define PCI_DEVICE_ID_INTEL_82439 0x1250
+#define PCI_DEVICE_ID_INTEL_80960_RP 0x1960
+#define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000
+#define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010
+#define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020
+#define PCI_DEVICE_ID_INTEL_82437VX 0x7030
+#define PCI_DEVICE_ID_INTEL_82439TX 0x7100
+#define PCI_DEVICE_ID_INTEL_82371AB_0 0x7110
+#define PCI_DEVICE_ID_INTEL_82371AB 0x7111
+#define PCI_DEVICE_ID_INTEL_82371AB_2 0x7112
+#define PCI_DEVICE_ID_INTEL_82371AB_3 0x7113
+#define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410
+#define PCI_DEVICE_ID_INTEL_82801AA_1 0x2411
+#define PCI_DEVICE_ID_INTEL_82801AA_2 0x2412
+#define PCI_DEVICE_ID_INTEL_82801AA_3 0x2413
+#define PCI_DEVICE_ID_INTEL_82801AA_5 0x2415
+#define PCI_DEVICE_ID_INTEL_82801AA_6 0x2416
+#define PCI_DEVICE_ID_INTEL_82801AA_8 0x2418
+#define PCI_DEVICE_ID_INTEL_82801AB_0 0x2420
+#define PCI_DEVICE_ID_INTEL_82801AB_1 0x2421
+#define PCI_DEVICE_ID_INTEL_82801AB_2 0x2422
+#define PCI_DEVICE_ID_INTEL_82801AB_3 0x2423
+#define PCI_DEVICE_ID_INTEL_82801AB_5 0x2425
+#define PCI_DEVICE_ID_INTEL_82801AB_6 0x2426
+#define PCI_DEVICE_ID_INTEL_82801AB_8 0x2428
+#define PCI_DEVICE_ID_INTEL_82801BA_0 0x2440
+#define PCI_DEVICE_ID_INTEL_82801BA_1 0x2442
+#define PCI_DEVICE_ID_INTEL_82801BA_2 0x2443
+#define PCI_DEVICE_ID_INTEL_82801BA_3 0x2444
+#define PCI_DEVICE_ID_INTEL_82801BA_4 0x2445
+#define PCI_DEVICE_ID_INTEL_82801BA_5 0x2446
+#define PCI_DEVICE_ID_INTEL_82801BA_6 0x2448
+#define PCI_DEVICE_ID_INTEL_82801BA_7 0x2449
+#define PCI_DEVICE_ID_INTEL_82801BA_8 0x244a
+#define PCI_DEVICE_ID_INTEL_82801BA_9 0x244b
+#define PCI_DEVICE_ID_INTEL_82801BA_10 0x244c
+#define PCI_DEVICE_ID_INTEL_82801BA_11 0x244e
+#define PCI_DEVICE_ID_INTEL_82801E_0 0x2450
+#define PCI_DEVICE_ID_INTEL_82801E_2 0x2452
+#define PCI_DEVICE_ID_INTEL_82801E_3 0x2453
+#define PCI_DEVICE_ID_INTEL_82801E_9 0x2459
+#define PCI_DEVICE_ID_INTEL_82801E_11 0x245B
+#define PCI_DEVICE_ID_INTEL_82801E_14 0x245D
+#define PCI_DEVICE_ID_INTEL_82801E_15 0x245E
+#define PCI_DEVICE_ID_INTEL_82801CA_0 0x2480
+#define PCI_DEVICE_ID_INTEL_82801CA_2 0x2482
+#define PCI_DEVICE_ID_INTEL_82801CA_3 0x2483
+#define PCI_DEVICE_ID_INTEL_82801CA_4 0x2484
+#define PCI_DEVICE_ID_INTEL_82801CA_5 0x2485
+#define PCI_DEVICE_ID_INTEL_82801CA_6 0x2486
+#define PCI_DEVICE_ID_INTEL_82801CA_7 0x2487
+#define PCI_DEVICE_ID_INTEL_82801CA_10 0x248a
+#define PCI_DEVICE_ID_INTEL_82801CA_11 0x248b
+#define PCI_DEVICE_ID_INTEL_82801CA_12 0x248c
+#define PCI_DEVICE_ID_INTEL_82801DB_0 0x24c0
+#define PCI_DEVICE_ID_INTEL_82801DB_2 0x24c2
+#define PCI_DEVICE_ID_INTEL_82801DB_3 0x24c3
+#define PCI_DEVICE_ID_INTEL_82801DB_4 0x24c4
+#define PCI_DEVICE_ID_INTEL_82801DB_5 0x24c5
+#define PCI_DEVICE_ID_INTEL_82801DB_6 0x24c6
+#define PCI_DEVICE_ID_INTEL_82801DB_7 0x24c7
+#define PCI_DEVICE_ID_INTEL_82801DB_11 0x24cb
+#define PCI_DEVICE_ID_INTEL_82801DB_13 0x24cd
+#define PCI_DEVICE_ID_INTEL_80310 0x530d
+#define PCI_DEVICE_ID_INTEL_82810_MC1 0x7120
+#define PCI_DEVICE_ID_INTEL_82810_IG1 0x7121
+#define PCI_DEVICE_ID_INTEL_82810_MC3 0x7122
+#define PCI_DEVICE_ID_INTEL_82810_IG3 0x7123
+#define PCI_DEVICE_ID_INTEL_82443LX_0 0x7180
+#define PCI_DEVICE_ID_INTEL_82443LX_1 0x7181
+#define PCI_DEVICE_ID_INTEL_82443BX_0 0x7190
+#define PCI_DEVICE_ID_INTEL_82443BX_1 0x7191
+#define PCI_DEVICE_ID_INTEL_82443BX_2 0x7192
+#define PCI_DEVICE_ID_INTEL_82443MX_0 0x7198
+#define PCI_DEVICE_ID_INTEL_82443MX_1 0x7199
+#define PCI_DEVICE_ID_INTEL_82443MX_2 0x719a
+#define PCI_DEVICE_ID_INTEL_82443MX_3 0x719b
+#define PCI_DEVICE_ID_INTEL_82372FB_0 0x7600
+#define PCI_DEVICE_ID_INTEL_82372FB_1 0x7601
+#define PCI_DEVICE_ID_INTEL_82372FB_2 0x7602
+#define PCI_DEVICE_ID_INTEL_82372FB_3 0x7603
+#define PCI_DEVICE_ID_INTEL_82454GX 0x84c4
+#define PCI_DEVICE_ID_INTEL_82450GX 0x84c5
+#define PCI_DEVICE_ID_INTEL_82451NX 0x84ca
+
+#define PCI_VENDOR_ID_COMPUTONE 0x8e0e
+#define PCI_DEVICE_ID_COMPUTONE_IP2EX 0x0291
+#define PCI_DEVICE_ID_COMPUTONE_PG 0x0302
+#define PCI_SUBVENDOR_ID_COMPUTONE 0x8e0e
+#define PCI_SUBDEVICE_ID_COMPUTONE_PG4 0x0001
+#define PCI_SUBDEVICE_ID_COMPUTONE_PG8 0x0002
+#define PCI_SUBDEVICE_ID_COMPUTONE_PG6 0x0003
+
+#define PCI_VENDOR_ID_KTI 0x8e2e
+#define PCI_DEVICE_ID_KTI_ET32P2 0x3000
+
+#define PCI_VENDOR_ID_ADAPTEC 0x9004
+#define PCI_DEVICE_ID_ADAPTEC_7810 0x1078
+#define PCI_DEVICE_ID_ADAPTEC_7821 0x2178
+#define PCI_DEVICE_ID_ADAPTEC_38602 0x3860
+#define PCI_DEVICE_ID_ADAPTEC_7850 0x5078
+#define PCI_DEVICE_ID_ADAPTEC_7855 0x5578
+#define PCI_DEVICE_ID_ADAPTEC_5800 0x5800
+#define PCI_DEVICE_ID_ADAPTEC_3860 0x6038
+#define PCI_DEVICE_ID_ADAPTEC_1480A 0x6075
+#define PCI_DEVICE_ID_ADAPTEC_7860 0x6078
+#define PCI_DEVICE_ID_ADAPTEC_7861 0x6178
+#define PCI_DEVICE_ID_ADAPTEC_7870 0x7078
+#define PCI_DEVICE_ID_ADAPTEC_7871 0x7178
+#define PCI_DEVICE_ID_ADAPTEC_7872 0x7278
+#define PCI_DEVICE_ID_ADAPTEC_7873 0x7378
+#define PCI_DEVICE_ID_ADAPTEC_7874 0x7478
+#define PCI_DEVICE_ID_ADAPTEC_7895 0x7895
+#define PCI_DEVICE_ID_ADAPTEC_7880 0x8078
+#define PCI_DEVICE_ID_ADAPTEC_7881 0x8178
+#define PCI_DEVICE_ID_ADAPTEC_7882 0x8278
+#define PCI_DEVICE_ID_ADAPTEC_7883 0x8378
+#define PCI_DEVICE_ID_ADAPTEC_7884 0x8478
+#define PCI_DEVICE_ID_ADAPTEC_7885 0x8578
+#define PCI_DEVICE_ID_ADAPTEC_7886 0x8678
+#define PCI_DEVICE_ID_ADAPTEC_7887 0x8778
+#define PCI_DEVICE_ID_ADAPTEC_7888 0x8878
+#define PCI_DEVICE_ID_ADAPTEC_1030 0x8b78
+
+#define PCI_VENDOR_ID_ADAPTEC2 0x9005
+#define PCI_DEVICE_ID_ADAPTEC2_2940U2 0x0010
+#define PCI_DEVICE_ID_ADAPTEC2_2930U2 0x0011
+#define PCI_DEVICE_ID_ADAPTEC2_7890B 0x0013
+#define PCI_DEVICE_ID_ADAPTEC2_7890 0x001f
+#define PCI_DEVICE_ID_ADAPTEC2_3940U2 0x0050
+#define PCI_DEVICE_ID_ADAPTEC2_3950U2D 0x0051
+#define PCI_DEVICE_ID_ADAPTEC2_7896 0x005f
+#define PCI_DEVICE_ID_ADAPTEC2_7892A 0x0080
+#define PCI_DEVICE_ID_ADAPTEC2_7892B 0x0081
+#define PCI_DEVICE_ID_ADAPTEC2_7892D 0x0083
+#define PCI_DEVICE_ID_ADAPTEC2_7892P 0x008f
+#define PCI_DEVICE_ID_ADAPTEC2_7899A 0x00c0
+#define PCI_DEVICE_ID_ADAPTEC2_7899B 0x00c1
+#define PCI_DEVICE_ID_ADAPTEC2_7899D 0x00c3
+#define PCI_DEVICE_ID_ADAPTEC2_7899P 0x00cf
+
+#define PCI_VENDOR_ID_ATRONICS 0x907f
+#define PCI_DEVICE_ID_ATRONICS_2015 0x2015
+
+#define PCI_VENDOR_ID_HOLTEK 0x9412
+#define PCI_DEVICE_ID_HOLTEK_6565 0x6565
+
+#define PCI_VENDOR_ID_NETMOS 0x9710
+#define PCI_DEVICE_ID_NETMOS_9735 0x9735
+#define PCI_DEVICE_ID_NETMOS_9835 0x9835
+
+#define PCI_SUBVENDOR_ID_EXSYS 0xd84d
+#define PCI_SUBDEVICE_ID_EXSYS_4014 0x4014
+
+#define PCI_VENDOR_ID_TIGERJET 0xe159
+#define PCI_DEVICE_ID_TIGERJET_300 0x0001
+#define PCI_DEVICE_ID_TIGERJET_100 0x0002
+
+#define PCI_VENDOR_ID_ARK 0xedd8
+#define PCI_DEVICE_ID_ARK_STING 0xa091
+#define PCI_DEVICE_ID_ARK_STINGARK 0xa099
+#define PCI_DEVICE_ID_ARK_2000MT 0xa0a1
+
+#define PCI_VENDOR_ID_MICROGATE 0x13c0
+#define PCI_DEVICE_ID_MICROGATE_USC 0x0010
+#define PCI_DEVICE_ID_MICROGATE_SCC 0x0020
+#define PCI_DEVICE_ID_MICROGATE_SCA 0x0030
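[Editor's note: the hunk above is purely a lookup table; drivers consume these constants when declaring which hardware they bind to. A minimal sketch of that use, assuming the Linux 2.4-style struct pci_device_id and PCI_ANY_ID from <linux/pci.h> -- the driver table name is illustrative and not part of this changeset:

    /* Hypothetical match table: bind only to the 3Com 3c905B-TX,
     * accepting any subsystem vendor/device IDs. */
    static struct pci_device_id hypothetical_tbl[] = {
        { PCI_VENDOR_ID_3COM, PCI_DEVICE_ID_3COM_3C905B_TX,
          PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
        { 0, }   /* terminating entry */
    };
]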
diff --git a/xen/include/xeno/perfc.h b/xen/include/xeno/perfc.h
new file mode 100644
index 0000000000..31201eaa6d
--- /dev/null
+++ b/xen/include/xeno/perfc.h
@@ -0,0 +1,43 @@
+/*
+ * xen performance counters
+ */
+
+/*
+ * NOTE: new counters must be defined in perfc_defn.h
+ *
+ * PERFCOUNTER (counter, string) define a new performance counter
+ * PERFCOUNTER_ARRAY (counter, string, size) define an array of counters
+ *
+ * unsigned long perf_value (counter) get value of a counter
+ * unsigned long perf_valuea (counter, index) get value of an array counter
+ * void perf_incr (counter) increment a counter
+ * void perf_incra (counter, index) increment an array counter
+ * void perf_add (counter, value) add a value to a counter
+ * void perf_adda (counter, index, value) add a value to array counter
+ * void perf_print (counter) print out the counter
+ */
+
+#define PERFCOUNTER( var, name ) \
+unsigned long var[1];
+#define PERFCOUNTER_ARRAY( var, name, size ) \
+unsigned long var[size];
+
+struct perfcounter_t
+{
+#include <xeno/perfc_defn.h>
+};
+
+extern struct perfcounter_t perfcounters;
+extern char *perfc_name[];
+
+#define perf_value(x) perfcounters.x[0]
+#define perf_valuea(x,y) perfcounters.x[y]
+#define perf_incr(x) perfcounters.x[0]++
+#define perf_incra(x,y) perfcounters.x[y]++
+#define perf_add(x,y) perfcounters.x[0]+=(y)
+#define perf_adda(x,y,z) perfcounters.x[y]+=(z)
+
+#define perf_print(x) \
+ __perfc_print(perfcounters.x, \
+ &perfcounters.x[0] - ((unsigned long *)&perfcounters))
+
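+
+/*
+ * Illustrative sketch (counter name hypothetical): a counter is declared
+ * once in perfc_defn.h and then driven through the accessors above, e.g.
+ *
+ *   PERFCOUNTER( net_tx, "net: packets transmitted" )    [in perfc_defn.h]
+ *
+ *   perf_incr(net_tx);                          increment by one
+ *   perf_add(net_tx, nr_pkts);                  add a whole batch
+ *   unsigned long seen = perf_value(net_tx);    read the current total
+ */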
diff --git a/xen/include/xeno/perfc_defn.h b/xen/include/xeno/perfc_defn.h
new file mode 100644
index 0000000000..16ab4cd8c9
--- /dev/null
+++ b/xen/include/xeno/perfc_defn.h
@@ -0,0 +1,4 @@
+
+PERFCOUNTER( blockio_tx, "block io: messages received from tx queue" )
+PERFCOUNTER( blockio_rx, "block io: messages sent on rx queue" )
+
diff --git a/xen/include/xeno/prefetch.h b/xen/include/xeno/prefetch.h
new file mode 100644
index 0000000000..8d7d3ffeb4
--- /dev/null
+++ b/xen/include/xeno/prefetch.h
@@ -0,0 +1,60 @@
+/*
+ * Generic cache management functions. Everything is arch-specific,
+ * but this header exists to make sure the defines/functions can be
+ * used in a generic way.
+ *
+ * 2000-11-13 Arjan van de Ven <arjan@fenrus.demon.nl>
+ *
+ */
+
+#ifndef _LINUX_PREFETCH_H
+#define _LINUX_PREFETCH_H
+
+#include <asm/processor.h>
+#include <asm/cache.h>
+
+/*
+ prefetch(x) attempts to pre-emptively get the memory pointed to
+ by address "x" into the CPU L1 cache.
+   prefetch(x) should not cause any kind of exception; prefetch(0) is
+   specifically OK.
+
+ prefetch() should be defined by the architecture, if not, the
+ #define below provides a no-op define.
+
+ There are 3 prefetch() macros:
+
+ prefetch(x) - prefetches the cacheline at "x" for read
+ prefetchw(x) - prefetches the cacheline at "x" for write
+    spin_lock_prefetch(x) - prefetches the spinlock *x for taking
+
+    There is also PREFETCH_STRIDE, which is the architecture-preferred
+    "lookahead" size for prefetching streamed operations.
+
+*/
+
+/*
+ * These cannot be do{}while(0) macros. See the mental gymnastics in
+ * the loop macro.
+ */
+
+#ifndef ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCH
+static inline void prefetch(const void *x) {;}
+#endif
+
+#ifndef ARCH_HAS_PREFETCHW
+#define ARCH_HAS_PREFETCHW
+static inline void prefetchw(const void *x) {;}
+#endif
+
+#ifndef ARCH_HAS_SPINLOCK_PREFETCH
+#define ARCH_HAS_SPINLOCK_PREFETCH
+#define spin_lock_prefetch(x) prefetchw(x)
+#endif
+
+#ifndef PREFETCH_STRIDE
+#define PREFETCH_STRIDE (4*L1_CACHE_BYTES)
+#endif
+
+#endif
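+
+/*
+ * Illustrative sketch (assumed usage, not part of the original header):
+ * when streaming through a large buffer, prefetch one arch-tuned stride
+ * ahead; over-running the end is harmless since prefetch() never faults.
+ *
+ *   unsigned long i, total = 0;
+ *   for ( i = 0; i < len; i++ ) {
+ *       prefetch(&buf[i + PREFETCH_STRIDE]);
+ *       total += buf[i];
+ *   }
+ */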
diff --git a/xen/include/xeno/reboot.h b/xen/include/xeno/reboot.h
new file mode 100644
index 0000000000..5f128a9525
--- /dev/null
+++ b/xen/include/xeno/reboot.h
@@ -0,0 +1,51 @@
+#ifndef _LINUX_REBOOT_H
+#define _LINUX_REBOOT_H
+
+/*
+ * Magic values required to use _reboot() system call.
+ */
+
+#define LINUX_REBOOT_MAGIC1 0xfee1dead
+#define LINUX_REBOOT_MAGIC2 672274793
+#define LINUX_REBOOT_MAGIC2A 85072278
+#define LINUX_REBOOT_MAGIC2B 369367448
+
+
+/*
+ * Commands accepted by the _reboot() system call.
+ *
+ * RESTART Restart system using default command and mode.
+ * HALT Stop OS and give system control to ROM monitor, if any.
+ * CAD_ON Ctrl-Alt-Del sequence causes RESTART command.
+ * CAD_OFF Ctrl-Alt-Del sequence sends SIGINT to init task.
+ * POWER_OFF Stop OS and remove all power from system, if possible.
+ * RESTART2 Restart system using given command string.
+ */
+
+#define LINUX_REBOOT_CMD_RESTART 0x01234567
+#define LINUX_REBOOT_CMD_HALT 0xCDEF0123
+#define LINUX_REBOOT_CMD_CAD_ON 0x89ABCDEF
+#define LINUX_REBOOT_CMD_CAD_OFF 0x00000000
+#define LINUX_REBOOT_CMD_POWER_OFF 0x4321FEDC
+#define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4
+
+
+#ifdef __KERNEL__
+
+#include <linux/notifier.h>
+
+extern int register_reboot_notifier(struct notifier_block *);
+extern int unregister_reboot_notifier(struct notifier_block *);
+
+
+/*
+ * Architecture-specific implementations of sys_reboot commands.
+ */
+
+extern void machine_restart(char *cmd);
+extern void machine_halt(void);
+extern void machine_power_off(void);
+
+#endif
+
+#endif /* _LINUX_REBOOT_H */
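+
+/*
+ * Illustrative sketch (exact syscall signature assumed, not defined in
+ * this header): a caller must present both magic numbers plus a command,
+ * so a stray call cannot reset the machine by accident, e.g.
+ *
+ *   _reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
+ *           LINUX_REBOOT_CMD_POWER_OFF);
+ */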
diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h
new file mode 100644
index 0000000000..3cffa46bf1
--- /dev/null
+++ b/xen/include/xeno/sched.h
@@ -0,0 +1,224 @@
+#ifndef _LINUX_SCHED_H
+#define _LINUX_SCHED_H
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/spinlock.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <xeno/smp.h>
+#include <asm/processor.h>
+#include <asm/current.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+#include <xeno/dom0_ops.h>
+
+extern unsigned long volatile jiffies;
+extern rwlock_t tasklist_lock;
+
+#include <xeno/spinlock.h>
+
+struct mm_struct {
+ unsigned long cpu_vm_mask;
+ /*
+     * Every domain has an L1 pagetable of its own. Per-domain mappings
+ * are put in this table (eg. the current GDT is mapped here).
+ */
+ l2_pgentry_t *perdomain_pt;
+ pagetable_t pagetable;
+ /* Current LDT selector. */
+ unsigned int ldt_sel;
+ /* Next entry is passed to LGDT on domain switch. */
+ char gdt[6];
+};
+
+/* Convenient accessor for mm.gdt. */
+#define SET_GDT_ENTRIES(_p, _e) ((*(u16 *)((_p)->mm.gdt + 0)) = (_e))
+#define SET_GDT_ADDRESS(_p, _a) ((*(u32 *)((_p)->mm.gdt + 2)) = (_a))
+#define GET_GDT_ENTRIES(_p) ((*(u16 *)((_p)->mm.gdt + 0)))
+#define GET_GDT_ADDRESS(_p) ((*(u32 *)((_p)->mm.gdt + 2)))
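+
+/*
+ * Illustrative sketch (names hypothetical): gdt[] is laid out as the
+ * 6-byte pseudo-descriptor LGDT expects -- a u16 limit followed by a
+ * u32 linear base -- so a domain-switch path might prime it with:
+ *
+ *   SET_GDT_ENTRIES(p, (nr_entries * 8) - 1);    limit field at gdt+0
+ *   SET_GDT_ADDRESS(p, gdt_virt_base);           base field at gdt+2
+ */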
+
+extern struct mm_struct init_mm;
+#define IDLE0_MM \
+{ \
+ cpu_vm_mask: 0, \
+ perdomain_pt: 0, \
+ pagetable: mk_pagetable(__pa(idle0_pg_table)) \
+}
+
+#define _HYP_EVENT_NEED_RESCHED 0
+#define _HYP_EVENT_NET_RX 1
+#define _HYP_EVENT_DIE 2
+
+#define PF_DONEFPUINIT 0x1 /* Has the FPU been initialised for this task? */
+#define PF_USEDFPU 0x2 /* Has this task used the FPU since last save? */
+#define PF_GUEST_STTS 0x4 /* Has the guest OS requested 'stts'? */
+
+#include <xeno/vif.h>
+#include <xeno/block.h>
+
+struct task_struct {
+
+ int processor;
+ int state;
+ int hyp_events;
+ unsigned int domain;
+
+ /* An unsafe pointer into a shared data area. */
+ shared_info_t *shared_info;
+
+ struct list_head pg_head;
+ unsigned int tot_pages;
+
+ /* Network I/O */
+ net_ring_t *net_ring_base;
+ net_vif_t *net_vif_list[MAX_GUEST_VIFS];
+ int num_net_vifs;
+
+ /* Block I/O */
+ blk_ring_t *blk_ring_base;
+ unsigned int blk_req_cons; /* request consumer */
+ struct list_head blkdev_list;
+ spinlock_t blk_ring_lock;
+
+ int has_cpu, policy, counter;
+
+ struct list_head run_list;
+
+ struct mm_struct mm;
+
+ mm_segment_t addr_limit; /* thread address space:
+                                0-0xBFFFFFFF for user-thread
+ 0-0xFFFFFFFF for kernel-thread
+ */
+
+ /*
+ * active_mm stays for now. It's entangled in the tricky TLB flushing
+ * stuff which I haven't addressed yet. It stays until I'm man enough
+ * to venture in.
+ */
+ struct mm_struct *active_mm;
+ struct thread_struct thread;
+ struct task_struct *prev_task, *next_task;
+
+ unsigned long flags;
+};
+
+/*
+ * domain states
+ * TASK_RUNNING: Domain is runnable and should be on a run queue
+ * TASK_INTERRUPTIBLE: Domain is blocked but may be woken up by an event
+ * or an expiring timer
+ * TASK_UNINTERRUPTIBLE: Domain is blocked but may not be woken up by an
+ * arbitrary event or timer.
+ * TASK_WAIT: Domain's CPU allocation has expired.
+ * TASK_STOPPED: not really used in Xen
+ * TASK_DYING: Domain is about to cross over to the land of the dead.
+ */
+
+#define TASK_RUNNING 0
+#define TASK_INTERRUPTIBLE 1
+#define TASK_UNINTERRUPTIBLE 2
+#define TASK_WAIT 4
+#define TASK_DYING 16
+/* #define TASK_STOPPED 8 not really used */
+
+#define SCHED_YIELD 0x10
+
+#include <asm/uaccess.h> /* for KERNEL_DS */
+
+#define IDLE0_TASK(_t) \
+{ \
+ processor: 0, \
+ domain: IDLE_DOMAIN_ID, \
+ state: TASK_RUNNING, \
+ has_cpu: 0, \
+ mm: IDLE0_MM, \
+ addr_limit: KERNEL_DS, \
+ active_mm: &idle0_task.mm, \
+ thread: INIT_THREAD, \
+ prev_task: &(_t), \
+ next_task: &(_t) \
+}
+
+#define IDLE_DOMAIN_ID (~0)
+#define is_idle_task(_p) ((_p)->domain == IDLE_DOMAIN_ID)
+
+#ifndef IDLE0_TASK_SIZE
+#define IDLE0_TASK_SIZE 2048*sizeof(long)
+#endif
+
+union task_union {
+ struct task_struct task;
+ unsigned long stack[IDLE0_TASK_SIZE/sizeof(long)];
+};
+
+extern union task_union idle0_task_union;
+extern struct task_struct first_task_struct;
+
+extern struct task_struct *do_newdomain(unsigned int dom_id, unsigned int cpu);
+extern int setup_guestos(struct task_struct *p, dom0_newdomain_t *params);
+extern int final_setup_guestos(struct task_struct *p, dom_meminfo_t *);
+
+struct task_struct *find_domain_by_id(unsigned int dom);
+extern void release_task(struct task_struct *);
+extern void kill_domain(void);
+extern void kill_domain_with_errmsg(const char *err);
+extern long kill_other_domain(unsigned int dom);
+
+/* arch/process.c */
+void new_thread(struct task_struct *p,
+ unsigned long start_pc,
+ unsigned long start_stack,
+ unsigned long start_info);
+extern void flush_thread(void);
+extern void exit_thread(void);
+
+/* Linux puts these here for some reason! */
+extern int request_irq(unsigned int,
+ void (*handler)(int, void *, struct pt_regs *),
+ unsigned long, const char *, void *);
+extern void free_irq(unsigned int, void *);
+
+extern unsigned long wait_init_idle;
+#define init_idle() clear_bit(smp_processor_id(), &wait_init_idle);
+
+
+
+/*
+ * Scheduler functions (in schedule.c)
+ */
+#define set_current_state(_s) do { current->state = (_s); } while (0)
+#define MAX_SCHEDULE_TIMEOUT LONG_MAX
+void scheduler_init(void);
+void schedulers_start(void);
+void sched_add_domain(struct task_struct *p);
+void sched_rem_domain(struct task_struct *p);
+int wake_up(struct task_struct *p);
+long schedule_timeout(long timeout);
+long do_yield(void);
+void reschedule(struct task_struct *p);
+asmlinkage void schedule(void);
+
+
+#define signal_pending(_p) ((_p)->hyp_events || \
+ (_p)->shared_info->events)
+
+void domain_init(void);
+
+void cpu_idle(void);
+
+#define REMOVE_LINKS(p) do { \
+ (p)->next_task->prev_task = (p)->prev_task; \
+ (p)->prev_task->next_task = (p)->next_task; \
+ } while (0)
+
+#define SET_LINKS(p) do { \
+ (p)->next_task = &idle0_task; \
+ (p)->prev_task = idle0_task.prev_task; \
+ idle0_task.prev_task->next_task = (p); \
+ idle0_task.prev_task = (p); \
+ } while (0)
+
+extern void update_process_times(int user);
+
+#endif
diff --git a/xen/include/xeno/skbuff.h b/xen/include/xeno/skbuff.h
new file mode 100644
index 0000000000..519328a679
--- /dev/null
+++ b/xen/include/xeno/skbuff.h
@@ -0,0 +1,434 @@
+/*
+ * Definitions for the 'struct sk_buff' memory handlers.
+ *
+ * Authors:
+ * Alan Cox, <gw4pts@gw4pts.ampr.org>
+ * Florian La Roche, <rzsfl@rz.uni-sb.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_SKBUFF_H
+#define _LINUX_SKBUFF_H
+
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+#include <asm/system.h>
+#include <asm/atomic.h>
+#include <asm/types.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+
+/* vif special values */
+#define VIF_PHYSICAL_INTERFACE -1
+#define VIF_UNKNOWN_INTERFACE -2
+#define VIF_DROP -3
+#define VIF_ANY_INTERFACE -4
+
+/* skb_type values */
+#define SKB_NORMAL 0 /* A Linux-style skbuff: no strangeness */
+#define SKB_ZERO_COPY 1 /* Zero copy skbs are used for receive */
+#define SKB_NODATA 2 /* Data allocation not handled by us */
+
+#define HAVE_ALLOC_SKB /* For the drivers to know */
+#define HAVE_ALIGNABLE_SKB /* Ditto 8) */
+#define SLAB_SKB /* Slabified skbuffs */
+
+#define CHECKSUM_NONE 0
+#define CHECKSUM_HW 1
+#define CHECKSUM_UNNECESSARY 2
+
+#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1))
+
+/* A. Checksumming of received packets by device.
+ *
+ * NONE: device failed to checksum this packet.
+ * skb->csum is undefined.
+ *
+ * UNNECESSARY: device parsed the packet and verified the checksum.
+ *    skb->csum is undefined.
+ *    It is a bad option but, unfortunately, many vendors do this,
+ *    apparently with the secret goal of selling you a new device
+ *    when you add a new protocol to your host, e.g. IPv6. 8)
+ *
+ * HW: the most generic way. Device supplied checksum of _all_
+ * the packet as seen by netif_rx in skb->csum.
+ * NOTE: Even if device supports only some protocols, but
+ * is able to produce some skb->csum, it MUST use HW,
+ * not UNNECESSARY.
+ *
+ * B. Checksumming on output.
+ *
+ * NONE: skb is checksummed by protocol or csum is not required.
+ *
+ * HW: device is required to csum packet as seen by hard_start_xmit
+ * from skb->h.raw to the end and to record the checksum
+ * at skb->h.raw+skb->csum.
+ *
+ * Device must show its capabilities in dev->features, set
+ * at device setup time.
+ *    NETIF_F_HW_CSUM - it is a clever device; it is able to checksum
+ *                      everything.
+ *    NETIF_F_NO_CSUM - loopback or reliable single hop media.
+ *    NETIF_F_IP_CSUM - device is dumb. It is able to csum only
+ *                      TCP/UDP over IPv4. Sigh. Vendors like this
+ *                      way for an unknown reason. Though, see the comment
+ *                      above about CHECKSUM_UNNECESSARY. 8)
+ */
+
+#ifdef __i386__
+#define NET_CALLER(arg) (*(((void**)&arg)-1))
+#else
+#define NET_CALLER(arg) __builtin_return_address(0)
+#endif
+
+struct sk_buff_head {
+ /* These two members must be first. */
+ struct sk_buff * next;
+ struct sk_buff * prev;
+
+ __u32 qlen;
+ spinlock_t lock;
+};
+
+#define MAX_SKB_FRAGS 1 /* KAF: was 6 */
+
+typedef struct skb_frag_struct {
+ struct pfn_info *page;
+ __u16 page_offset;
+ __u16 size;
+} skb_frag_t;
+
+struct skb_shared_info {
+ unsigned int nr_frags;
+ skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+struct sk_buff {
+ /* These two members must be first. */
+ struct sk_buff * next; /* Next buffer in list */
+ struct sk_buff * prev; /* Previous buffer in list */
+
+ struct sk_buff_head * list; /* List we are on */
+ struct net_device *dev; /* Device we arrived on/are leaving by */
+
+ /* Transport layer header */
+ union
+ {
+ struct tcphdr *th;
+ struct udphdr *uh;
+ struct icmphdr *icmph;
+ struct igmphdr *igmph;
+ struct iphdr *ipiph;
+ struct spxhdr *spxh;
+ unsigned char *raw;
+ } h;
+
+ /* Network layer header */
+ union
+ {
+ struct iphdr *iph;
+ struct ipv6hdr *ipv6h;
+ struct arphdr *arph;
+ struct ipxhdr *ipxh;
+ unsigned char *raw;
+ } nh;
+
+ /* Link layer header */
+ union
+ {
+ struct ethhdr *ethernet;
+ unsigned char *raw;
+ } mac;
+
+ unsigned int len; /* Length of actual data */
+ unsigned int data_len;
+ unsigned int csum; /* Checksum */
+ unsigned char skb_type,
+ pkt_type, /* Packet class */
+ ip_summed; /* Driver fed us an IP checksum */
+ unsigned short protocol; /* Packet protocol from driver. */
+ unsigned char *head; /* Head of buffer */
+ unsigned char *data; /* Data head pointer */
+ unsigned char *tail; /* Tail pointer */
+ unsigned char *end; /* End pointer */
+
+ void (*destructor)(struct sk_buff *); /* Destruct function */
+ struct pfn_info *pf; /* record of physical pf address for freeing */
+ int src_vif; /* vif we came from */
+ int dst_vif; /* vif we are bound for */
+ struct skb_shared_info shinfo; /* shared info is no longer shared in Xen. */
+};
+
+extern void __kfree_skb(struct sk_buff *skb);
+extern struct sk_buff *alloc_skb(unsigned int size, int priority);
+extern struct sk_buff *alloc_skb_nodata(int priority);
+extern struct sk_buff *alloc_zc_skb(unsigned int size, int priority);
+extern void kfree_skbmem(struct sk_buff *skb);
+extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority);
+#define dev_kfree_skb(a) kfree_skb(a)
+extern void skb_over_panic(struct sk_buff *skb, int len, void *here);
+extern void skb_under_panic(struct sk_buff *skb, int len, void *here);
+
+/* In Xen, we don't clone skbs, so shared data can go in the sk_buff struct. */
+#define skb_shinfo(SKB) ((struct skb_shared_info *)(&(SKB)->shinfo))
+
+/**
+ * kfree_skb - free an sk_buff
+ * @skb: buffer to free
+ *
+ * Drop a reference to the buffer and free it if the usage count has
+ * hit zero.
+ */
+
+static inline void kfree_skb(struct sk_buff *skb)
+{
+ __kfree_skb(skb);
+}
+
+/**
+ * skb_queue_len - get queue length
+ * @list_: list to measure
+ *
+ * Return the length of an &sk_buff queue.
+ */
+
+static inline __u32 skb_queue_len(struct sk_buff_head *list_)
+{
+ return(list_->qlen);
+}
+
+static inline void skb_queue_head_init(struct sk_buff_head *list)
+{
+ spin_lock_init(&list->lock);
+ list->prev = (struct sk_buff *)list;
+ list->next = (struct sk_buff *)list;
+ list->qlen = 0;
+}
+
+/**
+ * __skb_queue_head - queue a buffer at the list head
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the start of a list. This function takes no locks
+ * and you must therefore hold required locks before calling it.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+
+static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+ struct sk_buff *prev, *next;
+
+ newsk->list = list;
+ list->qlen++;
+ prev = (struct sk_buff *)list;
+ next = prev->next;
+ newsk->next = next;
+ newsk->prev = prev;
+ next->prev = newsk;
+ prev->next = newsk;
+}
+
+/**
+ * __skb_dequeue - remove from the head of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the head of the list. This function does not take any locks
+ * so must be used with appropriate locks held only. The head item is
+ * returned or %NULL if the list is empty.
+ */
+
+static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
+{
+ struct sk_buff *next, *prev, *result;
+
+ prev = (struct sk_buff *) list;
+ next = prev->next;
+ result = NULL;
+ if (next != prev) {
+ result = next;
+ next = next->next;
+ list->qlen--;
+ next->prev = prev;
+ prev->next = next;
+ result->next = NULL;
+ result->prev = NULL;
+ result->list = NULL;
+ }
+ return result;
+}
+
+static inline int skb_is_nonlinear(const struct sk_buff *skb)
+{
+ return skb->data_len;
+}
+
+#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) BUG(); } while (0)
+
+/*
+ * Add data to an sk_buff
+ */
+
+static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
+{
+ unsigned char *tmp=skb->tail;
+ SKB_LINEAR_ASSERT(skb);
+ skb->tail+=len;
+ skb->len+=len;
+ return tmp;
+}
+
+/**
+ * skb_put - add data to a buffer
+ * @skb: buffer to use
+ * @len: amount of data to add
+ *
+ * This function extends the used data area of the buffer. If this would
+ * exceed the total buffer size the kernel will panic. A pointer to the
+ * first byte of the extra data is returned.
+ */
+
+static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
+{
+ unsigned char *tmp=skb->tail;
+ SKB_LINEAR_ASSERT(skb);
+ skb->tail+=len;
+ skb->len+=len;
+ if(skb->tail>skb->end) {
+ skb_over_panic(skb, len, current_text_addr());
+ }
+ return tmp;
+}
+
+static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
+{
+ skb->data-=len;
+ skb->len+=len;
+ return skb->data;
+}
+
+/**
+ * skb_push - add data to the start of a buffer
+ * @skb: buffer to use
+ * @len: amount of data to add
+ *
+ * This function extends the used data area of the buffer at the buffer
+ * start. If this would exceed the total buffer headroom the kernel will
+ * panic. A pointer to the first byte of the extra data is returned.
+ */
+
+static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
+{
+ skb->data-=len;
+ skb->len+=len;
+ if(skb->data<skb->head) {
+ skb_under_panic(skb, len, current_text_addr());
+ }
+ return skb->data;
+}
+
+static inline char *__skb_pull(struct sk_buff *skb, unsigned int len)
+{
+ skb->len-=len;
+ if (skb->len < skb->data_len)
+ BUG();
+ return skb->data+=len;
+}
+
+/**
+ * skb_pull - remove data from the start of a buffer
+ * @skb: buffer to use
+ * @len: amount of data to remove
+ *
+ * This function removes data from the start of a buffer, returning
+ * the memory to the headroom. A pointer to the next data in the buffer
+ * is returned. Once the data has been pulled future pushes will overwrite
+ * the old data.
+ */
+
+static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len)
+{
+ if (len > skb->len)
+ return NULL;
+ return __skb_pull(skb,len);
+}
+
+/**
+ * skb_reserve - adjust headroom
+ * @skb: buffer to alter
+ * @len: bytes to move
+ *
+ * Increase the headroom of an empty &sk_buff by reducing the tail
+ * room. This is only allowed for an empty buffer.
+ */
+
+static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
+{
+ skb->data+=len;
+ skb->tail+=len;
+}
+
+/**
+ * __dev_alloc_skb - allocate an skbuff for sending
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+
+static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
+ int gfp_mask)
+{
+ struct sk_buff *skb;
+ skb = alloc_zc_skb(length+16, gfp_mask);
+ if (skb)
+ skb_reserve(skb,16);
+ return skb;
+}
+
+/**
+ * dev_alloc_skb - allocate an skbuff for sending
+ * @length: length to allocate
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory. Although this function
+ * allocates memory, it can be called from an interrupt.
+ */
+
+static inline struct sk_buff *dev_alloc_skb(unsigned int length)
+{
+ return __dev_alloc_skb(length, GFP_ATOMIC);
+}
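+
+/*
+ * Illustrative sketch (sizes hypothetical): reserve headroom first, then
+ * append payload with skb_put(), then prepend headers with skb_push():
+ *
+ *   struct sk_buff *skb = dev_alloc_skb(1514);
+ *   if ( skb != NULL )
+ *   {
+ *       unsigned char *p = skb_put(skb, 1000);    1000-byte payload
+ *       memset(p, 0, 1000);
+ *       skb_push(skb, 14);     link-layer header fits in the headroom
+ *   }
+ */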
+
+static inline void *kmap_skb_frag(const skb_frag_t *frag)
+{
+ return page_address(frag->page);
+}
+
+static inline void kunmap_skb_frag(void *vaddr)
+{
+}
+
+extern int skb_copy_bits(const struct sk_buff *skb,
+ int offset, void *to, int len);
+extern void skb_init(void);
+
+#endif /* _LINUX_SKBUFF_H */
diff --git a/xen/include/xeno/slab.h b/xen/include/xeno/slab.h
new file mode 100644
index 0000000000..21a53051f1
--- /dev/null
+++ b/xen/include/xeno/slab.h
@@ -0,0 +1,84 @@
+/*
+ * linux/mm/slab.h
+ * Written by Mark Hemment, 1996.
+ * (markhe@nextd.demon.co.uk)
+ */
+
+#if !defined(_LINUX_SLAB_H)
+#define _LINUX_SLAB_H
+
+typedef struct kmem_cache_s kmem_cache_t;
+
+#include <xeno/mm.h>
+#include <xeno/cache.h>
+
+/* flags for kmem_cache_alloc() */
+#define SLAB_NOFS GFP_NOFS
+#define SLAB_NOIO GFP_NOIO
+#define SLAB_NOHIGHIO GFP_NOHIGHIO
+#define SLAB_ATOMIC GFP_ATOMIC
+#define SLAB_USER GFP_USER
+#define SLAB_KERNEL GFP_KERNEL
+#define SLAB_NFS GFP_NFS
+#define SLAB_DMA GFP_DMA
+
+#define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_HIGHIO|__GFP_FS)
+#define SLAB_NO_GROW 0x00001000UL /* don't grow a cache */
+
+/* flags to pass to kmem_cache_create().
+ * The first 3 are only valid when the allocator has been built with
+ * SLAB_DEBUG_SUPPORT.
+ */
+#define SLAB_DEBUG_FREE 0x00000100UL /* Perform (expensive) checks on free */
+#define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor (as verifier) */
+#define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */
+#define SLAB_POISON 0x00000800UL /* Poison objects */
+#define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */
+#define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on h/w cache lines */
+#define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */
+
+/* flags passed to a constructor func */
+#define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then destructor */
+#define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */
+#define SLAB_CTOR_VERIFY 0x004UL /* tell constructor it's a verify call */
+
+/* prototypes */
+extern void kmem_cache_init(void);
+extern void kmem_cache_sizes_init(unsigned long);
+
+extern kmem_cache_t *kmem_find_general_cachep(size_t, int gfpflags);
+extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long,
+ void (*)(void *, kmem_cache_t *, unsigned long),
+ void (*)(void *, kmem_cache_t *, unsigned long));
+extern int kmem_cache_destroy(kmem_cache_t *);
+extern int kmem_cache_shrink(kmem_cache_t *);
+extern void *kmem_cache_alloc(kmem_cache_t *, int);
+extern void kmem_cache_free(kmem_cache_t *, void *);
+
+extern void *kmalloc(size_t, int);
+extern void kfree(const void *);
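+
+/*
+ * Illustrative sketch (names hypothetical): a fixed-size object cache is
+ * created once at init time and then serves fast allocations:
+ *
+ *   kmem_cache_t *foo_cachep =
+ *       kmem_cache_create("foo_cache", sizeof(struct foo), 0,
+ *                         SLAB_HWCACHE_ALIGN, NULL, NULL);
+ *
+ *   struct foo *f = kmem_cache_alloc(foo_cachep, SLAB_KERNEL);
+ *   ...
+ *   kmem_cache_free(foo_cachep, f);
+ */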
+
+extern int FASTCALL(kmem_cache_reap(int));
+
+extern void dump_slabinfo();
+
+#if 0
+extern int slabinfo_read_proc(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+extern int slabinfo_write_proc(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+#endif
+
+/* System wide caches */
+extern kmem_cache_t *vm_area_cachep;
+extern kmem_cache_t *mm_cachep;
+extern kmem_cache_t *names_cachep;
+extern kmem_cache_t *files_cachep;
+extern kmem_cache_t *filp_cachep;
+extern kmem_cache_t *dquot_cachep;
+extern kmem_cache_t *bh_cachep;
+extern kmem_cache_t *fs_cachep;
+extern kmem_cache_t *sigact_cachep;
+
+
+#endif /* _LINUX_SLAB_H */
diff --git a/xen/include/xeno/smp.h b/xen/include/xeno/smp.h
new file mode 100644
index 0000000000..786026649e
--- /dev/null
+++ b/xen/include/xeno/smp.h
@@ -0,0 +1,88 @@
+#ifndef __LINUX_SMP_H
+#define __LINUX_SMP_H
+
+/*
+ * Generic SMP support
+ * Alan Cox. <alan@redhat.com>
+ */
+
+#include <xeno/config.h>
+
+#ifdef CONFIG_SMP
+
+#include <asm/smp.h>
+
+/*
+ * main cross-CPU interfaces, handles INIT, TLB flush, STOP, etc.
+ * (defined in asm header):
+ */
+
+/*
+ * stops all CPUs but the current one:
+ */
+extern void smp_send_stop(void);
+
+extern void FASTCALL(smp_send_event_check_mask(unsigned long cpu_mask));
+#define smp_send_event_check_cpu(_cpu) smp_send_event_check_mask(1<<(_cpu))
+
+
+/*
+ * Boot processor call to load the other CPUs
+ */
+extern void smp_boot_cpus(void);
+
+/*
+ * Processor call in. Must hold processors until ..
+ */
+extern void smp_callin(void);
+
+/*
+ * Multiprocessors may now schedule
+ */
+extern void smp_commence(void);
+
+/*
+ * Call a function on all other processors
+ */
+extern int smp_call_function (void (*func) (void *info), void *info,
+ int retry, int wait);
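+
+/*
+ * Illustrative sketch (handler name hypothetical): run a callback on
+ * every other CPU and spin until they have all finished; note the
+ * calling CPU is not included, so it runs the handler itself:
+ *
+ *   static void drain_local_state(void *unused) { ... }
+ *
+ *   smp_call_function(drain_local_state, NULL, 1, 1);   retry=1, wait=1
+ *   drain_local_state(NULL);
+ */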
+
+/*
+ * True once the per-process idle is forked
+ */
+extern int smp_threads_ready;
+
+extern int smp_num_cpus;
+
+extern volatile unsigned long smp_msg_data;
+extern volatile int smp_src_cpu;
+extern volatile int smp_msg_id;
+
+#define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPUs */
+#define MSG_ALL 0x8001
+
+#define MSG_INVALIDATE_TLB 0x0001 /* Remote processor TLB invalidate */
+#define MSG_STOP_CPU 0x0002 /* Sent to shut down slave CPUs
+ * when rebooting
+ */
+#define MSG_RESCHEDULE 0x0003 /* Reschedule request from master CPU */
+#define MSG_CALL_FUNCTION 0x0004 /* Call function on all other CPUs */
+
+#else
+
+/*
+ * These macros fold the SMP functionality into a single CPU system
+ */
+
+#define smp_num_cpus 1
+#define smp_processor_id() 0
+#define hard_smp_processor_id() 0
+#define smp_threads_ready 1
+#define kernel_lock()
+#define cpu_logical_map(cpu) 0
+#define cpu_number_map(cpu) 0
+#define smp_call_function(func,info,retry,wait) ({ 0; })
+#define cpu_online_map 1
+
+#endif
+#endif
diff --git a/xen/include/xeno/socket.h b/xen/include/xeno/socket.h
new file mode 100644
index 0000000000..92e7b93ae5
--- /dev/null
+++ b/xen/include/xeno/socket.h
@@ -0,0 +1,136 @@
+#ifndef _LINUX_SOCKET_H
+#define _LINUX_SOCKET_H
+
+#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
+
+//#include <asm/socket.h> /* arch-dependent defines */
+#include <linux/sockios.h> /* the SIOCxxx I/O controls */
+//#include <linux/uio.h> /* iovec support */
+#include <linux/types.h> /* pid_t */
+
+typedef unsigned short sa_family_t;
+
+/*
+ * 1003.1g requires sa_family_t and that sa_data is char.
+ */
+
+struct sockaddr {
+ sa_family_t sa_family; /* address family, AF_xxx */
+ char sa_data[14]; /* 14 bytes of protocol address */
+};
+
+/* Supported address families. */
+#define AF_UNSPEC 0
+#define AF_UNIX 1 /* Unix domain sockets */
+#define AF_LOCAL 1 /* POSIX name for AF_UNIX */
+#define AF_INET 2 /* Internet IP Protocol */
+#define AF_AX25 3 /* Amateur Radio AX.25 */
+#define AF_IPX 4 /* Novell IPX */
+#define AF_APPLETALK 5 /* AppleTalk DDP */
+#define AF_NETROM 6 /* Amateur Radio NET/ROM */
+#define AF_BRIDGE 7 /* Multiprotocol bridge */
+#define AF_ATMPVC 8 /* ATM PVCs */
+#define AF_X25 9 /* Reserved for X.25 project */
+#define AF_INET6 10 /* IP version 6 */
+#define AF_ROSE 11 /* Amateur Radio X.25 PLP */
+#define AF_DECnet 12 /* Reserved for DECnet project */
+#define AF_NETBEUI 13 /* Reserved for 802.2LLC project*/
+#define AF_SECURITY 14 /* Security callback pseudo AF */
+#define AF_KEY 15 /* PF_KEY key management API */
+#define AF_NETLINK 16
+#define AF_ROUTE AF_NETLINK /* Alias to emulate 4.4BSD */
+#define AF_PACKET 17 /* Packet family */
+#define AF_ASH 18 /* Ash */
+#define AF_ECONET 19 /* Acorn Econet */
+#define AF_ATMSVC 20 /* ATM SVCs */
+#define AF_SNA 22 /* Linux SNA Project (nutters!) */
+#define AF_IRDA 23 /* IRDA sockets */
+#define AF_PPPOX 24 /* PPPoX sockets */
+#define AF_WANPIPE 25 /* Wanpipe API Sockets */
+#define AF_BLUETOOTH 31 /* Bluetooth sockets */
+#define AF_MAX 32 /* For now.. */
+
+/* Protocol families, same as address families. */
+#define PF_UNSPEC AF_UNSPEC
+#define PF_UNIX AF_UNIX
+#define PF_LOCAL AF_LOCAL
+#define PF_INET AF_INET
+#define PF_AX25 AF_AX25
+#define PF_IPX AF_IPX
+#define PF_APPLETALK AF_APPLETALK
+#define PF_NETROM AF_NETROM
+#define PF_BRIDGE AF_BRIDGE
+#define PF_ATMPVC AF_ATMPVC
+#define PF_X25 AF_X25
+#define PF_INET6 AF_INET6
+#define PF_ROSE AF_ROSE
+#define PF_DECnet AF_DECnet
+#define PF_NETBEUI AF_NETBEUI
+#define PF_SECURITY AF_SECURITY
+#define PF_KEY AF_KEY
+#define PF_NETLINK AF_NETLINK
+#define PF_ROUTE AF_ROUTE
+#define PF_PACKET AF_PACKET
+#define PF_ASH AF_ASH
+#define PF_ECONET AF_ECONET
+#define PF_ATMSVC AF_ATMSVC
+#define PF_SNA AF_SNA
+#define PF_IRDA AF_IRDA
+#define PF_PPPOX AF_PPPOX
+#define PF_WANPIPE AF_WANPIPE
+#define PF_BLUETOOTH AF_BLUETOOTH
+#define PF_MAX AF_MAX
+
+/* Maximum queue length specifiable by listen. */
+#define SOMAXCONN 128
+
+/* Flags we can use with send and recv.
+   Added those for 1003.1g; not all are supported yet
+ */
+
+#define MSG_OOB 1
+#define MSG_PEEK 2
+#define MSG_DONTROUTE 4
+#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */
+#define MSG_CTRUNC 8
+#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. for MTU */
+#define MSG_TRUNC 0x20
+#define MSG_DONTWAIT 0x40 /* Nonblocking io */
+#define MSG_EOR 0x80 /* End of record */
+#define MSG_WAITALL 0x100 /* Wait for a full request */
+#define MSG_FIN 0x200
+#define MSG_SYN 0x400
+#define MSG_CONFIRM 0x800 /* Confirm path validity */
+#define MSG_RST 0x1000
+#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */
+#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */
+#define MSG_MORE 0x8000 /* Sender will send more */
+
+#define MSG_EOF MSG_FIN
+
+
+/* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */
+#define SOL_IP 0
+/* #define SOL_ICMP 1 No-no-no! Due to Linux :-) we cannot use SOL_ICMP=1 */
+#define SOL_TCP 6
+#define SOL_UDP 17
+#define SOL_IPV6 41
+#define SOL_ICMPV6 58
+#define SOL_RAW 255
+#define SOL_IPX 256
+#define SOL_AX25 257
+#define SOL_ATALK 258
+#define SOL_NETROM 259
+#define SOL_ROSE 260
+#define SOL_DECNET 261
+#define SOL_X25 262
+#define SOL_PACKET 263
+#define SOL_ATM 264 /* ATM layer (cell level) */
+#define SOL_AAL 265 /* ATM Adaption Layer (packet level) */
+#define SOL_IRDA 266
+
+/* IPX options */
+#define IPX_TYPE 1
+
+#endif /* not kernel and not glibc */
+#endif /* _LINUX_SOCKET_H */
diff --git a/xen/include/xeno/sockios.h b/xen/include/xeno/sockios.h
new file mode 100644
index 0000000000..a0ad8100bf
--- /dev/null
+++ b/xen/include/xeno/sockios.h
@@ -0,0 +1,132 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Definitions of the socket-level I/O control calls.
+ *
+ * Version: @(#)sockios.h 1.0.2 03/09/93
+ *
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_SOCKIOS_H
+#define _LINUX_SOCKIOS_H
+
+//#include <asm/sockios.h>
+
+/* Linux-specific socket ioctls */
+#define SIOCINQ FIONREAD
+#define SIOCOUTQ TIOCOUTQ
+
+/* Routing table calls. */
+#define SIOCADDRT 0x890B /* add routing table entry */
+#define SIOCDELRT 0x890C /* delete routing table entry */
+#define SIOCRTMSG 0x890D /* call to routing system */
+
+/* Socket configuration controls. */
+#define SIOCGIFNAME 0x8910 /* get iface name */
+#define SIOCSIFLINK 0x8911 /* set iface channel */
+#define SIOCGIFCONF 0x8912 /* get iface list */
+#define SIOCGIFFLAGS 0x8913 /* get flags */
+#define SIOCSIFFLAGS 0x8914 /* set flags */
+#define SIOCGIFADDR 0x8915 /* get PA address */
+#define SIOCSIFADDR 0x8916 /* set PA address */
+#define SIOCGIFDSTADDR 0x8917 /* get remote PA address */
+#define SIOCSIFDSTADDR 0x8918 /* set remote PA address */
+#define SIOCGIFBRDADDR 0x8919 /* get broadcast PA address */
+#define SIOCSIFBRDADDR 0x891a /* set broadcast PA address */
+#define SIOCGIFNETMASK 0x891b /* get network PA mask */
+#define SIOCSIFNETMASK 0x891c /* set network PA mask */
+#define SIOCGIFMETRIC 0x891d /* get metric */
+#define SIOCSIFMETRIC 0x891e /* set metric */
+#define SIOCGIFMEM 0x891f /* get memory address (BSD) */
+#define SIOCSIFMEM 0x8920 /* set memory address (BSD) */
+#define SIOCGIFMTU 0x8921 /* get MTU size */
+#define SIOCSIFMTU 0x8922 /* set MTU size */
+#define SIOCSIFNAME 0x8923 /* set interface name */
+#define SIOCSIFHWADDR 0x8924 /* set hardware address */
+#define SIOCGIFENCAP 0x8925 /* get/set encapsulations */
+#define SIOCSIFENCAP 0x8926
+#define SIOCGIFHWADDR 0x8927 /* Get hardware address */
+#define SIOCGIFSLAVE 0x8929 /* Driver slaving support */
+#define SIOCSIFSLAVE 0x8930
+#define SIOCADDMULTI 0x8931 /* Multicast address lists */
+#define SIOCDELMULTI 0x8932
+#define SIOCGIFINDEX 0x8933 /* name -> if_index mapping */
+#define SIOGIFINDEX SIOCGIFINDEX /* misprint compatibility :-) */
+#define SIOCSIFPFLAGS 0x8934 /* set/get extended flags set */
+#define SIOCGIFPFLAGS 0x8935
+#define SIOCDIFADDR 0x8936 /* delete PA address */
+#define SIOCSIFHWBROADCAST 0x8937 /* set hardware broadcast addr */
+#define SIOCGIFCOUNT 0x8938 /* get number of devices */
+
+#define SIOCGIFBR 0x8940 /* Bridging support */
+#define SIOCSIFBR 0x8941 /* Set bridging options */
+
+#define SIOCGIFDIVERT 0x8944 /* Frame diversion support */
+#define SIOCSIFDIVERT 0x8945 /* Set frame diversion options */
+
+#define SIOCETHTOOL 0x8946 /* Ethtool interface */
+
+#define SIOCGMIIPHY 0x8947 /* Get address of MII PHY in use. */
+#define SIOCGMIIREG 0x8948 /* Read MII PHY register. */
+#define SIOCSMIIREG 0x8949 /* Write MII PHY register. */
+
+/* ARP cache control calls. */
+ /* 0x8950 - 0x8952: obsolete calls, don't re-use */
+#define SIOCDARP 0x8953 /* delete ARP table entry */
+#define SIOCGARP 0x8954 /* get ARP table entry */
+#define SIOCSARP 0x8955 /* set ARP table entry */
+
+/* RARP cache control calls. */
+#define SIOCDRARP 0x8960 /* delete RARP table entry */
+#define SIOCGRARP 0x8961 /* get RARP table entry */
+#define SIOCSRARP 0x8962 /* set RARP table entry */
+
+/* Driver configuration calls */
+
+#define SIOCGIFMAP 0x8970 /* Get device parameters */
+#define SIOCSIFMAP 0x8971 /* Set device parameters */
+
+/* DLCI configuration calls */
+
+#define SIOCADDDLCI 0x8980 /* Create new DLCI device */
+#define SIOCDELDLCI 0x8981 /* Delete DLCI device */
+
+#define SIOCGIFVLAN 0x8982 /* 802.1Q VLAN support */
+#define SIOCSIFVLAN 0x8983 /* Set 802.1Q VLAN options */
+
+/* bonding calls */
+
+#define SIOCBONDENSLAVE 0x8990 /* enslave a device to the bond */
+#define SIOCBONDRELEASE 0x8991 /* release a slave from the bond*/
+#define SIOCBONDSETHWADDR 0x8992 /* set the hw addr of the bond */
+#define SIOCBONDSLAVEINFOQUERY 0x8993 /* rtn info about slave state */
+#define SIOCBONDINFOQUERY 0x8994 /* rtn info about bond state */
+#define SIOCBONDCHANGEACTIVE 0x8995 /* update to a new active slave */
+
+/* Device private ioctl calls */
+
+/*
+ * These 16 ioctls are available to devices via the do_ioctl() device
+ * vector. Each device should include this file and redefine these names
+ * as their own. Because these are device dependent it is a good idea
+ * _NOT_ to issue them to random objects and hope.
+ *
+ * THESE IOCTLS ARE _DEPRECATED_ AND WILL DISAPPEAR IN 2.5.X -DaveM
+ */
+
+#define SIOCDEVPRIVATE 0x89F0 /* to 89FF */
+
+/*
+ * These 16 ioctl calls are protocol private
+ */
+
+#define SIOCPROTOPRIVATE 0x89E0 /* to 89EF */
+#endif /* _LINUX_SOCKIOS_H */
diff --git a/xen/include/xeno/spinlock.h b/xen/include/xeno/spinlock.h
new file mode 100644
index 0000000000..08f2eb6098
--- /dev/null
+++ b/xen/include/xeno/spinlock.h
@@ -0,0 +1,142 @@
+#ifndef __LINUX_SPINLOCK_H
+#define __LINUX_SPINLOCK_H
+
+#include <xeno/config.h>
+#include <asm/system.h>
+
+/*
+ * These are the generic versions of the spinlocks and read-write
+ * locks..
+ */
+#define spin_lock_irqsave(lock, flags) do { local_irq_save(flags); spin_lock(lock); } while (0)
+#define spin_lock_irq(lock) do { local_irq_disable(); spin_lock(lock); } while (0)
+#define spin_lock_bh(lock) do { local_bh_disable(); spin_lock(lock); } while (0)
+
+#define read_lock_irqsave(lock, flags) do { local_irq_save(flags); read_lock(lock); } while (0)
+#define read_lock_irq(lock) do { local_irq_disable(); read_lock(lock); } while (0)
+#define read_lock_bh(lock) do { local_bh_disable(); read_lock(lock); } while (0)
+
+#define write_lock_irqsave(lock, flags) do { local_irq_save(flags); write_lock(lock); } while (0)
+#define write_lock_irq(lock) do { local_irq_disable(); write_lock(lock); } while (0)
+#define write_lock_bh(lock) do { local_bh_disable(); write_lock(lock); } while (0)
+
+#define spin_unlock_irqrestore(lock, flags) do { spin_unlock(lock); local_irq_restore(flags); } while (0)
+#define spin_unlock_irq(lock) do { spin_unlock(lock); local_irq_enable(); } while (0)
+#define spin_unlock_bh(lock) do { spin_unlock(lock); local_bh_enable(); } while (0)
+
+#define read_unlock_irqrestore(lock, flags) do { read_unlock(lock); local_irq_restore(flags); } while (0)
+#define read_unlock_irq(lock) do { read_unlock(lock); local_irq_enable(); } while (0)
+#define read_unlock_bh(lock) do { read_unlock(lock); local_bh_enable(); } while (0)
+
+#define write_unlock_irqrestore(lock, flags) do { write_unlock(lock); local_irq_restore(flags); } while (0)
+#define write_unlock_irq(lock) do { write_unlock(lock); local_irq_enable(); } while (0)
+#define write_unlock_bh(lock) do { write_unlock(lock); local_bh_enable(); } while (0)
+#define spin_trylock_bh(lock) ({ int __r; local_bh_disable();\
+ __r = spin_trylock(lock); \
+ if (!__r) local_bh_enable(); \
+ __r; })
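+
+/*
+ * Illustrative sketch (lock name hypothetical): the irqsave forms are
+ * the safe default when a lock can also be taken from interrupt context;
+ * the caller's interrupt state is saved in `flags' and restored later:
+ *
+ *   static spinlock_t mylock = SPIN_LOCK_UNLOCKED;
+ *   unsigned long flags;
+ *
+ *   spin_lock_irqsave(&mylock, flags);
+ *   ... critical section runs with local interrupts disabled ...
+ *   spin_unlock_irqrestore(&mylock, flags);
+ */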
+
+#ifdef CONFIG_SMP
+#include <asm/spinlock.h>
+
+#elif !defined(spin_lock_init) /* !SMP and spin_lock_init not previously
+                                  defined (e.g. by including asm/spinlock.h) */
+
+#define DEBUG_SPINLOCKS 0 /* 0 == no debugging, 1 == maintain lock state, 2 == full debug */
+
+#if (DEBUG_SPINLOCKS < 1)
+
+#define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic)
+#define ATOMIC_DEC_AND_LOCK
+
+/*
+ * Your basic spinlocks, allowing only a single CPU anywhere
+ *
+ * Most gcc versions have a nasty bug with empty initializers.
+ */
+#if (__GNUC__ > 2)
+ typedef struct { } spinlock_t;
+ #define SPIN_LOCK_UNLOCKED (spinlock_t) { }
+#else
+ typedef struct { int gcc_is_buggy; } spinlock_t;
+ #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
+#endif
+
+#define spin_lock_init(lock) do { } while(0)
+#define spin_lock(lock) (void)(lock) /* Not "unused variable". */
+#define spin_is_locked(lock) (0)
+#define spin_trylock(lock) ({1; })
+#define spin_unlock_wait(lock) do { } while(0)
+#define spin_unlock(lock) do { } while(0)
+
+#elif (DEBUG_SPINLOCKS < 2)
+
+typedef struct {
+ volatile unsigned long lock;
+} spinlock_t;
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
+
+#define spin_lock_init(x) do { (x)->lock = 0; } while (0)
+#define spin_is_locked(lock) (test_bit(0,(lock)))
+#define spin_trylock(lock) (!test_and_set_bit(0,(lock)))
+
+#define spin_lock(x) do { (x)->lock = 1; } while (0)
+#define spin_unlock_wait(x) do { } while (0)
+#define spin_unlock(x) do { (x)->lock = 0; } while (0)
+
+#else /* (DEBUG_SPINLOCKS >= 2) */
+
+typedef struct {
+ volatile unsigned long lock;
+ volatile unsigned int babble;
+ const char *module;
+} spinlock_t;
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0, 25, __BASE_FILE__ }
+
+/*#include <linux/kernel.h>*/
+
+#define spin_lock_init(x) do { (x)->lock = 0; } while (0)
+#define spin_is_locked(lock) (test_bit(0,(lock)))
+#define spin_trylock(lock) (!test_and_set_bit(0,(lock)))
+
+#define spin_lock(x) do {unsigned long __spinflags; save_flags(__spinflags); cli(); if ((x)->lock&&(x)->babble) {printk("%s:%d: spin_lock(%s:%p) already locked\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} (x)->lock = 1; restore_flags(__spinflags);} while (0)
+#define spin_unlock_wait(x) do {unsigned long __spinflags; save_flags(__spinflags); cli(); if ((x)->lock&&(x)->babble) {printk("%s:%d: spin_unlock_wait(%s:%p) deadlock\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} restore_flags(__spinflags);} while (0)
+#define spin_unlock(x) do {unsigned long __spinflags; save_flags(__spinflags); cli(); if (!(x)->lock&&(x)->babble) {printk("%s:%d: spin_unlock(%s:%p) not locked\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} (x)->lock = 0; restore_flags(__spinflags);} while (0)
+
+#endif /* DEBUG_SPINLOCKS */
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ *
+ * Most gcc versions have a nasty bug with empty initializers.
+ */
+#if (__GNUC__ > 2)
+ typedef struct { } rwlock_t;
+ #define RW_LOCK_UNLOCKED (rwlock_t) { }
+#else
+ typedef struct { int gcc_is_buggy; } rwlock_t;
+ #define RW_LOCK_UNLOCKED (rwlock_t) { 0 }
+#endif
+
+#define rwlock_init(lock) do { } while(0)
+#define read_lock(lock) (void)(lock) /* Not "unused variable". */
+#define read_unlock(lock) do { } while(0)
+#define write_lock(lock) (void)(lock) /* Not "unused variable". */
+#define write_unlock(lock) do { } while(0)
+
+#endif /* !SMP */
+
+/* "lock on reference count zero" */
+#ifndef ATOMIC_DEC_AND_LOCK
+#include <asm/atomic.h>
+extern int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
+#endif
+
+#endif /* __LINUX_SPINLOCK_H */
diff --git a/xen/include/xeno/time.h b/xen/include/xeno/time.h
new file mode 100644
index 0000000000..a017b0d2b0
--- /dev/null
+++ b/xen/include/xeno/time.h
@@ -0,0 +1,98 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: time.h
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: This file provides a one-stop shop for all time-related
+ * issues within the hypervisor.
+ *
+ * The Hypervisor provides the following notions of time:
+ * Cycle Counter Time, System Time, Wall Clock Time, and
+ * Domain Virtual Time.
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ */
+
+
+
+#ifndef __XENO_TIME_H__
+#define __XENO_TIME_H__
+
+#include <asm/ptrace.h> /* XXX Only used for do_timer which should be moved */
+#include <asm/time.h> /* pull in architecture specific time definition */
+#include <xeno/types.h>
+
+/*
+ * Init time
+ */
+extern int init_xeno_time();
+
+/*
+ * Cycle Counter Time (defined in asm/time.h)
+ */
+
+
+extern u64 cpu_freq;
+
+/*
+ * System Time
+ * 64 bit value containing the nanoseconds elapsed since boot time.
+ * This value is adjusted by frequency drift.
+ * NOW() returns the current time.
+ * The other macros are for convenience to approximate short intervals
+ * of real time into system time
+ */
+
+s_time_t get_s_time(void);
+
+#define NOW() ((s_time_t)get_s_time())
+#define SECONDS(_s) (((s_time_t)(_s)) * 1000000000UL )
+#define TENTHS(_ts) (((s_time_t)(_ts)) * 100000000UL )
+#define HUNDREDTHS(_hs) (((s_time_t)(_hs)) * 10000000UL )
+#define MILLISECS(_ms) (((s_time_t)(_ms)) * 1000000UL )
+#define MICROSECS(_us) (((s_time_t)(_us)) * 1000UL )
+#define Time_Max ((s_time_t) 0x7fffffffffffffffLL)
+#define FOREVER Time_Max
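+
+/*
+ * Illustrative sketch: system time is a 64-bit nanosecond count, so
+ * deadlines compose by plain arithmetic, e.g. "50ms from now":
+ *
+ *   s_time_t deadline = NOW() + MILLISECS(50);
+ *   if ( NOW() >= deadline )
+ *       ... the interval has elapsed ...
+ */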
+
+/*
+ * Wall Clock Time
+ */
+struct timeval {
+ long tv_sec; /* seconds */
+ long tv_usec; /* microseconds */
+};
+
+struct timezone {
+ int tz_minuteswest; /* minutes west of Greenwich */
+ int tz_dsttime; /* type of dst correction */
+};
+
+#ifdef __KERNEL__
+extern void do_gettimeofday(struct timeval *tv);
+extern void do_settimeofday(struct timeval *tv);
+extern void get_fast_time(struct timeval *tv);
+extern void (*do_get_fast_time)(struct timeval *);
+#endif
+
+/*
+ * Domain Virtual Time (defined in asm/time.h)
+ */
+/* XXX Interface for getting and setting still missing */
+
+
+/* update the per domain time information */
+extern void update_dom_time(shared_info_t *si);
+
+/* XXX move this */
+extern void do_timer(struct pt_regs *regs);
+
+#endif /* __XENO_TIME_H__ */
diff --git a/xen/include/xeno/timer.h b/xen/include/xeno/timer.h
new file mode 100644
index 0000000000..dcde75b182
--- /dev/null
+++ b/xen/include/xeno/timer.h
@@ -0,0 +1,81 @@
+#ifndef _LINUX_TIMER_H
+#define _LINUX_TIMER_H
+
+#include <linux/config.h>
+#include <linux/list.h>
+
+/*
+ * In Linux 2.4, static timers have been removed from the kernel.
+ * Timers may be dynamically created and destroyed, and should be initialized
+ * by a call to init_timer() upon creation.
+ *
+ * The "data" field enables use of a common timeout function for several
+ * timeouts. You can use this field to distinguish between the different
+ * invocations.
+ *
+ * RN: Unlike the Linux timers, which are executed at the periodic timer
+ * interrupt, in Xen, the timer list is only checked "occasionally", thus
+ * its accuracy might be somewhat worse than under Linux. However, the
+ * hypervisor should be purely event-driven and, in fact, in the current
+ *     implementation, timers are only used for watchdog purposes at a very
+ * coarse granularity anyway. Thus this is not a problem.
+ */
+struct timer_list {
+ struct list_head list;
+ unsigned long expires; /* jiffies */
+ unsigned long data;
+ void (*function)(unsigned long);
+};
+
+extern void add_timer(struct timer_list * timer);
+extern int del_timer(struct timer_list * timer);
+
+#ifdef CONFIG_SMP
+extern int del_timer_sync(struct timer_list * timer);
+extern void sync_timers(void);
+#else
+#define del_timer_sync(t) del_timer(t)
+#define sync_timers() do { } while (0)
+#endif
+
+/*
+ * mod_timer is a more efficient way to update the expires field of an
+ * active timer (if the timer is inactive it will be activated)
+ * mod_timer(a,b) is equivalent to del_timer(a); a->expires = b; add_timer(a).
+ * If the timer is known to be not pending (ie, in the handler), mod_timer
+ * is less efficient than a->expires = b; add_timer(a).
+ */
+int mod_timer(struct timer_list *timer, unsigned long expires);
+
+extern void it_real_fn(unsigned long);
+
+static inline void init_timer(struct timer_list * timer)
+{
+ timer->list.next = timer->list.prev = NULL;
+}
+
+static inline int timer_pending (const struct timer_list * timer)
+{
+ return timer->list.next != NULL;
+}
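+
+/*
+ * Illustrative sketch (names hypothetical; HZ assumed from asm/param.h):
+ *
+ *   static void watchdog_fn(unsigned long data) { ... }
+ *
+ *   struct timer_list wdog;
+ *   init_timer(&wdog);
+ *   wdog.function = watchdog_fn;
+ *   wdog.data     = 0;
+ *   wdog.expires  = jiffies + HZ;          roughly one second out
+ *   add_timer(&wdog);
+ *   ...
+ *   mod_timer(&wdog, jiffies + 2*HZ);      push the deadline back
+ */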
+
+/*
+ * These inlines deal with timer wrapping correctly. You are
+ * strongly encouraged to use them
+ * 1. Because people otherwise forget
+ *  2. Because if the timer wrap changes in the future you won't have to
+ * alter your driver code.
+ *
+ * time_after(a,b) returns true if the time a is after time b.
+ *
+ * Do this with "<0" and ">=0" to only test the sign of the result. A
+ * good compiler would generate better code (and a really good compiler
+ * wouldn't care). Gcc is currently neither.
+ */
+#define time_after(a,b) ((long)(b) - (long)(a) < 0)
+#define time_before(a,b) time_after(b,a)
+
+#define time_after_eq(a,b) ((long)(a) - (long)(b) >= 0)
+#define time_before_eq(a,b) time_after_eq(b,a)
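+
+/*
+ * Worked example: just after a wrap, jiffies == 0x00000005 while a stale
+ * timeout == 0xfffffff0.  (long)(timeout) - (long)(jiffies) is negative,
+ * so time_after(jiffies, timeout) is still true, as intended.
+ */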
+
+#endif
diff --git a/xen/include/xeno/timex.h b/xen/include/xeno/timex.h
new file mode 100644
index 0000000000..3a00a26e2d
--- /dev/null
+++ b/xen/include/xeno/timex.h
@@ -0,0 +1,291 @@
+/*****************************************************************************
+ * *
+ * Copyright (c) David L. Mills 1993 *
+ * *
+ * Permission to use, copy, modify, and distribute this software and its *
+ * documentation for any purpose and without fee is hereby granted, provided *
+ * that the above copyright notice appears in all copies and that both the *
+ * copyright notice and this permission notice appear in supporting *
+ * documentation, and that the name University of Delaware not be used in *
+ * advertising or publicity pertaining to distribution of the software *
+ * without specific, written prior permission. The University of Delaware *
+ * makes no representations about the suitability this software for any *
+ * purpose. It is provided "as is" without express or implied warranty. *
+ * *
+ *****************************************************************************/
+
+/*
+ * Modification history timex.h
+ *
+ * 29 Dec 97 Russell King
+ * Moved CLOCK_TICK_RATE, CLOCK_TICK_FACTOR and FINETUNE to asm/timex.h
+ * for ARM machines
+ *
+ * 9 Jan 97 Adrian Sun
+ * Shifted LATCH define to allow access to alpha machines.
+ *
+ * 26 Sep 94 David L. Mills
+ * Added defines for hybrid phase/frequency-lock loop.
+ *
+ * 19 Mar 94 David L. Mills
+ * Moved defines from kernel routines to header file and added new
+ * defines for PPS phase-lock loop.
+ *
+ * 20 Feb 94 David L. Mills
+ * Revised status codes and structures for external clock and PPS
+ * signal discipline.
+ *
+ * 28 Nov 93 David L. Mills
+ * Adjusted parameters to improve stability and increase poll
+ * interval.
+ *
+ * 17 Sep 93 David L. Mills
+ * Created file $NTP/include/sys/timex.h
+ * 07 Oct 93 Torsten Duwe
+ * Derived linux/timex.h
+ * 1995-08-13 Torsten Duwe
+ * kernel PLL updated to 1994-12-13 specs (rfc-1589)
+ * 1997-08-30 Ulrich Windl
+ * Added new constant NTP_PHASE_LIMIT
+ */
+#ifndef _LINUX_TIMEX_H
+#define _LINUX_TIMEX_H
+
+#include <asm/param.h>
+
+/*
+ * The following defines establish the engineering parameters of the PLL
+ * model. The HZ variable establishes the timer interrupt frequency, 100 Hz
+ * for the SunOS kernel, 256 Hz for the Ultrix kernel and 1024 Hz for the
+ * OSF/1 kernel. The SHIFT_HZ define expresses the same value as the
+ * nearest power of two in order to avoid hardware multiply operations.
+ */
+#if HZ >= 12 && HZ < 24
+# define SHIFT_HZ 4
+#elif HZ >= 24 && HZ < 48
+# define SHIFT_HZ 5
+#elif HZ >= 48 && HZ < 96
+# define SHIFT_HZ 6
+#elif HZ >= 96 && HZ < 192
+# define SHIFT_HZ 7
+#elif HZ >= 192 && HZ < 384
+# define SHIFT_HZ 8
+#elif HZ >= 384 && HZ < 768
+# define SHIFT_HZ 9
+#elif HZ >= 768 && HZ < 1536
+# define SHIFT_HZ 10
+#else
+# error You lose.
+#endif
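+
+/*
+ * E.g. with HZ == 100 the ladder above selects SHIFT_HZ == 7: 128 is the
+ * power of two nearest the tick rate.
+ */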
+
+/*
+ * SHIFT_KG and SHIFT_KF establish the damping of the PLL and are chosen
+ * for a slightly underdamped convergence characteristic. SHIFT_KH
+ * establishes the damping of the FLL and is chosen by wisdom and black
+ * art.
+ *
+ * MAXTC establishes the maximum time constant of the PLL. With the
+ * SHIFT_KG and SHIFT_KF values given and a time constant range from
+ * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours,
+ * respectively.
+ */
+#define SHIFT_KG 6 /* phase factor (shift) */
+#define SHIFT_KF 16 /* PLL frequency factor (shift) */
+#define SHIFT_KH 2 /* FLL frequency factor (shift) */
+#define MAXTC 6 /* maximum time constant (shift) */
+
+/*
+ * The SHIFT_SCALE define establishes the decimal point of the time_phase
+ * variable which serves as an extension to the low-order bits of the
+ * system clock variable. The SHIFT_UPDATE define establishes the decimal
+ * point of the time_offset variable which represents the current offset
+ * with respect to standard time. The FINEUSEC define represents 1 usec in
+ * scaled units.
+ *
+ * SHIFT_USEC defines the scaling (shift) of the time_freq and
+ * time_tolerance variables, which represent the current frequency
+ * offset and maximum frequency tolerance.
+ *
+ * FINEUSEC is 1 us in SHIFT_UPDATE units of the time_phase variable.
+ */
+#define SHIFT_SCALE 22 /* phase scale (shift) */
+#define SHIFT_UPDATE (SHIFT_KG + MAXTC) /* time offset scale (shift) */
+#define SHIFT_USEC 16 /* frequency offset scale (shift) */
+#define FINEUSEC (1L << SHIFT_SCALE) /* 1 us in phase units */
+
+#define MAXPHASE 512000L /* max phase error (us) */
+#define MAXFREQ (512L << SHIFT_USEC) /* max frequency error (ppm) */
+#define MAXTIME (200L << PPS_AVG) /* max PPS error (jitter) (200 us) */
+#define MINSEC 16L /* min interval between updates (s) */
+#define MAXSEC 1200L /* max interval between updates (s) */
+#define NTP_PHASE_LIMIT (MAXPHASE << 5) /* beyond max. dispersion */
+
+/*
+ * The following defines are used only if a pulse-per-second (PPS)
+ * signal is available and connected via a modem control lead, such as
+ * produced by the optional ppsclock feature incorporated in the Sun
+ * asynch driver. They establish the design parameters of the frequency-
+ * lock loop used to discipline the CPU clock oscillator to the PPS
+ * signal.
+ *
+ * PPS_AVG is the averaging factor for the frequency loop, as well as
+ * the time and frequency dispersion.
+ *
+ * PPS_SHIFT and PPS_SHIFTMAX specify the minimum and maximum
+ * calibration intervals, respectively, in seconds as a power of two.
+ *
+ * PPS_VALID is the maximum interval before the PPS signal is considered
+ * invalid and protocol updates used directly instead.
+ *
+ * MAXGLITCH is the maximum interval before a time offset of more than
+ * MAXTIME is believed.
+ */
+#define PPS_AVG 2 /* pps averaging constant (shift) */
+#define PPS_SHIFT 2 /* min interval duration (s) (shift) */
+#define PPS_SHIFTMAX 8 /* max interval duration (s) (shift) */
+#define PPS_VALID 120 /* pps signal watchdog max (s) */
+#define MAXGLITCH 30 /* pps signal glitch max (s) */
+
+/*
+ * Pick up the architecture specific timex specifications
+ */
+#include <asm/timex.h>
+
+/* LATCH is used in the interval timer and ftape setup. */
+#define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */
+
+/*
+ * syscall interface - used (mainly by NTP daemon)
+ * to discipline kernel clock oscillator
+ */
+struct timex {
+ unsigned int modes; /* mode selector */
+ long offset; /* time offset (usec) */
+ long freq; /* frequency offset (scaled ppm) */
+ long maxerror; /* maximum error (usec) */
+ long esterror; /* estimated error (usec) */
+ int status; /* clock command/status */
+ long constant; /* pll time constant */
+ long precision; /* clock precision (usec) (read only) */
+ long tolerance; /* clock frequency tolerance (ppm)
+ * (read only)
+ */
+ struct timeval time; /* (read only) */
+ long tick; /* (modified) usecs between clock ticks */
+
+ long ppsfreq; /* pps frequency (scaled ppm) (ro) */
+ long jitter; /* pps jitter (us) (ro) */
+ int shift; /* interval duration (s) (shift) (ro) */
+ long stabil; /* pps stability (scaled ppm) (ro) */
+ long jitcnt; /* jitter limit exceeded (ro) */
+ long calcnt; /* calibration intervals (ro) */
+ long errcnt; /* calibration errors (ro) */
+ long stbcnt; /* stability limit exceeded (ro) */
+
+ int :32; int :32; int :32; int :32;
+ int :32; int :32; int :32; int :32;
+ int :32; int :32; int :32; int :32;
+};
+
+/*
+ * Mode codes (timex.mode)
+ */
+#define ADJ_OFFSET 0x0001 /* time offset */
+#define ADJ_FREQUENCY 0x0002 /* frequency offset */
+#define ADJ_MAXERROR 0x0004 /* maximum time error */
+#define ADJ_ESTERROR 0x0008 /* estimated time error */
+#define ADJ_STATUS 0x0010 /* clock status */
+#define ADJ_TIMECONST 0x0020 /* pll time constant */
+#define ADJ_TICK 0x4000 /* tick value */
+#define ADJ_OFFSET_SINGLESHOT 0x8001 /* old-fashioned adjtime */
+
+/* xntp 3.4 compatibility names */
+#define MOD_OFFSET ADJ_OFFSET
+#define MOD_FREQUENCY ADJ_FREQUENCY
+#define MOD_MAXERROR ADJ_MAXERROR
+#define MOD_ESTERROR ADJ_ESTERROR
+#define MOD_STATUS ADJ_STATUS
+#define MOD_TIMECONST ADJ_TIMECONST
+#define MOD_CLKB ADJ_TICK
+#define MOD_CLKA ADJ_OFFSET_SINGLESHOT /* 0x8000 in original */
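+
+/*
+ * Illustrative sketch, not from the patch itself: an NTP daemon would
+ * typically drive this interface through adjtimex(), e.g.
+ *
+ *     struct timex tx;
+ *     memset(&tx, 0, sizeof(tx));
+ *     tx.modes  = ADJ_OFFSET | ADJ_STATUS;
+ *     tx.offset = -2500;            phase error of -2.5 ms, in usec
+ *     tx.status = STA_PLL;          enable kernel PLL updates
+ *     adjtimex(&tx);                returns one of the TIME_* states below
+ */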
+
+
+/*
+ * Status codes (timex.status)
+ */
+#define STA_PLL 0x0001 /* enable PLL updates (rw) */
+#define STA_PPSFREQ 0x0002 /* enable PPS freq discipline (rw) */
+#define STA_PPSTIME 0x0004 /* enable PPS time discipline (rw) */
+#define STA_FLL 0x0008 /* select frequency-lock mode (rw) */
+
+#define STA_INS 0x0010 /* insert leap (rw) */
+#define STA_DEL 0x0020 /* delete leap (rw) */
+#define STA_UNSYNC 0x0040 /* clock unsynchronized (rw) */
+#define STA_FREQHOLD 0x0080 /* hold frequency (rw) */
+
+#define STA_PPSSIGNAL 0x0100 /* PPS signal present (ro) */
+#define STA_PPSJITTER 0x0200 /* PPS signal jitter exceeded (ro) */
+#define STA_PPSWANDER 0x0400 /* PPS signal wander exceeded (ro) */
+#define STA_PPSERROR 0x0800 /* PPS signal calibration error (ro) */
+
+#define STA_CLOCKERR 0x1000 /* clock hardware fault (ro) */
+
+#define STA_RONLY (STA_PPSSIGNAL | STA_PPSJITTER | STA_PPSWANDER | \
+ STA_PPSERROR | STA_CLOCKERR) /* read-only bits */
+
+/*
+ * Clock states (time_state)
+ */
+#define TIME_OK 0 /* clock synchronized, no leap second */
+#define TIME_INS 1 /* insert leap second */
+#define TIME_DEL 2 /* delete leap second */
+#define TIME_OOP 3 /* leap second in progress */
+#define TIME_WAIT 4 /* leap second has occurred */
+#define TIME_ERROR 5 /* clock not synchronized */
+#define TIME_BAD TIME_ERROR /* bw compat */
+
+#ifdef __KERNEL__
+/*
+ * kernel variables
+ * Note: maximum error = NTP synch distance = dispersion + delay / 2;
+ * estimated error = NTP dispersion.
+ */
+extern long tick; /* timer interrupt period */
+extern int tickadj; /* amount of adjustment per tick */
+
+/*
+ * phase-lock loop variables
+ */
+extern int time_state; /* clock status */
+extern int time_status; /* clock synchronization status bits */
+extern long time_offset; /* time adjustment (us) */
+extern long time_constant; /* pll time constant */
+extern long time_tolerance; /* frequency tolerance (ppm) */
+extern long time_precision; /* clock precision (us) */
+extern long time_maxerror; /* maximum error */
+extern long time_esterror; /* estimated error */
+
+extern long time_phase; /* phase offset (scaled us) */
+extern long time_freq; /* frequency offset (scaled ppm) */
+extern long time_adj; /* tick adjust (scaled 1 / HZ) */
+extern long time_reftime; /* time at last adjustment (s) */
+
+extern long time_adjust; /* The amount of adjtime left */
+
+/* interface variables pps->timer interrupt */
+extern long pps_offset; /* pps time offset (us) */
+extern long pps_jitter; /* time dispersion (jitter) (us) */
+extern long pps_freq; /* frequency offset (scaled ppm) */
+extern long pps_stabil; /* frequency dispersion (scaled ppm) */
+extern long pps_valid; /* pps signal watchdog counter */
+
+/* interface variables pps->adjtimex */
+extern int pps_shift; /* interval duration (s) (shift) */
+extern long pps_jitcnt; /* jitter limit exceeded */
+extern long pps_calcnt; /* calibration intervals */
+extern long pps_errcnt; /* calibration errors */
+extern long pps_stbcnt; /* stability limit exceeded */
+
+#endif /* __KERNEL__ */
+
+#endif /* LINUX_TIMEX_H */
diff --git a/xen/include/xeno/tqueue.h b/xen/include/xeno/tqueue.h
new file mode 100644
index 0000000000..4a730f0ad9
--- /dev/null
+++ b/xen/include/xeno/tqueue.h
@@ -0,0 +1,125 @@
+/*
+ * tqueue.h --- task queue handling for Linux.
+ *
+ * Mostly based on a proposed bottom-half replacement code written by
+ * Kai Petzke, wpp@marie.physik.tu-berlin.de.
+ *
+ * Modified for use in the Linux kernel by Theodore Ts'o,
+ * tytso@mit.edu. Any bugs are my fault, not Kai's.
+ *
+ * The original comment follows below.
+ */
+
+#ifndef _LINUX_TQUEUE_H
+#define _LINUX_TQUEUE_H
+
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <asm/bitops.h>
+#include <asm/system.h>
+
+/*
+ * New proposed "bottom half" handlers:
+ * (C) 1994 Kai Petzke, wpp@marie.physik.tu-berlin.de
+ *
+ * Advantages:
+ * Advantages:
+ * - Bottom halves are implemented as a linked list. You can have as many
+ *   of them as you want.
+ * - No more scanning of a bit field is required upon calling a bottom half.
+ * - Support for chained bottom half lists. The run_task_queue() function can be
+ *   used as a bottom half handler. This is for example useful for bottom
+ *   halves that want to be delayed until the next clock tick.
+ *
+ * Notes:
+ * - Bottom halves are called in the reverse of the order in which they were
+ *   linked into the list.
+ */
+
+struct tq_struct {
+ struct list_head list; /* linked list of active bh's */
+ unsigned long sync; /* must be initialized to zero */
+ void (*routine)(void *); /* function to call */
+ void *data; /* argument to function */
+};
+
+/*
+ * Emit code to initialise a tq_struct's routine and data pointers
+ */
+#define PREPARE_TQUEUE(_tq, _routine, _data) \
+ do { \
+ (_tq)->routine = _routine; \
+ (_tq)->data = _data; \
+ } while (0)
+
+/*
+ * Emit code to initialise all of a tq_struct
+ */
+#define INIT_TQUEUE(_tq, _routine, _data) \
+ do { \
+ INIT_LIST_HEAD(&(_tq)->list); \
+ (_tq)->sync = 0; \
+ PREPARE_TQUEUE((_tq), (_routine), (_data)); \
+ } while (0)
+
+typedef struct list_head task_queue;
+
+#define DECLARE_TASK_QUEUE(q) LIST_HEAD(q)
+#define TQ_ACTIVE(q) (!list_empty(&q))
+
+extern task_queue tq_timer, tq_immediate, tq_disk;
+
+/*
+ * To implement your own list of active bottom halves, use the following
+ * two definitions:
+ *
+ * DECLARE_TASK_QUEUE(my_tqueue);
+ * struct tq_struct my_task = {
+ * routine: (void (*)(void *)) my_routine,
+ * data: &my_data
+ * };
+ *
+ * To activate a bottom half on a list, use:
+ *
+ * queue_task(&my_task, &my_tqueue);
+ *
+ * To later run the queued tasks use
+ *
+ * run_task_queue(&my_tqueue);
+ *
+ * This allows you to do deferred processing. For example, you could
+ * have a task queue called tq_timer, which is executed within the timer
+ * interrupt.
+ */
+
+extern spinlock_t tqueue_lock;
+
+/*
+ * Queue a task on a tq. Return non-zero if it was successfully
+ * added.
+ */
+static inline int queue_task(struct tq_struct *bh_pointer, task_queue *bh_list)
+{
+ int ret = 0;
+ if (!test_and_set_bit(0,&bh_pointer->sync)) {
+ unsigned long flags;
+ spin_lock_irqsave(&tqueue_lock, flags);
+ list_add_tail(&bh_pointer->list, bh_list);
+ spin_unlock_irqrestore(&tqueue_lock, flags);
+ ret = 1;
+ }
+ return ret;
+}
+
+/*
+ * Call all "bottom halves" on a given list.
+ */
+
+extern void __run_task_queue(task_queue *list);
+
+static inline void run_task_queue(task_queue *list)
+{
+ if (TQ_ACTIVE(*list))
+ __run_task_queue(list);
+}
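+
+/*
+ * Illustrative usage sketch, not part of the original header; my_handler and
+ * my_data are hypothetical:
+ *
+ *     static DECLARE_TASK_QUEUE(my_tqueue);
+ *     static struct tq_struct my_task;
+ *
+ *     INIT_TQUEUE(&my_task, my_handler, &my_data);
+ *     queue_task(&my_task, &my_tqueue);     returns 1 if newly queued
+ *     ...
+ *     run_task_queue(&my_tqueue);           runs and empties the list
+ */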
+
+#endif /* _LINUX_TQUEUE_H */
diff --git a/xen/include/xeno/types.h b/xen/include/xeno/types.h
new file mode 100644
index 0000000000..c5f8d5586d
--- /dev/null
+++ b/xen/include/xeno/types.h
@@ -0,0 +1,50 @@
+#ifndef __TYPES_H__
+#define __TYPES_H__
+
+#include <asm/types.h>
+
+#ifndef NULL
+#define NULL ((void*)0)
+#endif
+
+#define INT_MAX ((int)(~0U>>1))
+#define INT_MIN (-INT_MAX - 1)
+#define UINT_MAX (~0U)
+#define LONG_MAX ((long)(~0UL>>1))
+#define LONG_MIN (-LONG_MAX - 1)
+#define ULONG_MAX (~0UL)
+
+typedef unsigned int size_t;
+
+/* bsd */
+typedef unsigned char u_char;
+typedef unsigned short u_short;
+typedef unsigned int u_int;
+typedef unsigned long u_long;
+
+/* sysv */
+typedef unsigned char unchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+#ifndef __BIT_TYPES_DEFINED__
+#define __BIT_TYPES_DEFINED__
+
+typedef __u8 u_int8_t;
+typedef __s8 int8_t;
+typedef __u16 u_int16_t;
+typedef __s16 int16_t;
+typedef __u32 u_int32_t;
+typedef __s32 int32_t;
+
+#endif /* !(__BIT_TYPES_DEFINED__) */
+
+typedef __u8 uint8_t;
+typedef __u16 uint16_t;
+typedef __u32 uint32_t;
+typedef __u64 uint64_t;
+
+#endif /* __TYPES_H__ */
diff --git a/xen/include/xeno/vif.h b/xen/include/xeno/vif.h
new file mode 100644
index 0000000000..7b56caaabe
--- /dev/null
+++ b/xen/include/xeno/vif.h
@@ -0,0 +1,94 @@
+/* vif.h
+ *
+ * This is the hypervisor end of the network code. The net_ring structure
+ * stored in each vif is placed on a shared page to interact with the guest VM.
+ *
+ * Copyright (c) 2002, A K Warfield and K A Fraser
+ */
+
+/* virtual network interface struct and associated defines. */
+/* net_vif_st is the larger struct that describes a virtual network interface.
+ * It contains a pointer to the net_ring_t structure, which needs to be on a
+ * shared page between the hypervisor and the guest. The vif struct is private
+ * to the hypervisor and is used primarily as a container to allow routing
+ * and interface administration. This definition should eventually be moved
+ * to a non-shared interface file, as it is of no relevance to the guest.
+ */
+
+#include <hypervisor-ifs/network.h>
+#include <xeno/skbuff.h>
+
+/*
+ * shadow ring structures are used to protect the descriptors from
+ * tampering after they have been passed to the hypervisor.
+ *
+ * TX_RING_SIZE and RX_RING_SIZE are defined in the shared network.h.
+ */
+
+typedef struct rx_shadow_entry_st {
+ unsigned long addr;
+ unsigned short size;
+ unsigned short status;
+ unsigned long flush_count;
+} rx_shadow_entry_t;
+
+typedef struct tx_shadow_entry_st {
+ void *header;
+ unsigned long payload;
+ unsigned short size;
+ unsigned short status;
+} tx_shadow_entry_t;
+
+typedef struct net_shadow_ring_st {
+ rx_shadow_entry_t *rx_ring;
+ tx_shadow_entry_t *tx_ring;
+
+ /*
+ * Private copy of producer. Follows guest OS version, but never
+ * catches up with our consumer index.
+ */
+ unsigned int rx_prod;
+ /* Points at next buffer to be filled by NIC. Chases rx_prod. */
+ unsigned int rx_idx;
+ /* Points at next buffer to be returned to the guest OS. Chases rx_idx. */
+ unsigned int rx_cons;
+
+ /*
+ * Private copy of producer. Follows guest OS version, but never
+ * catches up with our consumer index.
+ */
+ unsigned int tx_prod;
+ /* Points at next buffer to be scheduled. Chases tx_prod. */
+ unsigned int tx_idx;
+ /* Points at next buffer to be returned to the guest OS. Chases tx_idx. */
+ unsigned int tx_cons;
+} net_shadow_ring_t;
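+
+/*
+ * Illustrative invariant sketch, not part of the original header. In each
+ * direction the three indices move forward modulo the ring size and chase
+ * one another:
+ *
+ *     rx_cons <= rx_idx <= rx_prod   (mod RX_RING_SIZE)
+ *
+ * e.g. the hypervisor fills the buffer at rx_idx and then advances it with
+ * rx_idx = (rx_idx + 1) & (RX_RING_SIZE - 1), never passing rx_prod.
+ */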
+
+typedef struct net_vif_st {
+ net_ring_t *net_ring;
+ net_shadow_ring_t *shadow_ring;
+ int id;
+ struct task_struct *domain;
+ struct list_head list;
+} net_vif_t;
+
+/* VIF-related defines. */
+#define MAX_GUEST_VIFS 2 // each VIF is a small overhead in task_struct
+#define MAX_SYSTEM_VIFS 256
+
+/* vif globals */
+extern int sys_vif_count;
+extern net_vif_t *sys_vif_list[];
+
+/* vif prototypes */
+net_vif_t *create_net_vif(int domain);
+void destroy_net_vif(struct task_struct *p);
+void add_default_net_rule(int vif_id, u32 ipaddr);
+int __net_get_target_vif(u8 *data, unsigned int len, int src_vif);
+
+#define net_get_target_vif(skb) __net_get_target_vif(skb->data, skb->len, skb->src_vif)
+/* status fields per-descriptor:
+ */
+
+
diff --git a/xen/net/Makefile b/xen/net/Makefile
new file mode 100644
index 0000000000..e9a8eba3d7
--- /dev/null
+++ b/xen/net/Makefile
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o network.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen/net/dev.c b/xen/net/dev.c
new file mode 100644
index 0000000000..c42e516686
--- /dev/null
+++ b/xen/net/dev.c
@@ -0,0 +1,2019 @@
+/*
+ * NET3 Protocol independent device support routines.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/delay.h>
+#include <linux/lib.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/brlock.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <linux/event.h>
+#include <asm/domain_page.h>
+#include <asm/pgalloc.h>
+
+#define BUG_TRAP ASSERT
+#define notifier_call_chain(_a,_b,_c) ((void)0)
+#define rtmsg_ifinfo(_a,_b,_c) ((void)0)
+#define rtnl_lock() ((void)0)
+#define rtnl_unlock() ((void)0)
+
+#if 1
+#define DPRINTK(_f, _a...) printk(_f , ## _a)
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+#define TX_RING_INC(_i) (((_i)+1) & (TX_RING_SIZE-1))
+#define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1))
+#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
+#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
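+
+/*
+ * Illustrative aside (not from the patch itself): these macros rely on the
+ * ring sizes being powers of two, so masking with SIZE-1 is a cheap modulo.
+ * For example, if TX_RING_SIZE were 256:
+ *
+ *     TX_RING_INC(255)    == 0
+ *     TX_RING_ADD(250, 8) == 2
+ */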
+
+struct net_device *the_dev = NULL;
+
+/*
+ * Transmitted packets are fragmented, so we can copy the important headers
+ * before checking them for validity. This avoids the need for page protection.
+ */
+/* Ethernet + IP headers */
+#define PKT_PROT_LEN (ETH_HLEN + 20)
+static kmem_cache_t *net_header_cachep;
+
+/**
+ * __dev_get_by_name - find a device by its name
+ * @name: name to find
+ *
+ * Find an interface by name. Must be called under RTNL semaphore
+ * or @dev_base_lock. If the name is found a pointer to the device
+ * is returned. If the name is not found then %NULL is returned. The
+ * reference counters are not incremented so the caller must be
+ * careful with locks.
+ */
+
+
+struct net_device *__dev_get_by_name(const char *name)
+{
+ struct net_device *dev;
+
+ for (dev = dev_base; dev != NULL; dev = dev->next) {
+ if (strncmp(dev->name, name, IFNAMSIZ) == 0)
+ return dev;
+ }
+ return NULL;
+}
+
+/**
+ * dev_get_by_name - find a device by its name
+ * @name: name to find
+ *
+ * Find an interface by name. This can be called from any
+ * context and does its own locking. The returned handle has
+ * the usage count incremented and the caller must use dev_put() to
+ * release it when it is no longer needed. %NULL is returned if no
+ * matching device is found.
+ */
+
+struct net_device *dev_get_by_name(const char *name)
+{
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_name(name);
+ if (dev)
+ dev_hold(dev);
+ read_unlock(&dev_base_lock);
+ return dev;
+}
+
+/**
+ * dev_get - test if a device exists
+ * @name: name to test for
+ *
+ * Test if a name exists. Returns true if the name is found. In order
+ * to be sure the name is not allocated or removed during the test the
+ * caller must hold the rtnl semaphore.
+ *
+ * This function primarily exists for backward compatibility with older
+ * drivers.
+ */
+
+int dev_get(const char *name)
+{
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_name(name);
+ read_unlock(&dev_base_lock);
+ return dev != NULL;
+}
+
+/**
+ * __dev_get_by_index - find a device by its ifindex
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns a pointer to the device,
+ * or %NULL if it is not found. The device has not
+ * had its reference counter increased so the caller must be careful
+ * about locking. The caller must hold either the RTNL semaphore
+ * or @dev_base_lock.
+ */
+
+struct net_device * __dev_get_by_index(int ifindex)
+{
+ struct net_device *dev;
+
+ for (dev = dev_base; dev != NULL; dev = dev->next) {
+ if (dev->ifindex == ifindex)
+ return dev;
+ }
+ return NULL;
+}
+
+
+/**
+ * dev_get_by_index - find a device by its ifindex
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns a pointer to the device,
+ * or NULL if it is not found. The device returned has
+ * had a reference added and the pointer is safe until the user calls
+ * dev_put to indicate they have finished with it.
+ */
+
+struct net_device * dev_get_by_index(int ifindex)
+{
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_index(ifindex);
+ if (dev)
+ dev_hold(dev);
+ read_unlock(&dev_base_lock);
+ return dev;
+}
+
+/**
+ * dev_getbyhwaddr - find a device by its hardware address
+ * @type: media type of device
+ * @ha: hardware address
+ *
+ * Search for an interface by MAC address. Returns a pointer to the device,
+ * or NULL if it is not found. The caller must hold the
+ * rtnl semaphore. The returned device has not had its ref count increased
+ * and the caller must therefore be careful about locking.
+ *
+ * BUGS:
+ * If the API was consistent this would be __dev_get_by_hwaddr
+ */
+
+struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
+{
+ struct net_device *dev;
+
+ for (dev = dev_base; dev != NULL; dev = dev->next) {
+ if (dev->type == type &&
+ memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
+ return dev;
+ }
+ return NULL;
+}
+
+/**
+ * dev_alloc_name - allocate a name for a device
+ * @dev: device
+ * @name: name format string
+ *
+ * Passed a format string, e.g. "lt%d", it will try to find a suitable
+ * id. Not efficient for many devices, and not called a lot. The caller
+ * must hold the dev_base or rtnl lock while allocating the name and
+ * adding the device in order to avoid duplicates. Returns the number
+ * of the unit assigned or a negative errno code.
+ */
+
+int dev_alloc_name(struct net_device *dev, const char *name)
+{
+ int i;
+ char buf[32];
+ char *p;
+
+ /*
+ * Verify the string as this thing may have come from
+ * the user. There must be either one "%d" and no other "%"
+ * characters, or no "%" characters at all.
+ */
+ p = strchr(name, '%');
+ if (p && (p[1] != 'd' || strchr(p+2, '%')))
+ return -EINVAL;
+
+ /*
+ * If you need over 100 please also fix the algorithm...
+ */
+ for (i = 0; i < 100; i++) {
+ snprintf(buf,sizeof(buf),name,i);
+ if (__dev_get_by_name(buf) == NULL) {
+ strcpy(dev->name, buf);
+ return i;
+ }
+ }
+ return -ENFILE; /* Over 100 of the things .. bail out! */
+}
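+
+/*
+ * Illustrative sketch, not from the patch itself. Given hypothetical existing
+ * devices "eth0" and "eth1":
+ *
+ *     err = dev_alloc_name(dev, "eth%d");
+ *
+ * sets dev->name to "eth2" and returns 2, or a negative errno on failure.
+ */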
+
+/**
+ * dev_alloc - allocate a network device and name
+ * @name: name format string
+ * @err: error return pointer
+ *
+ * Passed a format string, e.g. "lt%d", it will allocate a network device
+ * and space for the name. %NULL is returned if no memory is available.
+ * If the allocation succeeds then the name is assigned and the
+ * device pointer returned. %NULL is returned if the name allocation
+ * failed. The cause of an error is returned as a negative errno code
+ * in the variable that @err points to.
+ *
+ * The caller must hold the @dev_base or RTNL locks when doing this in
+ * order to avoid duplicate name allocations.
+ */
+
+struct net_device *dev_alloc(const char *name, int *err)
+{
+ struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL);
+ if (dev == NULL) {
+ *err = -ENOBUFS;
+ return NULL;
+ }
+ memset(dev, 0, sizeof(struct net_device));
+ *err = dev_alloc_name(dev, name);
+ if (*err < 0) {
+ kfree(dev);
+ return NULL;
+ }
+ return dev;
+}
+
+/**
+ * netdev_state_change - device changes state
+ * @dev: device to cause notification
+ *
+ * Called to indicate a device has changed state. This function calls
+ * the notifier chains for netdev_chain and sends a NEWLINK message
+ * to the routing socket.
+ */
+
+void netdev_state_change(struct net_device *dev)
+{
+ if (dev->flags&IFF_UP) {
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+ }
+}
+
+
+#ifdef CONFIG_KMOD
+
+/**
+ * dev_load - load a network module
+ * @name: name of interface
+ *
+ * If a network interface is not present and the process has suitable
+ * privileges this function loads the module. If module loading is not
+ * available in this kernel then it becomes a nop.
+ */
+
+void dev_load(const char *name)
+{
+ if (!dev_get(name) && capable(CAP_SYS_MODULE))
+ request_module(name);
+}
+
+#else
+
+extern inline void dev_load(const char *unused){;}
+
+#endif
+
+static int default_rebuild_header(struct sk_buff *skb)
+{
+ printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
+ skb->dev ? skb->dev->name : "NULL!!!");
+ kfree_skb(skb);
+ return 1;
+}
+
+/**
+ * dev_open - prepare an interface for use.
+ * @dev: device to open
+ *
+ * Takes a device from down to up state. The device's private open
+ * function is invoked and then the multicast lists are loaded. Finally
+ * the device is moved into the up state and a %NETDEV_UP message is
+ * sent to the netdev notifier chain.
+ *
+ * Calling this function on an active interface is a nop. On a failure
+ * a negative errno code is returned.
+ */
+
+int dev_open(struct net_device *dev)
+{
+ int ret = 0;
+
+ /*
+ * Is it already up?
+ */
+
+ if (dev->flags&IFF_UP)
+ return 0;
+
+ /*
+ * Is it even present?
+ */
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ /*
+ * Call device private open method
+ */
+ if (try_inc_mod_count(dev->owner)) {
+ if (dev->open) {
+ ret = dev->open(dev);
+ if (ret != 0 && dev->owner)
+ __MOD_DEC_USE_COUNT(dev->owner);
+ }
+ } else {
+ ret = -ENODEV;
+ }
+
+ /*
+ * If it went open OK then:
+ */
+
+ if (ret == 0)
+ {
+ /*
+ * Set the flags.
+ */
+ dev->flags |= IFF_UP;
+
+ set_bit(__LINK_STATE_START, &dev->state);
+
+ /*
+ * Initialize multicasting status
+ */
+ dev_mc_upload(dev);
+
+ /*
+ * Wakeup transmit queue engine
+ */
+ dev_activate(dev);
+
+ /*
+ * ... and announce new interface.
+ */
+ notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+ }
+ return(ret);
+}
+
+
+/**
+ * dev_close - shutdown an interface.
+ * @dev: device to shutdown
+ *
+ * This function moves an active device into down state. A
+ * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
+ * chain.
+ */
+
+int dev_close(struct net_device *dev)
+{
+ if (!(dev->flags&IFF_UP))
+ return 0;
+
+ /*
+ * Tell people we are going down, so that they can
+     * prepare for its death while the device is still operating.
+ */
+ notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+
+ dev_deactivate(dev);
+
+ clear_bit(__LINK_STATE_START, &dev->state);
+
+ /*
+ * Call the device specific close. This cannot fail.
+ * Only if device is UP
+ *
+ * We allow it to be called even after a DETACH hot-plug
+ * event.
+ */
+
+ if (dev->stop)
+ dev->stop(dev);
+
+ /*
+ * Device is now down.
+ */
+
+ dev->flags &= ~IFF_UP;
+
+ /*
+ * Tell people we are down
+ */
+ notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+
+ /*
+ * Drop the module refcount
+ */
+ if (dev->owner)
+ __MOD_DEC_USE_COUNT(dev->owner);
+
+ return(0);
+}
+
+
+#ifdef CONFIG_HIGHMEM
+/* Actually, we should eliminate this check as soon as we know that:
+ * 1. An IOMMU is present and allows mapping all the memory.
+ * 2. No high memory really exists on this machine.
+ */
+
+static inline int
+illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+{
+ int i;
+
+ if (dev->features&NETIF_F_HIGHDMA)
+ return 0;
+
+ for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
+ if (skb_shinfo(skb)->frags[i].page >= highmem_start_page)
+ return 1;
+
+ return 0;
+}
+#else
+#define illegal_highdma(dev, skb) (0)
+#endif
+
+
+/*=======================================================================
+ Receiver routines
+ =======================================================================*/
+
+struct netif_rx_stats netdev_rx_stat[NR_CPUS];
+
+void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
+{
+ net_shadow_ring_t *shadow_ring;
+ rx_shadow_entry_t *rx;
+ unsigned long *g_pte;
+ struct pfn_info *g_pfn, *h_pfn;
+ unsigned int i;
+
+ memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
+ if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
+ {
+ memset(skb->nh.raw + 18, 0, ETH_ALEN);
+ }
+ shadow_ring = vif->shadow_ring;
+
+ if ( (i = shadow_ring->rx_idx) == shadow_ring->rx_prod )
+ {
+ return;
+ }
+
+ if ( shadow_ring->rx_ring[i].status != RING_STATUS_OK )
+ {
+ DPRINTK("Bad buffer in deliver_packet()\n");
+ shadow_ring->rx_idx = RX_RING_INC(i);
+ return;
+ }
+
+ rx = shadow_ring->rx_ring + i;
+ if ( (skb->len + ETH_HLEN) < rx->size )
+ rx->size = skb->len + ETH_HLEN;
+
+ g_pte = map_domain_mem(rx->addr);
+
+ g_pfn = frame_table + (*g_pte >> PAGE_SHIFT);
+ h_pfn = skb->pf;
+
+ h_pfn->tot_count = h_pfn->type_count = 1;
+ g_pfn->tot_count = g_pfn->type_count = 0;
+ h_pfn->flags = g_pfn->flags & (~PG_type_mask);
+
+ if (*g_pte & _PAGE_RW) h_pfn->flags |= PGT_writeable_page;
+ g_pfn->flags = 0;
+
+ /* Point the guest at the new machine frame. */
+ machine_to_phys_mapping[h_pfn - frame_table]
+ = machine_to_phys_mapping[g_pfn - frame_table];
+ *g_pte = (*g_pte & ~PAGE_MASK)
+ | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK);
+ *g_pte |= _PAGE_PRESENT;
+
+ unmap_domain_mem(g_pte);
+
+ /* Our skbuff now points at the guest's old frame. */
+ skb->pf = g_pfn;
+
+ shadow_ring->rx_idx = RX_RING_INC(i);
+}
+
+/**
+ * netif_rx - post buffer to the network code
+ * @skb: buffer to post
+ *
+ * This function receives a packet from a device driver and queues it for
+ * the upper (protocol) levels to process. It always succeeds. The buffer
+ * may be dropped during processing for congestion control or by the
+ * protocol layers.
+ *
+ * return values:
+ * NET_RX_SUCCESS (no congestion)
+ * NET_RX_DROP (packet was dropped)
+ */
+
+int netif_rx(struct sk_buff *skb)
+{
+    /* cpu_mask is set by mark_hyp_event() below, so it is needed on UP too. */
+    unsigned long cpu_mask;
+
+ struct task_struct *p;
+ int this_cpu = smp_processor_id();
+ unsigned long flags;
+ net_vif_t *vif;
+
+ local_irq_save(flags);
+
+ ASSERT(skb->skb_type == SKB_ZERO_COPY);
+ ASSERT((skb->data - skb->head) == (18 + ETH_HLEN));
+
+ skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
+
+ /*
+     * Remapping this address really screws up all the skb pointers. We
+     * need to map them all here sufficiently to get the packet
+     * demultiplexed. This remapping happens more than once in the code and
+     * is grim. It will be fixed in a later update -- drivers should be
+ * able to align the packet arbitrarily.
+ */
+
+ skb->data = skb->head;
+ skb_reserve(skb,18); /* 18 is the 16 from dev_alloc_skb plus 2 for
+ IP header alignment. */
+ skb->mac.raw = skb->data;
+ skb->data += ETH_HLEN;
+ skb->nh.raw = skb->data;
+
+ netdev_rx_stat[this_cpu].total++;
+
+ if ( skb->src_vif == VIF_UNKNOWN_INTERFACE )
+ skb->src_vif = VIF_PHYSICAL_INTERFACE;
+
+ if ( skb->dst_vif == VIF_UNKNOWN_INTERFACE )
+ skb->dst_vif = __net_get_target_vif(skb->mac.raw,
+ skb->len, skb->src_vif);
+
+ if ( (vif = sys_vif_list[skb->dst_vif]) == NULL )
+ goto drop;
+
+ /*
+ * This lock-and-walk of the task list isn't really necessary, and is
+ * an artifact of the old code. The vif contains a pointer to the skb
+ * list we are going to queue the packet in, so the lock and the inner
+ * loop could be removed. The argument against this is a possible race
+ * in which a domain is killed as packets are being delivered to it.
+ * This would result in the dest vif vanishing before we can deliver to
+ * it.
+ */
+
+ if ( skb->dst_vif >= VIF_PHYSICAL_INTERFACE )
+ {
+ read_lock(&tasklist_lock);
+ p = &idle0_task;
+ do {
+ if ( p != vif->domain ) continue;
+ deliver_packet(skb, vif);
+ cpu_mask = mark_hyp_event(p, _HYP_EVENT_NET_RX);
+ read_unlock(&tasklist_lock);
+ goto found;
+ }
+ while ( (p = p->next_task) != &idle0_task );
+ read_unlock(&tasklist_lock);
+ goto drop;
+ }
+
+ drop:
+ netdev_rx_stat[this_cpu].dropped++;
+ unmap_domain_mem(skb->head);
+ kfree_skb(skb);
+ local_irq_restore(flags);
+ return NET_RX_DROP;
+
+ found:
+ unmap_domain_mem(skb->head);
+ skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
+ kfree_skb(skb);
+ hyp_event_notify(cpu_mask);
+ local_irq_restore(flags);
+ return NET_RX_SUCCESS;
+}
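+
+/*
+ * Illustrative sketch of the driver-side calling convention, not from the
+ * patch itself. A NIC driver handing over a zero-copy skb might do:
+ *
+ *     skb->skb_type = SKB_ZERO_COPY;
+ *     skb->src_vif  = VIF_UNKNOWN_INTERFACE;    defaulted to physical i/f
+ *     skb->dst_vif  = VIF_UNKNOWN_INTERFACE;    demultiplexed above
+ *     (void)netif_rx(skb);                      NET_RX_SUCCESS or NET_RX_DROP
+ */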
+
+
+/*************************************************************
+ * NEW TRANSMIT SCHEDULER
+ */
+
+struct list_head net_schedule_list;
+spinlock_t net_schedule_list_lock;
+
+static int __on_net_schedule_list(net_vif_t *vif)
+{
+ return vif->list.next != NULL;
+}
+
+static void remove_from_net_schedule_list(net_vif_t *vif)
+{
+ unsigned long flags;
+ if ( !__on_net_schedule_list(vif) ) return;
+ spin_lock_irqsave(&net_schedule_list_lock, flags);
+ if ( __on_net_schedule_list(vif) )
+ {
+ list_del(&vif->list);
+ vif->list.next = NULL;
+ }
+ spin_unlock_irqrestore(&net_schedule_list_lock, flags);
+}
+
+static void add_to_net_schedule_list_tail(net_vif_t *vif)
+{
+ unsigned long flags;
+ if ( __on_net_schedule_list(vif) ) return;
+ spin_lock_irqsave(&net_schedule_list_lock, flags);
+ if ( !__on_net_schedule_list(vif) )
+ {
+ list_add_tail(&vif->list, &net_schedule_list);
+ }
+ spin_unlock_irqrestore(&net_schedule_list_lock, flags);
+}
+
+
+/* Destructor function for tx skbs. */
+static void tx_skb_release(struct sk_buff *skb)
+{
+ int i;
+ net_vif_t *vif = sys_vif_list[skb->src_vif];
+
+ for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ )
+ put_page_tot(skb_shinfo(skb)->frags[i].page);
+
+ if ( skb->skb_type == SKB_NODATA )
+ kmem_cache_free(net_header_cachep, skb->head);
+
+ skb_shinfo(skb)->nr_frags = 0;
+
+ /* This would mean that the guest OS has fiddled with our index. */
+ if ( vif->shadow_ring->tx_cons != vif->net_ring->tx_cons )
+ DPRINTK("Shadow and shared rings out of sync (%d/%d)\n",
+ vif->shadow_ring->tx_cons, vif->net_ring->tx_cons);
+
+ /*
+ * XXX This assumes that, per vif, SKBs are processed in-order!
+ * Also assumes no concurrency. This is safe because each vif
+ * maps to one NIC. This is executed in NIC interrupt code, so we have
+ * mutual exclusion from do_IRQ().
+ */
+ vif->shadow_ring->tx_cons = TX_RING_INC(vif->shadow_ring->tx_cons);
+ vif->net_ring->tx_cons = vif->shadow_ring->tx_cons;
+ if ( vif->net_ring->tx_cons == vif->net_ring->tx_event )
+ set_bit(_EVENT_NET_TX,
+ &sys_vif_list[skb->src_vif]->domain->shared_info->events);
+}
+
+
+static void net_tx_action(unsigned long unused)
+{
+ struct net_device *dev = the_dev;
+ struct list_head *ent;
+ struct sk_buff *skb;
+ net_vif_t *vif;
+ tx_shadow_entry_t *tx;
+ int pending_bytes = 0, pending_bytes_max = 1;
+
+ spin_lock(&dev->xmit_lock);
+ while ( !netif_queue_stopped(dev) &&
+ (pending_bytes < pending_bytes_max) &&
+ !list_empty(&net_schedule_list) )
+ {
+ /* Get a vif from the list with work to do. */
+ ent = net_schedule_list.next;
+ vif = list_entry(ent, net_vif_t, list);
+ remove_from_net_schedule_list(vif);
+ if ( vif->shadow_ring->tx_idx == vif->shadow_ring->tx_prod )
+ continue;
+
+ /* Check the chosen entry is good. */
+ tx = &vif->shadow_ring->tx_ring[vif->shadow_ring->tx_idx];
+ if ( tx->status != RING_STATUS_OK ) goto skip_desc;
+
+ if ( (skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL )
+ {
+ add_to_net_schedule_list_tail(vif);
+ printk("Out of memory in net_tx_action()!\n");
+ goto out;
+ }
+
+ skb->destructor = tx_skb_release;
+
+ skb->head = skb->data = tx->header;
+ skb->end = skb->tail = skb->head + PKT_PROT_LEN;
+
+ skb->dev = the_dev;
+ skb->src_vif = vif->id;
+ skb->dst_vif = VIF_PHYSICAL_INTERFACE;
+ skb->mac.raw = skb->data;
+
+ skb_shinfo(skb)->frags[0].page = frame_table +
+ (tx->payload >> PAGE_SHIFT);
+ skb_shinfo(skb)->frags[0].size = tx->size - PKT_PROT_LEN;
+ skb_shinfo(skb)->frags[0].page_offset = tx->payload & ~PAGE_MASK;
+ skb_shinfo(skb)->nr_frags = 1;
+
+ skb->data_len = tx->size - PKT_PROT_LEN;
+ skb->len = tx->size;
+
+ /* Transmit should always work, or the queue would be stopped. */
+ if ( dev->hard_start_xmit(skb, dev) != 0 )
+ {
+ add_to_net_schedule_list_tail(vif);
+ printk("Weird failure in hard_start_xmit!\n");
+ goto out;
+ }
+
+ skip_desc:
+ vif->shadow_ring->tx_idx = TX_RING_INC(vif->shadow_ring->tx_idx);
+ if ( vif->shadow_ring->tx_idx != vif->shadow_ring->tx_prod )
+ add_to_net_schedule_list_tail(vif);
+ }
+ out:
+ spin_unlock(&dev->xmit_lock);
+}
+
+DECLARE_TASKLET_DISABLED(net_tx_tasklet, net_tx_action, 0);
+
+
+/*
+ * update_shared_ring(void)
+ *
+ * This replaces flush_rx_queue as the guest event handler to move packets
+ * queued in the guest ring up to the guest. Really, the packet is already
+ * there (it was page-flipped in deliver_packet); this just moves the ring
+ * descriptor across from the shadow ring and increments the pointers.
+ */
+
+void update_shared_ring(void)
+{
+ rx_shadow_entry_t *rx;
+ shared_info_t *s = current->shared_info;
+ net_ring_t *net_ring;
+ net_shadow_ring_t *shadow_ring;
+ unsigned int nvif;
+
+ clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
+
+ for ( nvif = 0; nvif < current->num_net_vifs; nvif++ )
+ {
+ net_ring = current->net_vif_list[nvif]->net_ring;
+ shadow_ring = current->net_vif_list[nvif]->shadow_ring;
+
+ /* This would mean that the guest OS has fiddled with our index. */
+ if ( shadow_ring->rx_cons != net_ring->rx_cons )
+ DPRINTK("Shadow and shared rings out of sync (%d/%d)\n",
+ shadow_ring->rx_cons, net_ring->rx_cons);
+
+ while ( shadow_ring->rx_cons != shadow_ring->rx_idx )
+ {
+ rx = shadow_ring->rx_ring + shadow_ring->rx_cons;
+ copy_to_user(net_ring->rx_ring + net_ring->rx_cons, rx,
+ sizeof(rx_entry_t));
+
+ if ( rx->flush_count == tlb_flush_count[smp_processor_id()] )
+ __flush_tlb();
+
+ shadow_ring->rx_cons = RX_RING_INC(shadow_ring->rx_cons);
+
+ if ( shadow_ring->rx_cons == net_ring->rx_event )
+ set_bit(_EVENT_NET_RX, &s->events);
+ }
+ net_ring->rx_cons = shadow_ring->rx_cons;
+ }
+}
+
+
+/*
+ * We need this ioctl for efficient implementation of the
+ * if_indextoname() function required by the IPv6 API. Without
+ * it, we would have to search all the interfaces to find a
+ * match. --pb
+ */
+
+static int dev_ifname(struct ifreq *arg)
+{
+ struct net_device *dev;
+ struct ifreq ifr;
+
+ /*
+ * Fetch the caller's info block.
+ */
+
+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ return -EFAULT;
+
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_index(ifr.ifr_ifindex);
+ if (!dev) {
+ read_unlock(&dev_base_lock);
+ return -ENODEV;
+ }
+
+ strcpy(ifr.ifr_name, dev->name);
+ read_unlock(&dev_base_lock);
+
+ if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ return 0;
+}
+
+
+/**
+ * netdev_set_master - set up master/slave pair
+ * @slave: slave device
+ * @master: new master device
+ *
+ * Changes the master device of the slave. Pass %NULL to break the
+ * bonding. The caller must hold the RTNL semaphore. On a failure
+ * a negative errno code is returned. On success the reference counts
+ * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
+ * function returns zero.
+ */
+
+int netdev_set_master(struct net_device *slave, struct net_device *master)
+{
+ struct net_device *old = slave->master;
+
+ if (master) {
+ if (old)
+ return -EBUSY;
+ dev_hold(master);
+ }
+
+ br_write_lock_bh(BR_NETPROTO_LOCK);
+ slave->master = master;
+ br_write_unlock_bh(BR_NETPROTO_LOCK);
+
+ if (old)
+ dev_put(old);
+
+ if (master)
+ slave->flags |= IFF_SLAVE;
+ else
+ slave->flags &= ~IFF_SLAVE;
+
+ rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
+ return 0;
+}
+
+/**
+ * dev_set_promiscuity - update promiscuity count on a device
+ * @dev: device
+ * @inc: modifier
+ *
+ * Add or remove promiscuity from a device. While the count in the device
+ * remains above zero the interface remains promiscuous. Once it hits zero
+ * the device reverts to normal filtering operation. A negative inc
+ * value is used to drop promiscuity on the device.
+ */
+
+void dev_set_promiscuity(struct net_device *dev, int inc)
+{
+ unsigned short old_flags = dev->flags;
+
+ dev->flags |= IFF_PROMISC;
+ if ((dev->promiscuity += inc) == 0)
+ dev->flags &= ~IFF_PROMISC;
+ if (dev->flags^old_flags) {
+#ifdef CONFIG_NET_FASTROUTE
+ if (dev->flags&IFF_PROMISC) {
+ netdev_fastroute_obstacles++;
+ dev_clear_fastroute(dev);
+ } else
+ netdev_fastroute_obstacles--;
+#endif
+ dev_mc_upload(dev);
+ printk(KERN_INFO "device %s %s promiscuous mode\n",
+ dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
+ }
+}
+
+/**
+ * dev_set_allmulti - update allmulti count on a device
+ * @dev: device
+ * @inc: modifier
+ *
+ * Add or remove reception of all multicast frames on a device. While the
+ * count in the device remains above zero the interface keeps receiving
+ * all multicast frames. Once it hits zero the device reverts to normal
+ * filtering operation. A negative @inc value is used to drop the counter
+ * when releasing a resource needing all multicasts.
+ */
+
+void dev_set_allmulti(struct net_device *dev, int inc)
+{
+ unsigned short old_flags = dev->flags;
+
+ dev->flags |= IFF_ALLMULTI;
+ if ((dev->allmulti += inc) == 0)
+ dev->flags &= ~IFF_ALLMULTI;
+ if (dev->flags^old_flags)
+ dev_mc_upload(dev);
+}
+
+int dev_change_flags(struct net_device *dev, unsigned flags)
+{
+ int ret;
+ int old_flags = dev->flags;
+
+ /*
+ * Set the flags on our device.
+ */
+
+ dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC|
+ IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
+ (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));
+
+ /*
+ * Load in the correct multicast list now the flags have changed.
+ */
+
+ dev_mc_upload(dev);
+
+ /*
+     * Have we downed the interface? We handle IFF_UP ourselves
+ * according to user attempts to set it, rather than blindly
+ * setting it.
+ */
+
+ ret = 0;
+ if ((old_flags^flags)&IFF_UP) /* Bit is different ? */
+ {
+ ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
+
+ if (ret == 0)
+ dev_mc_upload(dev);
+ }
+
+ if (dev->flags&IFF_UP &&
+ ((old_flags^dev->flags)&
+ ~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE)))
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+
+ if ((flags^dev->gflags)&IFF_PROMISC) {
+ int inc = (flags&IFF_PROMISC) ? +1 : -1;
+ dev->gflags ^= IFF_PROMISC;
+ dev_set_promiscuity(dev, inc);
+ }
+
+ /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
+       is important. Some (broken) drivers set IFF_PROMISC when
+       IFF_ALLMULTI is requested, without asking us and without reporting it.
+ */
+ if ((flags^dev->gflags)&IFF_ALLMULTI) {
+ int inc = (flags&IFF_ALLMULTI) ? +1 : -1;
+ dev->gflags ^= IFF_ALLMULTI;
+ dev_set_allmulti(dev, inc);
+ }
+
+ if (old_flags^dev->flags)
+ rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags);
+
+ return ret;
+}
+
+/*
+ * Perform the SIOCxIFxxx calls.
+ */
+
+static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
+{
+ struct net_device *dev;
+ int err;
+
+ if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
+ return -ENODEV;
+
+ switch(cmd)
+ {
+ case SIOCGIFFLAGS: /* Get interface flags */
+ ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING))
+ |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI));
+ if (netif_running(dev) && netif_carrier_ok(dev))
+ ifr->ifr_flags |= IFF_RUNNING;
+ return 0;
+
+ case SIOCSIFFLAGS: /* Set interface flags */
+ return dev_change_flags(dev, ifr->ifr_flags);
+
+ case SIOCGIFMETRIC: /* Get the metric on the interface */
+ ifr->ifr_metric = 0;
+ return 0;
+
+ case SIOCSIFMETRIC: /* Set the metric on the interface */
+ return -EOPNOTSUPP;
+
+ case SIOCGIFMTU: /* Get the MTU of a device */
+ ifr->ifr_mtu = dev->mtu;
+ return 0;
+
+ case SIOCSIFMTU: /* Set the MTU of a device */
+ if (ifr->ifr_mtu == dev->mtu)
+ return 0;
+
+ /*
+ * MTU must be positive.
+ */
+
+ if (ifr->ifr_mtu<0)
+ return -EINVAL;
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ if (dev->change_mtu)
+ err = dev->change_mtu(dev, ifr->ifr_mtu);
+ else {
+ dev->mtu = ifr->ifr_mtu;
+ err = 0;
+ }
+ if (!err && dev->flags&IFF_UP)
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
+ return err;
+
+ case SIOCGIFHWADDR:
+ memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN);
+ ifr->ifr_hwaddr.sa_family=dev->type;
+ return 0;
+
+ case SIOCSIFHWADDR:
+ if (dev->set_mac_address == NULL)
+ return -EOPNOTSUPP;
+ if (ifr->ifr_hwaddr.sa_family!=dev->type)
+ return -EINVAL;
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
+ if (!err)
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
+ return err;
+
+ case SIOCSIFHWBROADCAST:
+ if (ifr->ifr_hwaddr.sa_family!=dev->type)
+ return -EINVAL;
+ memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN);
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
+ return 0;
+
+ case SIOCGIFMAP:
+ ifr->ifr_map.mem_start=dev->mem_start;
+ ifr->ifr_map.mem_end=dev->mem_end;
+ ifr->ifr_map.base_addr=dev->base_addr;
+ ifr->ifr_map.irq=dev->irq;
+ ifr->ifr_map.dma=dev->dma;
+ ifr->ifr_map.port=dev->if_port;
+ return 0;
+
+ case SIOCSIFMAP:
+ if (dev->set_config) {
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ return dev->set_config(dev,&ifr->ifr_map);
+ }
+ return -EOPNOTSUPP;
+
+ case SIOCADDMULTI:
+ if (dev->set_multicast_list == NULL ||
+ ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+ return -EINVAL;
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1);
+ return 0;
+
+ case SIOCDELMULTI:
+ if (dev->set_multicast_list == NULL ||
+ ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
+ return -EINVAL;
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1);
+ return 0;
+
+ case SIOCGIFINDEX:
+ ifr->ifr_ifindex = dev->ifindex;
+ return 0;
+
+ case SIOCSIFNAME:
+ if (dev->flags&IFF_UP)
+ return -EBUSY;
+ if (__dev_get_by_name(ifr->ifr_newname))
+ return -EEXIST;
+ memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
+ dev->name[IFNAMSIZ-1] = 0;
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ return 0;
+
+#ifdef WIRELESS_EXT
+ case SIOCGIWSTATS:
+ return dev_iwstats(dev, ifr);
+#endif /* WIRELESS_EXT */
+
+ /*
+ * Unknown or private ioctl
+ */
+
+ default:
+ if ((cmd >= SIOCDEVPRIVATE &&
+ cmd <= SIOCDEVPRIVATE + 15) ||
+ cmd == SIOCBONDENSLAVE ||
+ cmd == SIOCBONDRELEASE ||
+ cmd == SIOCBONDSETHWADDR ||
+ cmd == SIOCBONDSLAVEINFOQUERY ||
+ cmd == SIOCBONDINFOQUERY ||
+ cmd == SIOCBONDCHANGEACTIVE ||
+ cmd == SIOCETHTOOL ||
+ cmd == SIOCGMIIPHY ||
+ cmd == SIOCGMIIREG ||
+ cmd == SIOCSMIIREG) {
+ if (dev->do_ioctl) {
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ return dev->do_ioctl(dev, ifr, cmd);
+ }
+ return -EOPNOTSUPP;
+ }
+
+#ifdef WIRELESS_EXT
+ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+ if (dev->do_ioctl) {
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ return dev->do_ioctl(dev, ifr, cmd);
+ }
+ return -EOPNOTSUPP;
+ }
+#endif /* WIRELESS_EXT */
+
+ }
+ return -EINVAL;
+}
+
+/*
+ * This function handles all "interface"-type I/O control requests. The actual
+ * 'doing' part of this is dev_ifsioc above.
+ */
+
+/**
+ * dev_ioctl - network device ioctl
+ * @cmd: command to issue
+ * @arg: pointer to a struct ifreq in user space
+ *
+ * Issue ioctl functions to devices. This is normally called by the
+ * user space syscall interfaces but can sometimes be useful for
+ * other purposes. The return value is the return from the syscall if
+ * positive or a negative errno code on error.
+ */
+
+int dev_ioctl(unsigned int cmd, void *arg)
+{
+ struct ifreq ifr;
+ int ret;
+ char *colon;
+
+    /* One special case: SIOCGIFCONF takes an ifconf argument
+       and requires a shared lock, because it sleeps while writing
+       to user space.
+ */
+
+ if (cmd == SIOCGIFCONF) {
+ return -ENOSYS;
+ }
+ if (cmd == SIOCGIFNAME) {
+ return dev_ifname((struct ifreq *)arg);
+ }
+
+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ return -EFAULT;
+
+ ifr.ifr_name[IFNAMSIZ-1] = 0;
+
+ colon = strchr(ifr.ifr_name, ':');
+ if (colon)
+ *colon = 0;
+
+ /*
+ * See which interface the caller is talking about.
+ */
+
+ switch(cmd)
+ {
+ /*
+ * These ioctl calls:
+ * - can be done by all.
+ * - atomic and do not require locking.
+ * - return a value
+ */
+
+ case SIOCGIFFLAGS:
+ case SIOCGIFMETRIC:
+ case SIOCGIFMTU:
+ case SIOCGIFHWADDR:
+ case SIOCGIFSLAVE:
+ case SIOCGIFMAP:
+ case SIOCGIFINDEX:
+ dev_load(ifr.ifr_name);
+ read_lock(&dev_base_lock);
+ ret = dev_ifsioc(&ifr, cmd);
+ read_unlock(&dev_base_lock);
+ if (!ret) {
+ if (colon)
+ *colon = ':';
+ if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ }
+ return ret;
+
+ /*
+ * These ioctl calls:
+ * - require superuser power.
+ * - require strict serialization.
+ * - return a value
+ */
+
+ case SIOCETHTOOL:
+ case SIOCGMIIPHY:
+ case SIOCGMIIREG:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ dev_load(ifr.ifr_name);
+ dev_probe_lock();
+ rtnl_lock();
+ ret = dev_ifsioc(&ifr, cmd);
+ rtnl_unlock();
+ dev_probe_unlock();
+ if (!ret) {
+ if (colon)
+ *colon = ':';
+ if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ }
+ return ret;
+
+ /*
+ * These ioctl calls:
+ * - require superuser power.
+ * - require strict serialization.
+ * - do not return a value
+ */
+
+ case SIOCSIFFLAGS:
+ case SIOCSIFMETRIC:
+ case SIOCSIFMTU:
+ case SIOCSIFMAP:
+ case SIOCSIFHWADDR:
+ case SIOCSIFSLAVE:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ case SIOCSIFHWBROADCAST:
+ case SIOCSIFNAME:
+ case SIOCSMIIREG:
+ case SIOCBONDENSLAVE:
+ case SIOCBONDRELEASE:
+ case SIOCBONDSETHWADDR:
+ case SIOCBONDSLAVEINFOQUERY:
+ case SIOCBONDINFOQUERY:
+ case SIOCBONDCHANGEACTIVE:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ dev_load(ifr.ifr_name);
+ dev_probe_lock();
+ rtnl_lock();
+ ret = dev_ifsioc(&ifr, cmd);
+ rtnl_unlock();
+ dev_probe_unlock();
+ return ret;
+
+ case SIOCGIFMEM:
+ /* Get the per device memory space. We can add this but currently
+ do not support it */
+ case SIOCSIFMEM:
+ /* Set the per device memory buffer space. */
+ case SIOCSIFLINK:
+ return -EINVAL;
+
+ /*
+ * Unknown or private ioctl.
+ */
+
+ default:
+ if (cmd >= SIOCDEVPRIVATE &&
+ cmd <= SIOCDEVPRIVATE + 15) {
+ dev_load(ifr.ifr_name);
+ dev_probe_lock();
+ rtnl_lock();
+ ret = dev_ifsioc(&ifr, cmd);
+ rtnl_unlock();
+ dev_probe_unlock();
+ if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ return ret;
+ }
+#ifdef WIRELESS_EXT
+ /* Take care of Wireless Extensions */
+ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+ /* If command is `set a parameter', or
+ * `get the encoding parameters', check if
+ * the user has the right to do it */
+ if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) {
+ if(!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ }
+ dev_load(ifr.ifr_name);
+ rtnl_lock();
+ ret = dev_ifsioc(&ifr, cmd);
+ rtnl_unlock();
+ if (!ret && IW_IS_GET(cmd) &&
+ copy_to_user(arg, &ifr,
+ sizeof(struct ifreq)))
+ return -EFAULT;
+ return ret;
+ }
+#endif /* WIRELESS_EXT */
+ return -EINVAL;
+ }
+}
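+
+/*
+ * Illustrative sketch of a caller, not from the patch itself: the syscall
+ * layer passes the command and the user-space struct ifreq pointer straight
+ * through, e.g.
+ *
+ *     ret = dev_ioctl(SIOCGIFMTU, (void *)user_ifreq_ptr);
+ *
+ * where user_ifreq_ptr is a hypothetical user-space pointer; the result is
+ * 0 on success or a negative errno.
+ */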
+
+
+/**
+ * dev_new_index - allocate an ifindex
+ *
+ * Returns a suitable unique value for a new device interface
+ * number. The caller must hold the rtnl semaphore or the
+ * dev_base_lock to be sure it remains unique.
+ */
+
+int dev_new_index(void)
+{
+ static int ifindex;
+ for (;;) {
+ if (++ifindex <= 0)
+ ifindex=1;
+ if (__dev_get_by_index(ifindex) == NULL)
+ return ifindex;
+ }
+}
+
+static int dev_boot_phase = 1;
+
+/**
+ * register_netdevice - register a network device
+ * @dev: device to register
+ *
+ * Take a completed network device structure and add it to the kernel
+ * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
+ * chain. 0 is returned on success. A negative errno code is returned
+ * on a failure to set up the device, or if the name is a duplicate.
+ *
+ * Callers must hold the rtnl semaphore. See the comment at the
+ * end of Space.c for details about the locking. You may want
+ * register_netdev() instead of this.
+ *
+ * BUGS:
+ * The locking appears insufficient to guarantee two parallel registers
+ * will not get the same name.
+ */
+
+int net_dev_init(void);
+
+int register_netdevice(struct net_device *dev)
+{
+ struct net_device *d, **dp;
+#ifdef CONFIG_NET_DIVERT
+ int ret;
+#endif
+
+ spin_lock_init(&dev->queue_lock);
+ spin_lock_init(&dev->xmit_lock);
+ dev->xmit_lock_owner = -1;
+#ifdef CONFIG_NET_FASTROUTE
+ dev->fastpath_lock=RW_LOCK_UNLOCKED;
+#endif
+
+ if (dev_boot_phase)
+ net_dev_init();
+
+#ifdef CONFIG_NET_DIVERT
+ ret = alloc_divert_blk(dev);
+ if (ret)
+ return ret;
+#endif /* CONFIG_NET_DIVERT */
+
+ dev->iflink = -1;
+
+ /* Init, if this function is available */
+ if (dev->init && dev->init(dev) != 0) {
+#ifdef CONFIG_NET_DIVERT
+ free_divert_blk(dev);
+#endif
+ return -EIO;
+ }
+
+ dev->ifindex = dev_new_index();
+ if (dev->iflink == -1)
+ dev->iflink = dev->ifindex;
+
+ /* Check for existence, and append to tail of chain */
+ for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
+ if (d == dev || strcmp(d->name, dev->name) == 0) {
+#ifdef CONFIG_NET_DIVERT
+ free_divert_blk(dev);
+#endif
+ return -EEXIST;
+ }
+ }
+ /*
+     * Install a nil rebuild_header routine; it should never be called
+     * and is used just as a bug trap.
+ */
+
+ if (dev->rebuild_header == NULL)
+ dev->rebuild_header = default_rebuild_header;
+
+ /*
+     * Default initial state at registration is that the
+ * device is present.
+ */
+
+ set_bit(__LINK_STATE_PRESENT, &dev->state);
+
+ dev->next = NULL;
+ dev_init_scheduler(dev);
+ write_lock_bh(&dev_base_lock);
+ *dp = dev;
+ dev_hold(dev);
+ dev->deadbeaf = 0;
+ write_unlock_bh(&dev_base_lock);
+
+    /* Notify protocols that a new device has appeared. */
+ notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+
+ return 0;
+}
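+
+/*
+ * Illustrative registration sequence, not from the patch itself; my_dev_init
+ * and the "xen%d" name format are hypothetical:
+ *
+ *     int err;
+ *     struct net_device *dev = dev_alloc("xen%d", &err);
+ *     if (dev != NULL) {
+ *         dev->init = my_dev_init;
+ *         rtnl_lock();
+ *         err = register_netdevice(dev);
+ *         rtnl_unlock();
+ *     }
+ */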
+
+/**
+ * netdev_finish_unregister - complete unregistration
+ * @dev: device
+ *
+ * Destroy and free a dead device. A value of zero is returned on
+ * success.
+ */
+
+int netdev_finish_unregister(struct net_device *dev)
+{
+ BUG_TRAP(dev->ip_ptr==NULL);
+ BUG_TRAP(dev->ip6_ptr==NULL);
+ BUG_TRAP(dev->dn_ptr==NULL);
+
+ if (!dev->deadbeaf) {
+ printk(KERN_ERR "Freeing alive device %p, %s\n",
+ dev, dev->name);
+ return 0;
+ }
+#ifdef NET_REFCNT_DEBUG
+ printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name,
+ (dev->features & NETIF_F_DYNALLOC)?"":", old style");
+#endif
+ if (dev->destructor)
+ dev->destructor(dev);
+ if (dev->features & NETIF_F_DYNALLOC)
+ kfree(dev);
+ return 0;
+}
+
+/**
+ * unregister_netdevice - remove device from the kernel
+ * @dev: device
+ *
+ * This function shuts down a device interface and removes it
+ * from the kernel tables. On success 0 is returned, on a failure
+ * a negative errno code is returned.
+ *
+ * Callers must hold the rtnl semaphore. See the comment at the
+ * end of Space.c for details about the locking. You may want
+ * unregister_netdev() instead of this.
+ */
+
+int unregister_netdevice(struct net_device *dev)
+{
+ unsigned long now, warning_time;
+ struct net_device *d, **dp;
+
+ /* If device is running, close it first. */
+ if (dev->flags & IFF_UP)
+ dev_close(dev);
+
+ BUG_TRAP(dev->deadbeaf==0);
+ dev->deadbeaf = 1;
+
+ /* And unlink it from device chain. */
+ for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
+ if (d == dev) {
+ write_lock_bh(&dev_base_lock);
+ *dp = d->next;
+ write_unlock_bh(&dev_base_lock);
+ break;
+ }
+ }
+ if (d == NULL) {
+ printk(KERN_DEBUG "unregister_netdevice: device %s/%p"
+ " not registered\n", dev->name, dev);
+ return -ENODEV;
+ }
+
+ /* Synchronize to net_rx_action. */
+ br_write_lock_bh(BR_NETPROTO_LOCK);
+ br_write_unlock_bh(BR_NETPROTO_LOCK);
+
+ if (dev_boot_phase == 0) {
+
+ /* Shutdown queueing discipline. */
+ dev_shutdown(dev);
+
+        /* Notify protocols that we are about to destroy
+           this device. They should clean up all of their state.
+ */
+ notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+
+ /*
+ * Flush the multicast chain
+ */
+ dev_mc_discard(dev);
+ }
+
+ if (dev->uninit)
+ dev->uninit(dev);
+
+ /* Notifier chain MUST detach us from master device. */
+ BUG_TRAP(dev->master==NULL);
+
+#ifdef CONFIG_NET_DIVERT
+ free_divert_blk(dev);
+#endif
+
+ if (dev->features & NETIF_F_DYNALLOC) {
+#ifdef NET_REFCNT_DEBUG
+ if (atomic_read(&dev->refcnt) != 1)
+ printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n",
+ dev->name, atomic_read(&dev->refcnt)-1);
+#endif
+ dev_put(dev);
+ return 0;
+ }
+
+ /* Last reference is our one */
+ if (atomic_read(&dev->refcnt) == 1) {
+ dev_put(dev);
+ return 0;
+ }
+
+#ifdef NET_REFCNT_DEBUG
+ printk("unregister_netdevice: waiting %s refcnt=%d\n",
+ dev->name, atomic_read(&dev->refcnt));
+#endif
+
+ /* EXPLANATION. If dev->refcnt is not now 1 (our own reference)
+ it means that someone in the kernel still has a reference
+ to this device and we cannot release it.
+
+ "New style" devices have destructors, hence we can return from this
+       function and the destructor will do all the work later. As of kernel 2.4.0
+ there are very few "New Style" devices.
+
+ "Old style" devices expect that the device is free of any references
+ upon exit from this function.
+ We cannot return from this function until all such references have
+ fallen away. This is because the caller of this function will probably
+ immediately kfree(*dev) and then be unloaded via sys_delete_module.
+
+ So, we linger until all references fall away. The duration of the
+ linger is basically unbounded! It is driven by, for example, the
+ current setting of sysctl_ipfrag_time.
+
+       After 1 second, we start to rebroadcast unregister notifications
+       in the hope that careless clients will release the device.
+
+ */
+
+ now = warning_time = jiffies;
+ while (atomic_read(&dev->refcnt) != 1) {
+ if ((jiffies - now) > 1*HZ) {
+ /* Rebroadcast unregister notification */
+ notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+ }
+ mdelay(250);
+ if ((jiffies - warning_time) > 10*HZ) {
+ printk(KERN_EMERG "unregister_netdevice: waiting for %s to "
+ "become free. Usage count = %d\n",
+ dev->name, atomic_read(&dev->refcnt));
+ warning_time = jiffies;
+ }
+ }
+ dev_put(dev);
+ return 0;
+}
+
+
+/*
+ * Initialize the DEV module. At boot time this walks the device list and
+ * unhooks any devices that fail to initialise (normally hardware not
+ * present) and leaves us with a valid list of present and active devices.
+ *
+ */
+
+extern void net_device_init(void);
+extern void ip_auto_config(void);
+#ifdef CONFIG_NET_DIVERT
+extern void dv_init(void);
+#endif /* CONFIG_NET_DIVERT */
+
+
+/*
+ * Callers must hold the rtnl semaphore. See the comment at the
+ * end of Space.c for details about the locking.
+ */
+int __init net_dev_init(void)
+{
+ struct net_device *dev, **dp;
+
+ if ( !dev_boot_phase )
+ return 0;
+
+ skb_init();
+
+ net_header_cachep = kmem_cache_create(
+ "net_header_cache",
+ (PKT_PROT_LEN + sizeof(void *) - 1) & ~(sizeof(void *) - 1),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+ spin_lock_init(&net_schedule_list_lock);
+ INIT_LIST_HEAD(&net_schedule_list);
+
+ /*
+ * Add the devices.
+ * If the call to dev->init fails, the dev is removed
+     * from the chain, disconnecting the device until the
+ * next reboot.
+ *
+ * NB At boot phase networking is dead. No locking is required.
+ * But we still preserve dev_base_lock for sanity.
+ */
+ dp = &dev_base;
+ while ((dev = *dp) != NULL) {
+ spin_lock_init(&dev->queue_lock);
+ spin_lock_init(&dev->xmit_lock);
+
+ dev->xmit_lock_owner = -1;
+ dev->iflink = -1;
+ dev_hold(dev);
+
+ /*
+ * Allocate name. If the init() fails
+ * the name will be reissued correctly.
+ */
+ if (strchr(dev->name, '%'))
+ dev_alloc_name(dev, dev->name);
+
+ if (dev->init && dev->init(dev)) {
+ /*
+ * It failed to come up. It will be unhooked later.
+             * dev_alloc_name can now advance to the next suitable
+             * name, which is checked next.
+ */
+ dev->deadbeaf = 1;
+ dp = &dev->next;
+ } else {
+ dp = &dev->next;
+ dev->ifindex = dev_new_index();
+ if (dev->iflink == -1)
+ dev->iflink = dev->ifindex;
+ if (dev->rebuild_header == NULL)
+ dev->rebuild_header = default_rebuild_header;
+ dev_init_scheduler(dev);
+ set_bit(__LINK_STATE_PRESENT, &dev->state);
+ }
+ }
+
+ /*
+ * Unhook devices that failed to come up
+ */
+ dp = &dev_base;
+ while ((dev = *dp) != NULL) {
+ if (dev->deadbeaf) {
+ write_lock_bh(&dev_base_lock);
+ *dp = dev->next;
+ write_unlock_bh(&dev_base_lock);
+ dev_put(dev);
+ } else {
+ dp = &dev->next;
+ }
+ }
+
+ dev_boot_phase = 0;
+
+ dev_mcast_init();
+
+ /*
+ * Initialise network devices
+ */
+
+ net_device_init();
+
+ return 0;
+}
+
+inline int init_tx_header(u8 *data, unsigned int len, struct net_device *dev)
+{
+ memcpy(data + ETH_ALEN, dev->dev_addr, ETH_ALEN);
+
+ switch ( ntohs(*(unsigned short *)(data + 12)) )
+ {
+ case ETH_P_ARP:
+ if ( len < 42 ) break;
+ memcpy(data + 22, dev->dev_addr, 6);
+ return ETH_P_ARP;
+ case ETH_P_IP:
+ return ETH_P_IP;
+ }
+ return 0;
+}
+
+
+/*
+ * do_net_update:
+ *
+ * Called from guest OS to notify updates to its transmit and/or receive
+ * descriptor rings.
+ */
+
+long do_net_update(void)
+{
+ net_ring_t *net_ring;
+ net_shadow_ring_t *shadow_ring;
+ net_vif_t *current_vif;
+ unsigned int i, j;
+ struct sk_buff *skb;
+ tx_entry_t tx;
+ rx_shadow_entry_t *rx;
+ unsigned long pfn;
+ struct pfn_info *page;
+ unsigned long *g_pte;
+
+ for ( j = 0; j < current->num_net_vifs; j++)
+ {
+ int target;
+ u8 *g_data;
+ unsigned short protocol;
+
+ current_vif = current->net_vif_list[j];
+ net_ring = current_vif->net_ring;
+ shadow_ring = current_vif->shadow_ring;
+
+ /*
+ * PHASE 1 -- TRANSMIT RING
+ */
+
+ /*
+ * Collect up new transmit buffers. We collect up to the guest OS's
+ * new producer index, but take care not to catch up with our own
+ * consumer index.
+ */
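+ /*
+ * NB. TX_RING_SIZE is a power of two, so (tx_cons-i) & (TX_RING_SIZE-1)
+ * is the distance to our consumer index modulo the ring size; stopping
+ * while it equals 1 leaves one slot unused, so a full ring remains
+ * distinguishable from an empty one.
+ */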
+ for ( i = shadow_ring->tx_prod;
+ (i != net_ring->tx_prod) &&
+ (((shadow_ring->tx_cons-i) & (TX_RING_SIZE-1)) != 1);
+ i = TX_RING_INC(i) )
+ {
+ if ( copy_from_user(&tx, net_ring->tx_ring+i, sizeof(tx)) )
+ {
+ DPRINTK("Bad copy_from_user for tx net descriptor\n");
+ shadow_ring->tx_ring[i].status = RING_STATUS_ERR_CFU;
+ continue;
+ }
+
+ shadow_ring->tx_ring[i].size = tx.size;
+ shadow_ring->tx_ring[i].status = RING_STATUS_BAD_PAGE;
+
+ if ( tx.size < PKT_PROT_LEN )
+ {
+ DPRINTK("Runt packet %d\n", tx.size);
+ continue;
+ }
+
+ if ( ((tx.addr & ~PAGE_MASK) + tx.size) >= PAGE_SIZE )
+ {
+ DPRINTK("tx.addr: %lx, size: %u, end: %lu\n",
+ tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size);
+ continue;
+ }
+
+ pfn = tx.addr >> PAGE_SHIFT;
+ page = frame_table + pfn;
+ if ( (pfn >= max_page) ||
+ ((page->flags & PG_domain_mask) != current->domain) )
+ {
+ DPRINTK("Bad page frame\n");
+ continue;
+ }
+
+ g_data = map_domain_mem(tx.addr);
+
+ protocol = __constant_htons(
+ init_tx_header(g_data, tx.size, the_dev));
+ if ( protocol == 0 )
+ goto unmap_and_continue;
+
+ target = __net_get_target_vif(g_data, tx.size, current_vif->id);
+
+ if ( target > VIF_PHYSICAL_INTERFACE )
+ {
+ /* Local delivery */
+ if ( (skb = dev_alloc_skb(tx.size)) == NULL )
+ goto unmap_and_continue;
+
+ skb->destructor = tx_skb_release;
+
+ shadow_ring->tx_ring[i].status = RING_STATUS_OK;
+
+ skb->src_vif = current_vif->id;
+ skb->dst_vif = target;
+ skb->protocol = protocol;
+
+ skb->head = (u8 *)map_domain_mem(
+ ((skb->pf - frame_table) << PAGE_SHIFT));
+ skb->data = skb->head + 16;
+ skb_reserve(skb,2);
+ memcpy(skb->data, g_data, tx.size);
+ skb->len = tx.size;
+ unmap_domain_mem(skb->head);
+ skb->data += ETH_HLEN;
+ (void)netif_rx(skb);
+ }
+ else if ( target == VIF_PHYSICAL_INTERFACE )
+ {
+ shadow_ring->tx_ring[i].header =
+ kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
+ if ( shadow_ring->tx_ring[i].header == NULL )
+ goto unmap_and_continue;
+ memcpy(shadow_ring->tx_ring[i].header, g_data, PKT_PROT_LEN);
+ shadow_ring->tx_ring[i].payload = tx.addr + PKT_PROT_LEN;
+ shadow_ring->tx_ring[i].status = RING_STATUS_OK;
+ get_page_tot(page);
+ }
+
+ unmap_and_continue:
+ unmap_domain_mem(g_data);
+ }
+ smp_wmb(); /* Let other CPUs see new descriptors first. */
+ shadow_ring->tx_prod = i;
+
+ /* XXX: This should be more conservative. */
+ add_to_net_schedule_list_tail(current_vif);
+ tasklet_schedule(&net_tx_tasklet);
+
+ /*
+ * PHASE 2 -- RECEIVE RING
+ */
+
+ /*
+ * Collect up new receive buffers. We collect up to the guest OS's
+ * new producer index, but take care not to catch up with our own
+ * consumer index.
+ */
+ for ( i = shadow_ring->rx_prod;
+ (i != net_ring->rx_prod) &&
+ (((shadow_ring->rx_cons-i) & (RX_RING_SIZE-1)) != 1);
+ i = RX_RING_INC(i) )
+ {
+ /*
+ * This copy assumes that rx_shadow_entry_t is an extension of
+ * rx_net_entry_t; any extra fields must be tacked on to the end.
+ */
+ if ( copy_from_user( shadow_ring->rx_ring+i, net_ring->rx_ring+i,
+ sizeof (rx_entry_t) ) )
+ {
+ DPRINTK("Bad copy_from_user for rx ring\n");
+ shadow_ring->rx_ring[i].status = RING_STATUS_ERR_CFU;
+ continue;
+ }
+
+ rx = shadow_ring->rx_ring + i;
+ pfn = rx->addr >> PAGE_SHIFT;
+ page = frame_table + pfn;
+
+ shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE;
+
+ if ( (pfn >= max_page) ||
+ (page->flags != (PGT_l1_page_table | current->domain)) )
+ {
+ DPRINTK("Bad page frame containing ppte\n");
+ continue;
+ }
+
+ g_pte = map_domain_mem(rx->addr);
+
+ if (!(*g_pte & _PAGE_PRESENT))
+ {
+ DPRINTK("Inavlid PTE passed down (not present)\n");
+ unmap_domain_mem(g_pte);
+ continue;
+ }
+
+ page = (*g_pte >> PAGE_SHIFT) + frame_table;
+
+ if (page->tot_count != 1)
+ {
+ DPRINTK("An rx page must be mapped exactly once\n");
+ unmap_domain_mem(g_pte);
+ continue;
+ }
+
+ /* The pte they passed was good, so take it away from them. */
+ shadow_ring->rx_ring[i].status = RING_STATUS_OK;
+ *g_pte &= ~_PAGE_PRESENT;
+ page->flags = (page->flags & ~PG_type_mask) | PGT_net_rx_buf;
+ rx->flush_count = tlb_flush_count[smp_processor_id()];
+
+ unmap_domain_mem(g_pte);
+ }
+ smp_wmb(); /* Let other CPUs see new descriptors first. */
+ shadow_ring->rx_prod = net_ring->rx_prod;
+ }
+ return 0;
+}
+
+
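+/*
+ * Find the physical interface named by opt_ifname (presumably set from
+ * the boot command line), bring it up, and enable the transmit tasklet.
+ * Returns 1 on success, 0 on failure.
+ */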
+int setup_network_devices(void)
+{
+ int ret;
+ extern char opt_ifname[];
+ struct net_device *dev = dev_get_by_name(opt_ifname);
+
+ if ( dev == NULL )
+ {
+ printk("Could not find device %s\n", opt_ifname);
+ return 0;
+ }
+
+ ret = dev_open(dev);
+ if ( ret != 0 )
+ {
+ printk("Error opening device %s for use (%d)\n", opt_ifname, ret);
+ return 0;
+ }
+ printk("Device %s opened and ready for use.\n", opt_ifname);
+ the_dev = dev;
+
+ tasklet_enable(&net_tx_tasklet);
+
+ return 1;
+}
+
diff --git a/xen/net/dev_mcast.c b/xen/net/dev_mcast.c
new file mode 100644
index 0000000000..d7d2ae338d
--- /dev/null
+++ b/xen/net/dev_mcast.c
@@ -0,0 +1,276 @@
+/*
+ * Linux NET3: Multicast List maintenance.
+ *
+ * Authors:
+ * Tim Kordas <tjk@nostromo.eeap.cwru.edu>
+ * Richard Underwood <richard@wuzz.demon.co.uk>
+ *
+ * Stir fried together from the IP multicast and CAP patches above
+ * Alan Cox <Alan.Cox@linux.org>
+ *
+ * Fixes:
+ * Alan Cox : Update the device on a real delete
+ * rather than any time but...
+ * Alan Cox : IFF_ALLMULTI support.
+ * Alan Cox : New format set_multicast_list() calls.
+ * Gleb Natapov : Remove dev_mc_lock.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+//#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+//#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+//#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+//#include <linux/proc_fs.h>
+#include <linux/init.h>
+//#include <net/ip.h>
+//#include <net/route.h>
+#include <linux/skbuff.h>
+//#include <net/sock.h>
+//#include <net/arp.h>
+
+
+/*
+ * Device multicast list maintenance.
+ *
+ * This is used both by IP and by the user level maintenance functions.
+ * Unlike BSD we maintain a usage count on a given multicast address so
+ * that a casual user application can add/delete multicasts used by
+ * protocols without doing damage to the protocols when it deletes the
+ * entries. It also helps IP as it tracks overlapping maps.
+ *
+ * Device mc lists are changed by bh at least if IPv6 is enabled,
+ * so that it must be bh protected.
+ *
+ * We block accesses to device mc filters with dev->xmit_lock.
+ */
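+
+/*
+ * For example, if two protocols each dev_mc_add() the same address, the
+ * entry's dmi_users count reaches two and the address is only removed
+ * from the device filter after both have called dev_mc_delete().
+ */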
+
+/*
+ * Update the multicast list into the physical NIC controller.
+ */
+
+static void __dev_mc_upload(struct net_device *dev)
+{
+ /* Don't do anything till we up the interface
+ * [dev_open will call this function so the list will
+ * stay sane]
+ */
+
+ if (!(dev->flags&IFF_UP))
+ return;
+
+ /*
+ * Devices with no set multicast or which have been
+ * detached don't get set.
+ */
+
+ if (dev->set_multicast_list == NULL ||
+ !netif_device_present(dev))
+ return;
+
+ dev->set_multicast_list(dev);
+}
+
+void dev_mc_upload(struct net_device *dev)
+{
+ spin_lock_bh(&dev->xmit_lock);
+ __dev_mc_upload(dev);
+ spin_unlock_bh(&dev->xmit_lock);
+}
+
+/*
+ * Delete a device level multicast
+ */
+
+int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
+{
+ int err = 0;
+ struct dev_mc_list *dmi, **dmip;
+
+ spin_lock_bh(&dev->xmit_lock);
+
+ for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
+ /*
+ * Find the entry we want to delete. The device could
+ * have variable length entries so check these too.
+ */
+ if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
+ alen == dmi->dmi_addrlen) {
+ if (glbl) {
+ int old_glbl = dmi->dmi_gusers;
+ dmi->dmi_gusers = 0;
+ if (old_glbl == 0)
+ break;
+ }
+ if (--dmi->dmi_users)
+ goto done;
+
+ /*
+ * Last user. So delete the entry.
+ */
+ *dmip = dmi->next;
+ dev->mc_count--;
+
+ kfree(dmi);
+
+ /*
+ * We have altered the list, so the card
+ * loaded filter is now wrong. Fix it
+ */
+ __dev_mc_upload(dev);
+
+ spin_unlock_bh(&dev->xmit_lock);
+ return 0;
+ }
+ }
+ err = -ENOENT;
+done:
+ spin_unlock_bh(&dev->xmit_lock);
+ return err;
+}
+
+/*
+ * Add a device level multicast
+ */
+
+int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
+{
+ int err = 0;
+ struct dev_mc_list *dmi, *dmi1;
+
+ dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC);
+
+ spin_lock_bh(&dev->xmit_lock);
+ for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
+ if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
+ dmi->dmi_addrlen == alen) {
+ if (glbl) {
+ int old_glbl = dmi->dmi_gusers;
+ dmi->dmi_gusers = 1;
+ if (old_glbl)
+ goto done;
+ }
+ dmi->dmi_users++;
+ goto done;
+ }
+ }
+
+ if ((dmi = dmi1) == NULL) {
+ spin_unlock_bh(&dev->xmit_lock);
+ return -ENOMEM;
+ }
+ memcpy(dmi->dmi_addr, addr, alen);
+ dmi->dmi_addrlen = alen;
+ dmi->next = dev->mc_list;
+ dmi->dmi_users = 1;
+ dmi->dmi_gusers = glbl ? 1 : 0;
+ dev->mc_list = dmi;
+ dev->mc_count++;
+
+ __dev_mc_upload(dev);
+
+ spin_unlock_bh(&dev->xmit_lock);
+ return 0;
+
+done:
+ spin_unlock_bh(&dev->xmit_lock);
+ if (dmi1)
+ kfree(dmi1);
+ return err;
+}
+
+/*
+ * Discard multicast list when a device is downed
+ */
+
+void dev_mc_discard(struct net_device *dev)
+{
+ spin_lock_bh(&dev->xmit_lock);
+
+ while (dev->mc_list != NULL) {
+ struct dev_mc_list *tmp = dev->mc_list;
+ dev->mc_list = tmp->next;
+ if (tmp->dmi_users > tmp->dmi_gusers)
+ printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users);
+ kfree(tmp);
+ }
+ dev->mc_count = 0;
+
+ spin_unlock_bh(&dev->xmit_lock);
+}
+
+#ifdef CONFIG_PROC_FS
+static int dev_mc_read_proc(char *buffer, char **start, off_t offset,
+ int length, int *eof, void *data)
+{
+ off_t pos = 0, begin = 0;
+ struct dev_mc_list *m;
+ int len = 0;
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ for (dev = dev_base; dev; dev = dev->next) {
+ spin_lock_bh(&dev->xmit_lock);
+ for (m = dev->mc_list; m; m = m->next) {
+ int i;
+
+ len += sprintf(buffer+len,"%-4d %-15s %-5d %-5d ", dev->ifindex,
+ dev->name, m->dmi_users, m->dmi_gusers);
+
+ for (i = 0; i < m->dmi_addrlen; i++)
+ len += sprintf(buffer+len, "%02x", m->dmi_addr[i]);
+
+ len += sprintf(buffer+len, "\n");
+
+ pos = begin + len;
+ if (pos < offset) {
+ len = 0;
+ begin = pos;
+ }
+ if (pos > offset + length) {
+ spin_unlock_bh(&dev->xmit_lock);
+ goto done;
+ }
+ }
+ spin_unlock_bh(&dev->xmit_lock);
+ }
+ *eof = 1;
+
+done:
+ read_unlock(&dev_base_lock);
+ *start = buffer + (offset - begin);
+ len -= (offset - begin);
+ if (len > length)
+ len = length;
+ if (len < 0)
+ len = 0;
+ return len;
+}
+#endif
+
+void __init dev_mcast_init(void)
+{
+#ifdef CONFIG_PROC_FS
+ create_proc_read_entry("net/dev_mcast", 0, 0, dev_mc_read_proc, NULL);
+#endif
+}
+
diff --git a/xen/net/devinit.c b/xen/net/devinit.c
new file mode 100644
index 0000000000..f3ce2c39d4
--- /dev/null
+++ b/xen/net/devinit.c
@@ -0,0 +1,109 @@
+/******************************************************************************
+ * devinit.c
+ *
+ * This is the watchdog timer routines, ripped from sch_generic.c
+ * Original copyright notice appears below.
+ *
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * Jamal Hadi Salim, <hadi@nortelnetworks.com> 990601
+ * - Ingress support
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/lib.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+
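+/*
+ * Timer callback: if the device is present, running and has carrier, but
+ * its queue has been stopped for more than watchdog_timeo jiffies, invoke
+ * the driver's tx_timeout() hook; the timer is then re-armed.
+ */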
+static void dev_watchdog(unsigned long arg)
+{
+ struct net_device *dev = (struct net_device *)arg;
+
+ spin_lock(&dev->xmit_lock);
+ if (netif_device_present(dev) &&
+ netif_running(dev) &&
+ netif_carrier_ok(dev)) {
+ if (netif_queue_stopped(dev) &&
+ (jiffies - dev->trans_start) > dev->watchdog_timeo) {
+ printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", dev->name);
+ dev->tx_timeout(dev);
+ }
+ if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
+ dev_hold(dev);
+ }
+ spin_unlock(&dev->xmit_lock);
+
+ dev_put(dev);
+}
+
+static void dev_watchdog_init(struct net_device *dev)
+{
+ init_timer(&dev->watchdog_timer);
+ dev->watchdog_timer.data = (unsigned long)dev;
+ dev->watchdog_timer.function = dev_watchdog;
+}
+
+void __netdev_watchdog_up(struct net_device *dev)
+{
+ if (dev->tx_timeout) {
+ if (dev->watchdog_timeo <= 0)
+ dev->watchdog_timeo = 5*HZ;
+ if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
+ dev_hold(dev);
+ }
+}
+
+static void dev_watchdog_up(struct net_device *dev)
+{
+ spin_lock_bh(&dev->xmit_lock);
+ __netdev_watchdog_up(dev);
+ spin_unlock_bh(&dev->xmit_lock);
+}
+
+static void dev_watchdog_down(struct net_device *dev)
+{
+ spin_lock_bh(&dev->xmit_lock);
+ if (del_timer(&dev->watchdog_timer))
+ __dev_put(dev);
+ spin_unlock_bh(&dev->xmit_lock);
+}
+
+void dev_activate(struct net_device *dev)
+{
+ spin_lock_bh(&dev->queue_lock);
+ dev->trans_start = jiffies;
+ dev_watchdog_up(dev);
+ spin_unlock_bh(&dev->queue_lock);
+}
+
+void dev_deactivate(struct net_device *dev)
+{
+ dev_watchdog_down(dev);
+}
+
+void dev_init_scheduler(struct net_device *dev)
+{
+ dev_watchdog_init(dev);
+}
+
+void dev_shutdown(struct net_device *dev)
+{
+}
diff --git a/xen/net/eth.c b/xen/net/eth.c
new file mode 100644
index 0000000000..5238de022e
--- /dev/null
+++ b/xen/net/eth.c
@@ -0,0 +1,252 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Ethernet-type device handling.
+ *
+ * Version: @(#)eth.c 1.0.7 05/25/93
+ *
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Mark Evans, <evansmp@uhura.aston.ac.uk>
+ * Florian La Roche, <rzsfl@rz.uni-sb.de>
+ * Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *
+ * Fixes:
+ * Mr Linux : Arp problems
+ * Alan Cox : Generic queue tidyup (very tiny here)
+ * Alan Cox : eth_header ntohs should be htons
+ * Alan Cox : eth_rebuild_header missing an htons and
+ * minor other things.
+ * Tegge : Arp bug fixes.
+ * Florian : Removed many unnecessary functions, code cleanup
+ * and changes for new arp and skbuff.
+ * Alan Cox : Redid header building to reflect new format.
+ * Alan Cox : ARP only when compiled with CONFIG_INET
+ * Greg Page : 802.2 and SNAP stuff.
+ * Alan Cox : MAC layer pointers/new format.
+ * Paul Gortmaker : eth_copy_and_sum shouldn't csum padding.
+ * Alan Cox : Protect against forwarding explosions with
+ * older network drivers and IFF_ALLMULTI.
+ * Christer Weinigel : Better rebuild header message.
+ * Andrew Morton : 26Feb01: kill ether_setup() - use netdev_boot_setup().
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+//#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+//#include <linux/in.h>
+//#include <linux/inet.h>
+//#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <linux/init.h>
+//#include <net/dst.h>
+//#include <net/arp.h>
+//#include <net/sock.h>
+//#include <net/ipv6.h>
+//#include <net/ip.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+//#include <asm/checksum.h>
+
+//extern int __init netdev_boot_setup(char *str);
+
+//__setup("ether=", netdev_boot_setup);
+
+/*
+ * Create the Ethernet MAC header for an arbitrary protocol layer
+ *
+ * saddr=NULL means use device source address
+ * daddr=NULL means leave destination address (eg unresolved arp)
+ */
+
+int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
+ void *daddr, void *saddr, unsigned len)
+{
+ struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN);
+
+ /*
+ * Set the protocol type. For a packet of type ETH_P_802_3 we put the length
+ * in here instead. It is up to the 802.2 layer to carry protocol information.
+ */
+
+ if(type!=ETH_P_802_3)
+ eth->h_proto = htons(type);
+ else
+ eth->h_proto = htons(len);
+
+ /*
+ * Set the source hardware address.
+ */
+
+ if(saddr)
+ memcpy(eth->h_source,saddr,dev->addr_len);
+ else
+ memcpy(eth->h_source,dev->dev_addr,dev->addr_len);
+
+ /*
+ * Anyway, the loopback-device should never use this function...
+ */
+
+ if (dev->flags & (IFF_LOOPBACK|IFF_NOARP))
+ {
+ memset(eth->h_dest, 0, dev->addr_len);
+ return(dev->hard_header_len);
+ }
+
+ if(daddr)
+ {
+ memcpy(eth->h_dest,daddr,dev->addr_len);
+ return dev->hard_header_len;
+ }
+
+ return -dev->hard_header_len;
+}
+
+
+/*
+ * Rebuild the Ethernet MAC header. This is called after an ARP
+ * (or in future other address resolution) has completed on this
+ * sk_buff. We now let ARP fill in the other fields.
+ *
+ * This routine CANNOT use cached dst->neigh!
+ * Really, it is used only when dst->neigh is wrong.
+ */
+
+int eth_rebuild_header(struct sk_buff *skb)
+{
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
+ struct net_device *dev = skb->dev;
+
+ switch (eth->h_proto)
+ {
+#ifdef CONFIG_INET
+ case __constant_htons(ETH_P_IP):
+ return arp_find(eth->h_dest, skb);
+#endif
+ default:
+ printk(KERN_DEBUG
+ "%s: unable to resolve type %X addresses.\n",
+ dev->name, (int)eth->h_proto);
+
+ memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
+ break;
+ }
+
+ return 0;
+}
+
+
+/*
+ * Determine the packet's protocol ID. The rule here is that we
+ * assume 802.3 if the type field is short enough to be a length.
+ * This is normal practice and works for any 'now in use' protocol.
+ */
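+
+/*
+ * For example, 0x0800 (IPv4) is >= 1536 and so is a protocol ID, while
+ * 0x05DC (1500) can only be an 802.3 frame length.
+ */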
+
+unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ethhdr *eth;
+ unsigned char *rawp;
+
+ if (skb->skb_type == SKB_ZERO_COPY)
+ {
+ skb_pull(skb,dev->hard_header_len);
+ skb->mac.raw= (void *)0xdeadbeef;
+ return htons(ETH_P_802_2);
+
+ } else { /* SKB_NORMAL */
+
+ skb->mac.raw=skb->data;
+ skb_pull(skb,dev->hard_header_len);
+ eth= skb->mac.ethernet;
+
+ if(*eth->h_dest&1)
+ {
+ if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
+ skb->pkt_type=PACKET_BROADCAST;
+ else
+ skb->pkt_type=PACKET_MULTICAST;
+ }
+
+ /*
+ * This ALLMULTI check should be redundant by 1.4
+ * so don't forget to remove it.
+ *
+ * Seems you forgot to remove it. All silly devices
+ * seem to set IFF_PROMISC.
+ */
+
+ else if(1 /*dev->flags&IFF_PROMISC*/)
+ {
+ if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN))
+ skb->pkt_type=PACKET_OTHERHOST;
+ }
+
+ if (ntohs(eth->h_proto) >= 1536)
+ return eth->h_proto;
+
+ rawp = skb->data;
+
+ /*
+ * This is a magic hack to spot IPX packets. Older Novell breaks
+ * the protocol design and runs IPX over 802.3 without an 802.2 LLC
+ * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
+ * won't work for fault tolerant netware but does for the rest.
+ */
+ if (*(unsigned short *)rawp == 0xFFFF)
+ return htons(ETH_P_802_3);
+
+ /*
+ * Real 802.2 LLC
+ */
+ return htons(ETH_P_802_2);
+ }
+}
+
+
+int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
+{
+ struct ethhdr *eth = skb->mac.ethernet;
+ memcpy(haddr, eth->h_source, ETH_ALEN);
+ return ETH_ALEN;
+}
+
+int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
+{
+#if 0
+ unsigned short type = hh->hh_type;
+ struct ethhdr *eth = (struct ethhdr*)(((u8*)hh->hh_data) + 2);
+ struct net_device *dev = neigh->dev;
+
+ if (type == __constant_htons(ETH_P_802_3))
+ return -1;
+
+ eth->h_proto = type;
+ memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
+ memcpy(eth->h_dest, neigh->ha, dev->addr_len);
+ hh->hh_len = ETH_HLEN;
+#endif
+ return 0;
+}
+
+/*
+ * Called by Address Resolution module to notify changes in address.
+ */
+
+void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr)
+{
+ memcpy(((u8*)hh->hh_data) + 2, haddr, dev->addr_len);
+}
diff --git a/xen/net/skbuff.c b/xen/net/skbuff.c
new file mode 100644
index 0000000000..695a6f6b63
--- /dev/null
+++ b/xen/net/skbuff.c
@@ -0,0 +1,501 @@
+/*
+ * Routines having to do with the 'struct sk_buff' memory handlers.
+ *
+ * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
+ * Florian La Roche <rzsfl@rz.uni-sb.de>
+ *
+ * Version: $Id: skbuff.c,v 1.89 2001/08/06 13:25:02 davem Exp $
+ *
+ * Fixes:
+ * Alan Cox : Fixed the worst of the load balancer bugs.
+ * Dave Platt : Interrupt stacking fix.
+ * Richard Kooijman : Timestamp fixes.
+ * Alan Cox : Changed buffer format.
+ * Alan Cox : destructor hook for AF_UNIX etc.
+ * Linus Torvalds : Better skb_clone.
+ * Alan Cox : Added skb_copy.
+ * Alan Cox : Added all the changed routines Linus
+ * only put in the headers
+ * Ray VanTassle : Fixed --skb->lock in free
+ * Alan Cox : skb_copy copy arp field
+ * Andi Kleen : slabified it.
+ *
+ * NOTE:
+ * The __skb_ routines should be called with interrupts
+ * disabled, or you better be *real* sure that the operation is atomic
+ * with respect to whatever list is being frobbed (e.g. via lock_sock()
+ * or via disabling bottom half handlers, etc).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/cache.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+#define BUG_TRAP ASSERT
+
+int sysctl_hot_list_len = 128;
+
+static kmem_cache_t *skbuff_head_cache;
+
+static union {
+ struct sk_buff_head list;
+ char pad[SMP_CACHE_BYTES];
+} skb_head_pool[NR_CPUS];
+
+/*
+ * Keep out-of-line to prevent kernel bloat.
+ * __builtin_return_address is not used because it is not always
+ * reliable.
+ */
+
+/**
+ * skb_over_panic - private function
+ * @skb: buffer
+ * @sz: size
+ * @here: address
+ *
+ * Out of line support code for skb_put(). Not user callable.
+ */
+
+void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+{
+ printk("skput:over: %p:%d put:%d dev:%s",
+ here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+ BUG();
+}
+
+/**
+ * skb_under_panic - private function
+ * @skb: buffer
+ * @sz: size
+ * @here: address
+ *
+ * Out of line support code for skb_push(). Not user callable.
+ */
+
+
+void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+{
+ printk("skput:under: %p:%d put:%d dev:%s",
+ here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+ BUG();
+}
+
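+/*
+ * Per-CPU hot lists of free sk_buff heads: up to sysctl_hot_list_len
+ * heads are recycled via these lists before falling back to the slab
+ * cache. Interrupts are disabled around the queue operations because the
+ * lists are also used from interrupt context.
+ */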
+static __inline__ struct sk_buff *skb_head_from_pool(void)
+{
+ struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
+
+ if (skb_queue_len(list)) {
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ skb = __skb_dequeue(list);
+ local_irq_restore(flags);
+ return skb;
+ }
+ return NULL;
+}
+
+static __inline__ void skb_head_to_pool(struct sk_buff *skb)
+{
+ struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
+
+ if (skb_queue_len(list) < sysctl_hot_list_len) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __skb_queue_head(list, skb);
+ local_irq_restore(flags);
+
+ return;
+ }
+ kmem_cache_free(skbuff_head_cache, skb);
+}
+
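+/*
+ * Take a page off the free list to back a zero-copy skb. The pfn_info is
+ * remembered in skb->pf so dealloc_skb_data_page() can return the page;
+ * the value returned here is the page's physical address.
+ */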
+static inline u8 *alloc_skb_data_page(struct sk_buff *skb)
+{
+ struct list_head *list_ptr;
+ struct pfn_info *pf;
+ unsigned long flags;
+
+ spin_lock_irqsave(&free_list_lock, flags);
+
+ if (!free_pfns) {
+ /* Don't leak the lock on the empty-list path. */
+ spin_unlock_irqrestore(&free_list_lock, flags);
+ return NULL;
+ }
+
+ list_ptr = free_list.next;
+ pf = list_entry(list_ptr, struct pfn_info, list);
+ pf->flags = 0; /* owned by dom0 */
+ list_del(&pf->list);
+ free_pfns--;
+
+ spin_unlock_irqrestore(&free_list_lock, flags);
+
+ skb->pf = pf;
+ return (u8 *)((pf - frame_table) << PAGE_SHIFT);
+}
+
+static inline void dealloc_skb_data_page(struct sk_buff *skb)
+{
+ struct pfn_info *pf;
+ unsigned long flags;
+
+ pf = skb->pf;
+
+ spin_lock_irqsave(&free_list_lock, flags);
+
+ list_add(&pf->list, &free_list);
+ free_pfns++;
+
+ spin_unlock_irqrestore(&free_list_lock, flags);
+}
+
+static inline void INTERRUPT_CHECK(int gfp_mask)
+{
+ if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
+ printk(KERN_ERR "alloc_skb called nonatomically\n");
+ BUG();
+ }
+}
+
+
+/**
+ * alloc_skb - allocate a network buffer
+ * @size: size to allocate
+ * @gfp_mask: allocation mask
+ *
+ * Allocate a new &sk_buff. The returned buffer has no headroom and a
+ * tail room of size bytes. The object has a reference count of one.
+ * The return is the buffer. On a failure the return is %NULL.
+ *
+ * Buffers may only be allocated from interrupts using a @gfp_mask of
+ * %GFP_ATOMIC.
+ */
+
+struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
+{
+ struct sk_buff *skb;
+ u8 *data;
+
+ INTERRUPT_CHECK(gfp_mask);
+
+ /* Get the HEAD */
+ skb = skb_head_from_pool();
+ if (skb == NULL) {
+ skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
+ if (skb == NULL)
+ goto nohead;
+ }
+
+ /* Get the DATA. Size must match skb_add_mtu(). */
+ size = SKB_DATA_ALIGN(size);
+ data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+ if (data == NULL)
+ goto nodata;
+
+ /* Load the data pointers. */
+ skb->head = data;
+ skb->data = data;
+ skb->tail = data;
+ skb->end = data + size;
+
+ /* Set up other state */
+ skb->len = 0;
+ skb->data_len = 0;
+ skb->src_vif = VIF_UNKNOWN_INTERFACE;
+ skb->dst_vif = VIF_UNKNOWN_INTERFACE;
+ skb->skb_type = SKB_NORMAL;
+
+ skb_shinfo(skb)->nr_frags = 0;
+ return skb;
+
+ nodata:
+ skb_head_to_pool(skb);
+ nohead:
+ return NULL;
+}
+
+
+struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
+{
+ struct sk_buff *skb;
+ u8 *data;
+
+ INTERRUPT_CHECK(gfp_mask);
+
+ /* Get the HEAD */
+ skb = skb_head_from_pool();
+ if (skb == NULL) {
+ skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
+ if (skb == NULL)
+ goto nohead;
+ }
+
+ /* Get the DATA. Size must match skb_add_mtu(). */
+ size = SKB_DATA_ALIGN(size);
+ data = alloc_skb_data_page(skb);
+
+ if (data == NULL)
+ goto nodata;
+
+ /* A FAKE virtual address, so that pci_map_xxx does the right thing. */
+ data = phys_to_virt((unsigned long)data);
+
+ /* Load the data pointers. */
+ skb->head = data;
+ skb->data = data;
+ skb->tail = data;
+ skb->end = data + size;
+
+ /* Set up other state */
+ skb->len = 0;
+ skb->data_len = 0;
+ skb->src_vif = VIF_UNKNOWN_INTERFACE;
+ skb->dst_vif = VIF_UNKNOWN_INTERFACE;
+ skb->skb_type = SKB_ZERO_COPY;
+
+ skb_shinfo(skb)->nr_frags = 0;
+
+ return skb;
+
+ nodata:
+ skb_head_to_pool(skb);
+ nohead:
+ return NULL;
+}
+
+
+struct sk_buff *alloc_skb_nodata(int gfp_mask)
+{
+ struct sk_buff *skb;
+
+ INTERRUPT_CHECK(gfp_mask);
+
+ /* Get the HEAD */
+ skb = skb_head_from_pool();
+ if (skb == NULL) {
+ skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
+ if (skb == NULL)
+ return NULL;
+ }
+
+ skb->skb_type = SKB_NODATA;
+ return skb;
+}
+
+
+/*
+ * Slab constructor for a skb head.
+ */
+static inline void skb_headerinit(void *p, kmem_cache_t *cache,
+ unsigned long flags)
+{
+ struct sk_buff *skb = p;
+
+ skb->next = NULL;
+ skb->prev = NULL;
+ skb->list = NULL;
+ skb->dev = NULL;
+ skb->pkt_type = PACKET_HOST; /* Default type */
+ skb->ip_summed = 0;
+ skb->destructor = NULL;
+}
+
+static void skb_release_data(struct sk_buff *skb)
+{
+ if (skb_shinfo(skb)->nr_frags) BUG();
+
+ switch ( skb->skb_type )
+ {
+ case SKB_NORMAL:
+ kfree(skb->head);
+ break;
+ case SKB_ZERO_COPY:
+ dealloc_skb_data_page(skb);
+ break;
+ case SKB_NODATA:
+ break;
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Free an skbuff by memory without cleaning the state.
+ */
+void kfree_skbmem(struct sk_buff *skb)
+{
+ skb_release_data(skb);
+ skb_head_to_pool(skb);
+}
+
+/**
+ * __kfree_skb - private function
+ * @skb: buffer
+ *
+ * Free an sk_buff. Release anything attached to the buffer.
+ * Clean the state. This is an internal helper function. Users should
+ * always call kfree_skb
+ */
+
+void __kfree_skb(struct sk_buff *skb)
+{
+ if ( skb->list )
+ panic(KERN_WARNING "Warning: kfree_skb passed an skb still "
+ "on a list (from %p).\n", NET_CALLER(skb));
+
+ if ( skb->destructor )
+ skb->destructor(skb);
+
+ skb_headerinit(skb, NULL, 0); /* clean state */
+ kfree_skbmem(skb);
+}
+
+static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+ /*
+ * Shift between the two data areas in bytes
+ */
+ unsigned long offset = new->data - old->data;
+
+ new->list=NULL;
+ new->dev=old->dev;
+ new->protocol=old->protocol;
+ new->h.raw=old->h.raw+offset;
+ new->nh.raw=old->nh.raw+offset;
+ new->mac.raw=old->mac.raw+offset;
+ new->pkt_type=old->pkt_type;
+ new->destructor = NULL;
+}
+
+/**
+ * skb_copy - create private copy of an sk_buff
+ * @skb: buffer to copy
+ * @gfp_mask: allocation priority
+ *
+ * Make a copy of both an &sk_buff and its data. This is used when the
+ * caller wishes to modify the data and needs a private copy of the
+ * data to alter. Returns %NULL on failure or the pointer to the buffer
+ * on success. The returned buffer has a reference count of 1.
+ *
+ * As by-product this function converts non-linear &sk_buff to linear
+ * one, so that &sk_buff becomes completely private and caller is allowed
+ * to modify all the data of returned buffer. This means that this
+ * function is not recommended for use in circumstances when only
+ * header is going to be modified. Use pskb_copy() instead.
+ */
+
+struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
+{
+ struct sk_buff *n;
+ int headerlen = skb->data-skb->head;
+
+ /*
+ * Allocate the copy buffer
+ */
+ n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
+ if(n==NULL)
+ return NULL;
+
+ /* Set the data pointer */
+ skb_reserve(n,headerlen);
+ /* Set the tail pointer and length */
+ skb_put(n,skb->len);
+ n->csum = skb->csum;
+ n->ip_summed = skb->ip_summed;
+
+ if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
+ BUG();
+
+ copy_skb_header(n, skb);
+
+ return n;
+}
+
+/* Copy some data bits from skb to kernel buffer. */
+
+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
+{
+ int i, copy;
+ int start = skb->len - skb->data_len;
+
+ if (offset > (int)skb->len-len)
+ goto fault;
+
+ /* Copy header. */
+ if ((copy = start-offset) > 0) {
+ if (copy > len)
+ copy = len;
+ memcpy(to, skb->data + offset, copy);
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to += copy;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+
+ BUG_TRAP(start <= offset+len);
+
+ end = start + skb_shinfo(skb)->frags[i].size;
+ if ((copy = end-offset) > 0) {
+ u8 *vaddr;
+
+ if (copy > len)
+ copy = len;
+
+ vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+ memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
+ offset-start, copy);
+ kunmap_skb_frag(vaddr);
+
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to += copy;
+ }
+ start = end;
+ }
+
+ if (len == 0)
+ return 0;
+
+ fault:
+ return -EFAULT;
+}
+
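+/* Create the sk_buff head cache and initialise each CPU's hot list. */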
+void __init skb_init(void)
+{
+ int i;
+
+ skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+ sizeof(struct sk_buff),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ skb_headerinit, NULL);
+ if (!skbuff_head_cache)
+ panic("cannot create skbuff cache");
+
+ for (i=0; i<NR_CPUS; i++)
+ skb_queue_head_init(&skb_head_pool[i].list);
+}
diff --git a/xen/tools/Makefile b/xen/tools/Makefile
new file mode 100644
index 0000000000..ccf535aa49
--- /dev/null
+++ b/xen/tools/Makefile
@@ -0,0 +1,6 @@
+
+elf-reloc: elf-reloc.c
+ gcc -O2 -Wall -o $@ $<
+
+clean:
+ rm -f elf-reloc *~ core
diff --git a/xen/tools/elf-reloc.c b/xen/tools/elf-reloc.c
new file mode 100644
index 0000000000..19a839ee84
--- /dev/null
+++ b/xen/tools/elf-reloc.c
@@ -0,0 +1,118 @@
+/******************************************************************************
+ * elf-reloc.c
+ *
+ * Usage: elf-reloc <old base> <new base> <image>
+ *
+ * Relocates <image> from <old base> address to <new base> address by
+ * frobbing the Elf headers. Segment contents are unmodified!
+ */
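+
+/*
+ * Example invocation (hypothetical addresses):
+ *   elf-reloc 0xfc400000 0xfc500000 image
+ * adds 0x100000 to e_entry and to each phdr's p_vaddr and p_paddr.
+ */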
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef unsigned long Elf32_Addr;
+typedef unsigned short Elf32_Half;
+typedef unsigned long Elf32_Off;
+typedef unsigned long Elf32_Word;
+
+typedef struct {
+ unsigned char e_ident[16];
+ Elf32_Half e_type;
+ Elf32_Half e_machine;
+ Elf32_Word e_version;
+ Elf32_Addr e_entry;
+ Elf32_Off e_phoff;
+ Elf32_Off e_shoff;
+ Elf32_Word e_flags;
+ Elf32_Half e_ehsize;
+ Elf32_Half e_phentsize;
+ Elf32_Half e_phnum;
+ Elf32_Half e_shentsize;
+ Elf32_Half e_shnum;
+ Elf32_Half e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct {
+ Elf32_Word p_type;
+ Elf32_Off p_offset;
+ Elf32_Addr p_vaddr;
+ Elf32_Addr p_paddr;
+ Elf32_Word p_filesz;
+ Elf32_Word p_memsz;
+ Elf32_Word p_flags;
+ Elf32_Word p_align;
+} Elf32_Phdr;
+
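+/* NB. Argument order is (field, type), the reverse of the standard offsetof(). */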
+#define offsetof(_f,_p) ((unsigned long)&(((_p *)0)->_f))
+
+
+/* Add @reloc_distance to address at offset @off in file @fp. */
+void reloc(FILE *fp, long off, unsigned long reloc_distance)
+{
+ unsigned long base;
+ fseek(fp, off, SEEK_SET);
+ fread(&base, sizeof(base), 1, fp);
+ base += reloc_distance;
+ fseek(fp, off, SEEK_SET);
+ fwrite(&base, sizeof(base), 1, fp);
+}
+
+
+int main(int argc, char **argv)
+{
+ unsigned long old_base, new_base, reloc_distance;
+ long virt_section, phys_section;
+ char *image_name;
+ FILE *fp;
+ Elf32_Off phoff;
+ Elf32_Half phnum, phentsz;
+ int i;
+
+ if ( argc != 4 )
+ {
+ fprintf(stderr, "Usage: elf-reloc <old base> <new base> <image>\n");
+ return(1);
+ }
+
+ old_base = strtoul(argv[1], NULL, 16);
+ new_base = strtoul(argv[2], NULL, 16);
+ image_name = argv[3];
+
+ printf("Relocating `%s' from 0x%08lX to 0x%08lX\n",
+ image_name, old_base, new_base);
+
+ fp = fopen(image_name, "rb+");
+ if ( !fp )
+ {
+ fprintf(stderr, "Failed to load image!\n");
+ return(1);
+ }
+
+ reloc_distance = new_base - old_base;
+
+ /* First frob the entry address. */
+ reloc(fp, offsetof(e_entry, Elf32_Ehdr), reloc_distance);
+
+ fseek(fp, offsetof(e_phoff, Elf32_Ehdr), SEEK_SET);
+ fread(&phoff, sizeof(phoff), 1, fp);
+ fseek(fp, offsetof(e_phnum, Elf32_Ehdr), SEEK_SET);
+ fread(&phnum, sizeof(phnum), 1, fp);
+ fseek(fp, offsetof(e_phentsize, Elf32_Ehdr), SEEK_SET);
+ fread(&phentsz, sizeof(phentsz), 1, fp);
+
+ virt_section = (long)phoff + offsetof(p_vaddr, Elf32_Phdr);
+ phys_section = (long)phoff + offsetof(p_paddr, Elf32_Phdr);
+ for ( i = 0; i < phnum; i++ )
+ {
+ reloc(fp, phys_section, reloc_distance);
+ reloc(fp, virt_section, reloc_distance);
+ phys_section += phentsz;
+ virt_section += phentsz;
+ }
+
+ fclose(fp);
+
+ return(0);
+}