aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.rootkeys16
-rw-r--r--BitKeeper/etc/ignore16
-rw-r--r--xen-2.4.16/Rules.mk1
-rw-r--r--xen-2.4.16/arch/i386/entry.S1
-rw-r--r--xen-2.4.16/arch/i386/io_apic.c228
-rw-r--r--xen-2.4.16/arch/i386/mpparse.c1262
-rw-r--r--xen-2.4.16/arch/i386/process.c1
-rw-r--r--xen-2.4.16/arch/i386/setup.c11
-rw-r--r--xen-2.4.16/arch/i386/smpboot.c14
-rw-r--r--xen-2.4.16/arch/i386/traps.c20
-rw-r--r--xen-2.4.16/common/event.c4
-rw-r--r--xen-2.4.16/drivers/Makefile4
-rw-r--r--xen-2.4.16/drivers/block/ll_rw_blk.c27
-rw-r--r--xen-2.4.16/drivers/block/xen_block.c620
-rw-r--r--xen-2.4.16/drivers/char/Makefile8
-rw-r--r--xen-2.4.16/drivers/char/xen_kbd.c111
-rw-r--r--xen-2.4.16/drivers/char/xen_serial.c140
-rw-r--r--xen-2.4.16/drivers/ide/ide-disk.c15
-rw-r--r--xen-2.4.16/drivers/ide/ide-disk.c.orig1550
-rw-r--r--xen-2.4.16/drivers/ide/ide-probe.c43
-rw-r--r--xen-2.4.16/drivers/ide/ide-taskfile.c2
-rw-r--r--xen-2.4.16/drivers/ide/ide-xeno.c46
-rw-r--r--xen-2.4.16/drivers/ide/ide.c12
-rw-r--r--xen-2.4.16/drivers/ide/piix.c536
-rw-r--r--xen-2.4.16/drivers/net/Makefile3
-rw-r--r--xen-2.4.16/drivers/net/e1000/e1000.h3
-rw-r--r--xen-2.4.16/drivers/net/e1000/e1000_hw.c2
-rw-r--r--xen-2.4.16/drivers/net/e1000/e1000_main.c5
-rw-r--r--xen-2.4.16/drivers/net/e1000/e1000_osdep.h4
-rw-r--r--xen-2.4.16/include/asm-i386/apic.h35
-rw-r--r--xen-2.4.16/include/asm-i386/apicdef.h15
-rw-r--r--xen-2.4.16/include/asm-i386/io_apic.h8
-rw-r--r--xen-2.4.16/include/asm-i386/mpspec.h25
-rw-r--r--xen-2.4.16/include/asm-i386/processor.h10
-rw-r--r--xen-2.4.16/include/asm-i386/smpboot.h103
-rw-r--r--xen-2.4.16/include/hypervisor-ifs/block.h85
-rw-r--r--xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h9
-rw-r--r--xen-2.4.16/include/xeno/blkdev.h1
-rw-r--r--xen-2.4.16/include/xeno/config.h1
-rw-r--r--xen-2.4.16/include/xeno/sched.h2
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile2
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c827
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c233
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c27
-rw-r--r--xenolinux-2.4.16-sparse/drivers/block/Config.in51
-rw-r--r--xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c5
-rw-r--r--xenolinux-2.4.16-sparse/fs/partitions/check.c443
-rw-r--r--xenolinux-2.4.16-sparse/fs/partitions/msdos.c642
-rw-r--r--xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h10
-rw-r--r--xenolinux-2.4.16-sparse/include/linux/blk.h416
-rw-r--r--xenolinux-2.4.16-sparse/include/linux/major.h199
-rw-r--r--xenolinux-2.4.16-sparse/init/main.c871
52 files changed, 8154 insertions, 571 deletions
diff --git a/.rootkeys b/.rootkeys
index 0b792c5ac0..e7f19a6ecd 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -59,16 +59,23 @@
3ddb79beME_0abStePF6fU8XLuQnWw xen-2.4.16/drivers/block/elevator.c
3ddb79beNQVrdGyoI4njXhgAjD6a4A xen-2.4.16/drivers/block/genhd.c
3ddb79beyWwLRP_BiM2t1JKgr_plEw xen-2.4.16/drivers/block/ll_rw_blk.c
+3e4a8cb7RhubVgsPwO7cK0pgAN8WCQ xen-2.4.16/drivers/block/xen_block.c
+3e4a8cb7alzQCDKS7MlioPoHBKYkdQ xen-2.4.16/drivers/char/Makefile
+3e4a8cb7WmiYdC-ASGiCSG_CL8vsqg xen-2.4.16/drivers/char/xen_kbd.c
+3e4a8cb7nMChlro4wvOBo76n__iCFA xen-2.4.16/drivers/char/xen_serial.c
3ddb79bdhcqD9ebrslr0O0oHqTiiXg xen-2.4.16/drivers/ide/Makefile
3ddb79bdErDn_WC3G-fWxKNR3viLnA xen-2.4.16/drivers/ide/ide-disk.c
+3e4a8cb7DcFFHW_fG_OHbY_6f3lPWw xen-2.4.16/drivers/ide/ide-disk.c.orig
3ddb79bdIPNW36FrlId94jTXaW8HoA xen-2.4.16/drivers/ide/ide-dma.c
3ddb79be5Ysvhn4se_Z-LQY_hI6UPw xen-2.4.16/drivers/ide/ide-features.c
3ddb79bdh1ohsWYRH_KdaXr7cqs12w xen-2.4.16/drivers/ide/ide-geometry.c
3ddb79bdYcxXT-2UEaDcG0Ic4MIK1g xen-2.4.16/drivers/ide/ide-pci.c
3ddb79bdOXTbcImJo8DwmlNX88k78Q xen-2.4.16/drivers/ide/ide-probe.c
3ddb79bdDWFwINnKn29RlFDwGJhjYg xen-2.4.16/drivers/ide/ide-taskfile.c
+3e4a8d40XMqvT05EwZwJg1HMsFDUBA xen-2.4.16/drivers/ide/ide-xeno.c
3ddb79bdkDY1bSOYkToP1Cc49VdBxg xen-2.4.16/drivers/ide/ide.c
3ddb79bdPyAvT_WZTAFhaX0jp-yXSw xen-2.4.16/drivers/ide/ide_modes.h
+3e4a8d401aSwOzCScQXR3lsmNlAwUQ xen-2.4.16/drivers/ide/piix.c
3ddb79bfogeJNHTIepPjd8fy1TyoTw xen-2.4.16/drivers/net/3c509.c
3ddb79bfMlOcWUwjtg6oMYhGySHDDw xen-2.4.16/drivers/net/3c59x.c
3ddb79bfl_DWxZQFKiJ2BXrSedV4lg xen-2.4.16/drivers/net/8139cp.c
@@ -271,9 +278,12 @@
3ddb79b7v_Be34as7_mlzFlw65hOjQ xenolinux-2.4.16-sparse/arch/xeno/defconfig
3ddb79b7KUvtx0knQJoRaBDZQeNidg xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile
3ddb79b6Rc0uAOGFthIFxq1KGWZ_Iw xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c
+3e4a8cb7JECr--r1ipnrkd7NKdbUqQ xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c
+3e4a8cb7SLWsLTXQjv7ng6-3hL4pCA xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c
3ddb79b7LLVJBGynxHSOh9A9l97sug xenolinux-2.4.16-sparse/arch/xeno/drivers/console/Makefile
3ddb79b7UG2QiRAU-Wvc1Y_BLigu1Q xenolinux-2.4.16-sparse/arch/xeno/drivers/console/console.c
3ddb79b75eo4PRXkT6Th9popt_SJhg xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/Makefile
+3e4a8cb79dT0F4q5T4GEqMj4CtAquQ xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c
3ddb79b7Xyaoep6U0kLvx6Kx7OauDw xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_core.c
3df9ce13K7qSLBtHV-01QHPW62649Q xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_memory.c
3ddb79b7PulSkF9m3c7K5MkxHRf4hA xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_ops.h
@@ -317,10 +327,13 @@
3ddb79b83Zj7Xn2QVhU4HeMuAC9FjA xenolinux-2.4.16-sparse/arch/xeno/mm/init.c
3df9ce13TRWIv0Mawm15zESP7jcT7A xenolinux-2.4.16-sparse/arch/xeno/mm/mmu_context.c
3ddb79b7aKdTkbr3u6aze8tVwGh_TQ xenolinux-2.4.16-sparse/arch/xeno/vmlinux.lds
+3e4a8cb7lpFFwT1Iu9zXWc8Ew4klFA xenolinux-2.4.16-sparse/drivers/block/Config.in
3ddb79bbx682YH6vR2zbVOXwg73ULg xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c
3ddb79bcJfHdwrPsjqgI33_OsGdVCg xenolinux-2.4.16-sparse/drivers/block/rd.c
3ddb79bcpVu-IbnqwQqpRqsEbLpsuw xenolinux-2.4.16-sparse/drivers/char/tty_io.c
3e15d5273gfR2fbcYe05kqBSAvCX_w xenolinux-2.4.16-sparse/fs/exec.c
+3e4a8cb7kqfJTMeOpPcYxqxv7N18DA xenolinux-2.4.16-sparse/fs/partitions/check.c
+3e4a8cb7p079Xxly4uNcouacMSjJLw xenolinux-2.4.16-sparse/fs/partitions/msdos.c
3ddb79b8VFtfWSCrXKPN2K21zd_vtw xenolinux-2.4.16-sparse/include/asm-xeno/a.out.h
3ddb79b8Zzi13p3OAPV25QgiC3THAQ xenolinux-2.4.16-sparse/include/asm-xeno/apic.h
3ddb79baZDlsdV_m6C5CXnWMl15p1g xenolinux-2.4.16-sparse/include/asm-xeno/apicdef.h
@@ -426,7 +439,10 @@
3ddb79ba2qYtIQAT_-vCFkkZUXu_UQ xenolinux-2.4.16-sparse/include/asm-xeno/user.h
3ddb79bbqhb9X9qWOz5Bv4wOzrkITg xenolinux-2.4.16-sparse/include/asm-xeno/vga.h
3ddb79bbA52x94o6uwDYsbzrH2hjzA xenolinux-2.4.16-sparse/include/asm-xeno/xor.h
+3e4a8cb7ON8EclY3NN3YPXyMT941hA xenolinux-2.4.16-sparse/include/linux/blk.h
+3e4a8cb7GJrKD0z7EF0VZOhdEa01Mw xenolinux-2.4.16-sparse/include/linux/major.h
3ddb79bb_7YG4U75ZmEic9YXWTW7Vw xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h
+3e4a8cb7j05wwb1uPZgY16s68o7qAw xenolinux-2.4.16-sparse/init/main.c
3ddb79bcxkVPfWlZ1PQKvDrfArzOVw xenolinux-2.4.16-sparse/kernel/panic.c
3ddb79bbP31im-mx2NbfthSeqty1Dg xenolinux-2.4.16-sparse/mk
3e15d52e0_j129JPvo7xfYGndVFpwQ xenolinux-2.4.16-sparse/mm/memory.c
diff --git a/BitKeeper/etc/ignore b/BitKeeper/etc/ignore
index 4fe10ce2d4..e5be6b0ec8 100644
--- a/BitKeeper/etc/ignore
+++ b/BitKeeper/etc/ignore
@@ -3,3 +3,19 @@ PENDING/*
xen-2.4.16/common/kernel.c.old
xen-2.4.16/common/kernel.c.ok-ish
xen-2.4.16/size.image
+xen-2.4.16/drivers/block/ll_rw_blk.c.orig
+xen-2.4.16/drivers/ide/ide-disk.c.orig
+xen-2.4.16/drivers/ide/ide-probe.c.orig
+xen-2.4.16/drivers/ide/ide-taskfile.c.orig
+xen-2.4.16/drivers/ide/ide.c.orig
+xen-2.4.16/drivers/net/e1000/e1000.o
+xen-2.4.16/drivers/net/e1000/e1000_ethtool.o
+xen-2.4.16/drivers/net/e1000/e1000_hw.o
+xen-2.4.16/drivers/net/e1000/e1000_main.o
+xen-2.4.16/drivers/net/e1000/e1000_param.o
+xen-2.4.16/include/hypervisor-ifs/block.h.orig
+xen-2.4.16/include/xeno/blkdev.h.orig
+xen-2.4.16/include/xeno/sched.h.orig
+xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile.orig
+xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c.orig
+xenolinux-2.4.16-sparse/scripts/kconfig.tk
diff --git a/xen-2.4.16/Rules.mk b/xen-2.4.16/Rules.mk
index 8bea789439..33fb3d314b 100644
--- a/xen-2.4.16/Rules.mk
+++ b/xen-2.4.16/Rules.mk
@@ -15,6 +15,7 @@ OBJS += $(patsubst %.c,%.o,$(C_SRCS))
# Note that link order matters!
ALL_OBJS := $(BASEDIR)/common/common.o
ALL_OBJS += $(BASEDIR)/net/network.o
+ALL_OBJS += $(BASEDIR)/drivers/char/driver.o
ALL_OBJS += $(BASEDIR)/drivers/pci/driver.o
ALL_OBJS += $(BASEDIR)/drivers/net/driver.o
ALL_OBJS += $(BASEDIR)/drivers/block/driver.o
diff --git a/xen-2.4.16/arch/i386/entry.S b/xen-2.4.16/arch/i386/entry.S
index 34c8027eb0..928a96ed4e 100644
--- a/xen-2.4.16/arch/i386/entry.S
+++ b/xen-2.4.16/arch/i386/entry.S
@@ -524,6 +524,7 @@ ENTRY(hypervisor_call_table)
.long SYMBOL_NAME(kill_domain)
.long SYMBOL_NAME(do_dom0_op)
.long SYMBOL_NAME(do_network_op)
+ .long SYMBOL_NAME(do_block_io_op)
.long SYMBOL_NAME(do_set_debugreg)
.long SYMBOL_NAME(do_get_debugreg)
.long SYMBOL_NAME(do_update_descriptor)
diff --git a/xen-2.4.16/arch/i386/io_apic.c b/xen-2.4.16/arch/i386/io_apic.c
index c5ae3a3107..8ba37e3903 100644
--- a/xen-2.4.16/arch/i386/io_apic.c
+++ b/xen-2.4.16/arch/i386/io_apic.c
@@ -28,11 +28,23 @@
#include <xeno/config.h>
#include <asm/mc146818rtc.h>
#include <asm/io.h>
-#include <asm/desc.h>
#include <asm/smp.h>
+#include <asm/desc.h>
+#include <asm/smpboot.h>
+
+
+static unsigned int nmi_watchdog; /* XXXX XEN */
+
+#undef APIC_LOCKUP_DEBUG
+
+#define APIC_LOCKUP_DEBUG
static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;
+unsigned int int_dest_addr_mode = APIC_DEST_LOGICAL;
+unsigned char int_delivery_mode = dest_LowestPrio;
+
+
/*
* # of IRQ routing registers
*/
@@ -47,6 +59,7 @@ int nr_ioapic_registers[MAX_IO_APICS];
/*
* This is performance-critical, we want to do it O(1)
+ *
* the indexing order of this array favors 1:1 mappings
* between pins and IRQs.
*/
@@ -60,7 +73,7 @@ static struct irq_pin_list {
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+static void __init add_pin_to_irq(unsigned int irq, int apic, int pin)
{
static int first_free_entry = NR_IRQS;
struct irq_pin_list *entry = irq_2_pin + irq;
@@ -78,6 +91,26 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
entry->pin = pin;
}
+/*
+ * Reroute an IRQ to a different pin.
+ */
+static void __init replace_pin_at_irq(unsigned int irq,
+ int oldapic, int oldpin,
+ int newapic, int newpin)
+{
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ while (1) {
+ if (entry->apic == oldapic && entry->pin == oldpin) {
+ entry->apic = newapic;
+ entry->pin = newpin;
+ }
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+}
+
#define __DO_ACTION(R, ACTION, FINAL) \
\
{ \
@@ -157,6 +190,66 @@ static void clear_IO_APIC (void)
}
/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+int pirq_entries [MAX_PIRQS];
+int pirqs_enabled;
+
+int skip_ioapic_setup;
+#if 0
+
+static int __init noioapic_setup(char *str)
+{
+ skip_ioapic_setup = 1;
+ return 1;
+}
+
+__setup("noapic", noioapic_setup);
+
+static int __init ioapic_setup(char *str)
+{
+ skip_ioapic_setup = 0;
+ return 1;
+}
+
+__setup("apic", ioapic_setup);
+
+
+
+static int __init ioapic_pirq_setup(char *str)
+{
+ int i, max;
+ int ints[MAX_PIRQS+1];
+
+ get_options(str, ARRAY_SIZE(ints), ints);
+
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+
+ pirqs_enabled = 1;
+ printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n");
+ max = MAX_PIRQS;
+ if (ints[0] < MAX_PIRQS)
+ max = ints[0];
+
+ for (i = 0; i < max; i++) {
+ printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+ /*
+ * PIRQs are mapped upside down, usually.
+ */
+ pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+ }
+ return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+
+#endif
+
+/*
* Find the IRQ entry number of a certain pin.
*/
static int __init find_irq_entry(int apic, int pin, int type)
@@ -206,7 +299,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
bus, slot, pin);
- if (mp_bus_id_to_pci_bus[bus] == -1) {
+ if ((mp_bus_id_to_pci_bus==NULL) || (mp_bus_id_to_pci_bus[bus] == -1)) {
printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
return -1;
}
@@ -466,6 +559,20 @@ static int pin_2_irq(int idx, int apic, int pin)
}
}
+ /*
+ * PCI IRQ command line redirection. Yes, limits are hardcoded.
+ */
+ if ((pin >= 16) && (pin <= 23)) {
+ if (pirq_entries[pin-16] != -1) {
+ if (!pirq_entries[pin-16]) {
+ printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16);
+ } else {
+ irq = pirq_entries[pin-16];
+ printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n",
+ pin-16, irq);
+ }
+ }
+ }
return irq;
}
@@ -495,11 +602,17 @@ static int __init assign_irq_vector(int irq)
return IO_APIC_VECTOR(irq);
next:
current_vector += 8;
+
/* XXX Skip the guestOS -> Xen syscall vector! XXX */
if (current_vector == HYPERVISOR_CALL_VECTOR) goto next;
/* XXX Skip the Linux/BSD fast-trap vector! XXX */
if (current_vector == 0x80) goto next;
+#if 0
+ if (current_vector == SYSCALL_VECTOR)
+ goto next;
+#endif
+
if (current_vector > FIRST_SYSTEM_VECTOR) {
offset++;
current_vector = FIRST_DEVICE_VECTOR + offset;
@@ -532,10 +645,10 @@ void __init setup_IO_APIC_irqs(void)
*/
memset(&entry,0,sizeof(entry));
- entry.delivery_mode = dest_LowestPrio;
- entry.dest_mode = INT_DELIVERY_MODE;
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
entry.mask = 0; /* enable IRQ */
- entry.dest.logical.logical_dest = TARGET_CPUS;
+ entry.dest.logical.logical_dest = target_cpus();
idx = find_irq_entry(apic,pin,mp_INT);
if (idx == -1) {
@@ -553,11 +666,18 @@ void __init setup_IO_APIC_irqs(void)
if (irq_trigger(idx)) {
entry.trigger = 1;
entry.mask = 1;
- entry.dest.logical.logical_dest = TARGET_CPUS;
}
irq = pin_2_irq(idx, apic, pin);
- add_pin_to_irq(irq, apic, pin);
+ /*
+ * skip adding the timer int on secondary nodes, which causes
+ * a small but painful rift in the time-space continuum
+ */
+ if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ && (apic != 0) && (irq == 0))
+ continue;
+ else
+ add_pin_to_irq(irq, apic, pin);
if (!apic && !IO_APIC_IRQ(irq))
continue;
@@ -607,16 +727,16 @@ void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
* We use logical delivery to get the timer IRQ
* to the first CPU.
*/
- entry.dest_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
entry.mask = 0; /* unmask IRQ now */
- entry.dest.logical.logical_dest = TARGET_CPUS;
- entry.delivery_mode = dest_LowestPrio;
+ entry.dest.logical.logical_dest = target_cpus();
+ entry.delivery_mode = INT_DELIVERY_MODE;
entry.polarity = 0;
entry.trigger = 0;
entry.vector = vector;
/*
- * The timer IRQ doesnt have to know that behind the
+ * The timer IRQ doesn't have to know that behind the
* scene we have a 8259A-master in AEOI mode ...
*/
irq_desc[0].handler = &ioapic_edge_irq_type;
@@ -634,8 +754,9 @@ void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
void __init UNEXPECTED_IO_APIC(void)
{
- printk(KERN_WARNING " WARNING: unexpected IO-APIC, please mail\n");
- printk(KERN_WARNING " to linux-smp@vger.kernel.org\n");
+ printk(KERN_WARNING
+ "An unexpected IO-APIC was found. If this kernel release is less than\n"
+ "three months old please report this to linux-smp@vger.kernel.org\n");
}
void __init print_IO_APIC(void)
@@ -667,7 +788,7 @@ void __init print_IO_APIC(void)
spin_unlock_irqrestore(&ioapic_lock, flags);
printk("\n");
- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+ printk(KERN_DEBUG "IO APIC #%d..XXXX....\n", mp_ioapics[apic].mpc_apicid);
printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)&reg_00);
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.ID);
if (reg_00.__reserved_1 || reg_00.__reserved_2)
@@ -688,6 +809,7 @@ void __init print_IO_APIC(void)
printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.PRQ);
printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.version);
if ( (reg_01.version != 0x01) && /* 82489DX IO-APICs */
+ (reg_01.version != 0x02) && /* VIA */
(reg_01.version != 0x10) && /* oldest IO-APICs */
(reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */
(reg_01.version != 0x13) && /* Xeon IO-APICs */
@@ -898,6 +1020,9 @@ static void __init enable_IO_APIC(void)
irq_2_pin[i].pin = -1;
irq_2_pin[i].next = 0;
}
+ if (!pirqs_enabled)
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
/*
* The number of IO-APIC IRQ registers (== #pins):
@@ -944,6 +1069,9 @@ static void __init setup_ioapic_ids_from_mpc (void)
unsigned char old_id;
unsigned long flags;
+ if (clustered_apic_mode)
+ /* We don't have a good way to do this yet - hack */
+ phys_id_present_map = (u_long) 0xf;
/*
* Set the IOAPIC ID to the value stored in the MPC table.
*/
@@ -956,7 +1084,7 @@ static void __init setup_ioapic_ids_from_mpc (void)
old_id = mp_ioapics[apic].mpc_apicid;
- if (mp_ioapics[apic].mpc_apicid >= 0xf) {
+ if (mp_ioapics[apic].mpc_apicid >= apic_broadcast_id) {
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
apic, mp_ioapics[apic].mpc_apicid);
printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
@@ -968,14 +1096,16 @@ static void __init setup_ioapic_ids_from_mpc (void)
* Sanity check, is the ID really free? Every APIC in a
* system must have a unique ID or we get lots of nice
* 'stuck on smp_invalidate_needed IPI wait' messages.
+ * I/O APIC IDs no longer have any meaning for xAPICs and SAPICs.
*/
- if (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid)) {
+ if ((clustered_apic_mode != CLUSTERED_APIC_XAPIC) &&
+ (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid))) {
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
apic, mp_ioapics[apic].mpc_apicid);
for (i = 0; i < 0xf; i++)
if (!(phys_id_present_map & (1 << i)))
break;
- if (i >= 0xf)
+ if (i >= apic_broadcast_id)
panic("Max APIC ID exceeded!\n");
printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
i);
@@ -1170,6 +1300,10 @@ static void end_level_ioapic_irq (unsigned int irq)
#ifdef APIC_LOCKUP_DEBUG
struct irq_pin_list *entry;
#endif
+
+#ifdef APIC_MISMATCH_DEBUG
+ atomic_inc(&irq_mis_count);
+#endif
spin_lock(&ioapic_lock);
__mask_and_edge_IO_APIC_irq(irq);
#ifdef APIC_LOCKUP_DEBUG
@@ -1302,6 +1436,36 @@ static struct hw_interrupt_type lapic_irq_type = {
end_lapic_irq
};
+static void enable_NMI_through_LVT0 (void * dummy)
+{
+ unsigned int v, ver;
+
+ ver = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(ver);
+ v = APIC_DM_NMI; /* unmask and set to NMI */
+ if (!APIC_INTEGRATED(ver)) /* 82489DX */
+ v |= APIC_LVT_LEVEL_TRIGGER;
+ apic_write_around(APIC_LVT0, v);
+}
+
+static void setup_nmi (void)
+{
+ /*
+ * Dirty trick to enable the NMI watchdog ...
+ * We put the 8259A master into AEOI mode and
+ * unmask on all local APICs LVT0 as NMI.
+ *
+ * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
+ * is from Maciej W. Rozycki - so we do not have to EOI from
+ * the NMI handler or the timer interrupt.
+ */
+ printk(KERN_INFO "activating NMI Watchdog ...");
+
+ smp_call_function(enable_NMI_through_LVT0, NULL, 1, 1);
+ enable_NMI_through_LVT0(NULL);
+
+ printk(" done.\n");
+}
/*
* This looks a bit hackish but it's about the only one way of sending
@@ -1407,6 +1571,12 @@ static inline void check_timer(void)
*/
unmask_IO_APIC_irq(0);
if (timer_irq_works()) {
+ if (nmi_watchdog == NMI_IO_APIC) {
+ disable_8259A_irq(0);
+ setup_nmi();
+ enable_8259A_irq(0);
+ // XXX Xen check_nmi_watchdog();
+ }
return;
}
clear_IO_APIC_pin(0, pin1);
@@ -1422,6 +1592,14 @@ static inline void check_timer(void)
setup_ExtINT_IRQ0_pin(pin2, vector);
if (timer_irq_works()) {
printk("works.\n");
+ if (pin1 != -1)
+ replace_pin_at_irq(0, 0, pin1, 0, pin2);
+ else
+ add_pin_to_irq(0, 0, pin2);
+ if (nmi_watchdog == NMI_IO_APIC) {
+ setup_nmi();
+ // XXX Xen check_nmi_watchdog();
+ }
return;
}
/*
@@ -1431,6 +1609,11 @@ static inline void check_timer(void)
}
printk(" failed.\n");
+ if (nmi_watchdog) {
+ printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
+ nmi_watchdog = 0;
+ }
+
printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
disable_8259A_irq(0);
@@ -1462,10 +1645,19 @@ static inline void check_timer(void)
}
/*
+ *
* IRQ's that are handled by the old PIC in all cases:
* - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
* Linux doesn't really care, as it's not actually used
* for any interrupt handling anyway.
+ * - There used to be IRQ13 here as well, but all
+ * MPS-compliant must not use it for FPU coupling and we
+ * want to use exception 16 anyway. And there are
+ * systems who connect it to an I/O APIC for other uses.
+ * Thus we don't mark it special any longer.
+ *
+ * Additionally, something is definitely wrong with irq9
+ * on PIIX4 boards.
*/
#define PIC_IRQS (1<<2)
diff --git a/xen-2.4.16/arch/i386/mpparse.c b/xen-2.4.16/arch/i386/mpparse.c
index c5cf58a312..4f0edeea0e 100644
--- a/xen-2.4.16/arch/i386/mpparse.c
+++ b/xen-2.4.16/arch/i386/mpparse.c
@@ -20,6 +20,10 @@
#include <xeno/smp.h>
#include <asm/mpspec.h>
#include <asm/pgalloc.h>
+#include <asm/smpboot.h>
+#include <xeno/kernel.h>
+
+int numnodes = 1; /* XXX Xen */
/* Have we found an MP table */
int smp_found_config;
@@ -29,16 +33,20 @@ int smp_found_config;
* MP-table.
*/
int apic_version [MAX_APICS];
-int mp_bus_id_to_type [MAX_MP_BUSSES];
-int mp_bus_id_to_node [MAX_MP_BUSSES];
-int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
int mp_current_pci_id;
+int *mp_bus_id_to_type;
+int *mp_bus_id_to_node;
+int *mp_bus_id_to_local;
+int *mp_bus_id_to_pci_bus;
+int max_mp_busses;
+int max_irq_sources;
/* I/O APIC entries */
struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
/* # of MP IRQ source entries */
-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+struct mpc_config_intsrc *mp_irqs;
/* MP IRQ source entries */
int mp_irq_entries;
@@ -56,23 +64,32 @@ static unsigned int num_processors;
/* Bitmask of physically existing CPUs */
unsigned long phys_cpu_present_map;
+unsigned long logical_cpu_present_map;
+
+#ifdef CONFIG_X86_CLUSTERED_APIC
+unsigned char esr_disable = 0;
+unsigned char clustered_apic_mode = CLUSTERED_APIC_NONE;
+unsigned int apic_broadcast_id = APIC_BROADCAST_ID_APIC;
+#endif
+unsigned char raw_phys_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
/*
* Intel MP BIOS table parsing routines:
*/
+#ifndef CONFIG_X86_VISWS_APIC
/*
* Checksum an MP configuration block.
*/
static int __init mpf_checksum(unsigned char *mp, int len)
{
- int sum = 0;
+ int sum = 0;
- while (len--)
- sum += *mp++;
+ while (len--)
+ sum += *mp++;
- return sum & 0xFF;
+ return sum & 0xFF;
}
/*
@@ -81,38 +98,47 @@ static int __init mpf_checksum(unsigned char *mp, int len)
static char __init *mpc_family(int family,int model)
{
- static char n[32];
- static char *model_defs[]=
- {
- "80486DX","80486DX",
- "80486SX","80486DX/2 or 80487",
- "80486SL","80486SX/2",
- "Unknown","80486DX/2-WB",
- "80486DX/4","80486DX/4-WB"
- };
-
- switch (family) {
- case 0x04:
- if (model < 10)
- return model_defs[model];
- break;
-
- case 0x05:
- return("Pentium(tm)");
-
- case 0x06:
- return("Pentium(tm) Pro");
-
- case 0x0F:
- if (model == 0x00)
- return("Pentium 4(tm)");
- if (model == 0x0F)
- return("Special controller");
- }
- sprintf(n,"Unknown CPU [%d:%d]",family, model);
- return n;
+ static char n[32];
+ static char *model_defs[]=
+ {
+ "80486DX","80486DX",
+ "80486SX","80486DX/2 or 80487",
+ "80486SL","80486SX/2",
+ "Unknown","80486DX/2-WB",
+ "80486DX/4","80486DX/4-WB"
+ };
+
+ switch (family) {
+ case 0x04:
+ if (model < 10)
+ return model_defs[model];
+ break;
+
+ case 0x05:
+ return("Pentium(tm)");
+
+ case 0x06:
+ return("Pentium(tm) Pro");
+
+ case 0x0F:
+ if (model == 0x00)
+ return("Pentium 4(tm)");
+ if (model == 0x02)
+ return("Pentium 4(tm) XEON(tm)");
+ if (model == 0x0F)
+ return("Special controller");
+ }
+ sprintf(n,"Unknown CPU [%d:%d]",family, model);
+ return n;
}
+#ifdef CONFIG_X86_IO_APIC
+// XXX Xen extern int have_acpi_tables; /* set by acpitable.c */
+#define have_acpi_tables (0)
+#else
+#define have_acpi_tables (0)
+#endif
+
/*
* Have to match translation table entries to main table entries by counter
* hence the mpc_record variable .... can't see a less disgusting way of
@@ -120,127 +146,256 @@ static char __init *mpc_family(int family,int model)
*/
static int mpc_record;
+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
void __init MP_processor_info (struct mpc_config_processor *m)
{
- int ver, logical_apicid;
+ int ver, quad, logical_apicid;
- if (!(m->mpc_cpuflag & CPU_ENABLED))
- return;
-
- logical_apicid = m->mpc_apicid;
- printk("Processor #%d %s APIC version %d\n",
- m->mpc_apicid,
- mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
- (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
- m->mpc_apicver);
-
- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
- Dprintk(" Bootup CPU\n");
- boot_cpu_physical_apicid = m->mpc_apicid;
- boot_cpu_logical_apicid = logical_apicid;
- }
-
- num_processors++;
-
- if (m->mpc_apicid > MAX_APICS) {
- printk("Processor #%d INVALID. (Max ID: %d).\n",
- m->mpc_apicid, MAX_APICS);
- return;
- }
- ver = m->mpc_apicver;
-
- phys_cpu_present_map |= 1 << m->mpc_apicid;
-
- /*
- * Validate version
- */
- if (ver == 0x0) {
- printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
- ver = 0x10;
- }
- apic_version[m->mpc_apicid] = ver;
+ if (!(m->mpc_cpuflag & CPU_ENABLED))
+ return;
+
+ logical_apicid = m->mpc_apicid;
+ if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+ quad = translation_table[mpc_record]->trans_quad;
+ logical_apicid = (quad << 4) +
+ (m->mpc_apicid ? m->mpc_apicid << 1 : 1);
+ printk("Processor #%d %s APIC version %d (quad %d, apic %d)\n",
+ m->mpc_apicid,
+ mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
+ (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
+ m->mpc_apicver, quad, logical_apicid);
+ } else {
+ printk("Processor #%d %s APIC version %d\n",
+ m->mpc_apicid,
+ mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
+ (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
+ m->mpc_apicver);
+ }
+
+ if (m->mpc_featureflag&(1<<0))
+ Dprintk(" Floating point unit present.\n");
+ if (m->mpc_featureflag&(1<<7))
+ Dprintk(" Machine Exception supported.\n");
+ if (m->mpc_featureflag&(1<<8))
+ Dprintk(" 64 bit compare & exchange supported.\n");
+ if (m->mpc_featureflag&(1<<9))
+ Dprintk(" Internal APIC present.\n");
+ if (m->mpc_featureflag&(1<<11))
+ Dprintk(" SEP present.\n");
+ if (m->mpc_featureflag&(1<<12))
+ Dprintk(" MTRR present.\n");
+ if (m->mpc_featureflag&(1<<13))
+ Dprintk(" PGE present.\n");
+ if (m->mpc_featureflag&(1<<14))
+ Dprintk(" MCA present.\n");
+ if (m->mpc_featureflag&(1<<15))
+ Dprintk(" CMOV present.\n");
+ if (m->mpc_featureflag&(1<<16))
+ Dprintk(" PAT present.\n");
+ if (m->mpc_featureflag&(1<<17))
+ Dprintk(" PSE present.\n");
+ if (m->mpc_featureflag&(1<<18))
+ Dprintk(" PSN present.\n");
+ if (m->mpc_featureflag&(1<<19))
+ Dprintk(" Cache Line Flush Instruction present.\n");
+ /* 20 Reserved */
+ if (m->mpc_featureflag&(1<<21))
+ Dprintk(" Debug Trace and EMON Store present.\n");
+ if (m->mpc_featureflag&(1<<22))
+ Dprintk(" ACPI Thermal Throttle Registers present.\n");
+ if (m->mpc_featureflag&(1<<23))
+ Dprintk(" MMX present.\n");
+ if (m->mpc_featureflag&(1<<24))
+ Dprintk(" FXSR present.\n");
+ if (m->mpc_featureflag&(1<<25))
+ Dprintk(" XMM present.\n");
+ if (m->mpc_featureflag&(1<<26))
+ Dprintk(" Willamette New Instructions present.\n");
+ if (m->mpc_featureflag&(1<<27))
+ Dprintk(" Self Snoop present.\n");
+ if (m->mpc_featureflag&(1<<28))
+ Dprintk(" HT present.\n");
+ if (m->mpc_featureflag&(1<<29))
+ Dprintk(" Thermal Monitor present.\n");
+ /* 30, 31 Reserved */
+
+
+ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+ Dprintk(" Bootup CPU\n");
+ boot_cpu_physical_apicid = m->mpc_apicid;
+ boot_cpu_logical_apicid = logical_apicid;
+ }
+
+ num_processors++;
+
+ if (m->mpc_apicid > MAX_APICS) {
+ printk("Processor #%d INVALID. (Max ID: %d).\n",
+ m->mpc_apicid, MAX_APICS);
+ --num_processors;
+ return;
+ }
+ ver = m->mpc_apicver;
+
+ logical_cpu_present_map |= 1 << (num_processors-1);
+ phys_cpu_present_map |= apicid_to_phys_cpu_present(m->mpc_apicid);
+
+ /*
+ * Validate version
+ */
+ if (ver == 0x0) {
+ printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
+ ver = 0x10;
+ }
+ apic_version[m->mpc_apicid] = ver;
+ raw_phys_apicid[num_processors - 1] = m->mpc_apicid;
}
static void __init MP_bus_info (struct mpc_config_bus *m)
{
- char str[7];
+ char str[7];
+ int quad;
- memcpy(str, m->mpc_bustype, 6);
- str[6] = 0;
+ memcpy(str, m->mpc_bustype, 6);
+ str[6] = 0;
- Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
-
- if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
- } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
- } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
- mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
- mp_current_pci_id++;
- } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
- } else {
- printk("Unknown bustype %s - ignoring\n", str);
- }
+ if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+ quad = translation_table[mpc_record]->trans_quad;
+ mp_bus_id_to_node[m->mpc_busid] = quad;
+ mp_bus_id_to_local[m->mpc_busid] = translation_table[mpc_record]->trans_local;
+ quad_local_to_mp_bus_id[quad][translation_table[mpc_record]->trans_local] = m->mpc_busid;
+ printk("Bus #%d is %s (node %d)\n", m->mpc_busid, str, quad);
+ } else {
+ Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
+ }
+
+ if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+ } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+ } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+ mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+ mp_current_pci_id++;
+ } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+ } else {
+ printk("Unknown bustype %s - ignoring\n", str);
+ }
}
static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
{
- if (!(m->mpc_flags & MPC_APIC_USABLE))
- return;
-
- printk("I/O APIC #%d Version %d at 0x%lX.\n",
- m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
- if (nr_ioapics >= MAX_IO_APICS) {
- printk("Max # of I/O APICs (%d) exceeded (found %d).\n",
- MAX_IO_APICS, nr_ioapics);
- panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
- }
- if (!m->mpc_apicaddr) {
- printk("WARNING: bogus zero I/O APIC address"
- " found in MP table, skipping!\n");
- return;
- }
- mp_ioapics[nr_ioapics] = *m;
- nr_ioapics++;
+ if (!(m->mpc_flags & MPC_APIC_USABLE))
+ return;
+
+ printk("I/O APIC #%d Version %d at 0x%lX.\n",
+ m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk("Max # of I/O APICs (%d) exceeded (found %d).\n",
+ MAX_IO_APICS, nr_ioapics);
+ panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+ }
+ if (!m->mpc_apicaddr) {
+ printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+ " found in MP table, skipping!\n");
+ return;
+ }
+ mp_ioapics[nr_ioapics] = *m;
+ nr_ioapics++;
}
static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
{
- mp_irqs [mp_irq_entries] = *m;
- Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
- " IRQ %02x, APIC ID %x, APIC INT %02x\n",
- m->mpc_irqtype, m->mpc_irqflag & 3,
- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
- m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!!\n");
+ mp_irqs [mp_irq_entries] = *m;
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+ " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+ m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+ if (++mp_irq_entries == max_irq_sources)
+ panic("Max # of irq sources exceeded!!\n");
}
static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
{
- Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
- " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
- m->mpc_irqtype, m->mpc_irqflag & 3,
- (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
- m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
- /*
- * Well it seems all SMP boards in existence
- * use ExtINT/LVT1 == LINT0 and
- * NMI/LVT2 == LINT1 - the following check
- * will show us if this assumptions is false.
- * Until then we do not have to add baggage.
- */
- if ((m->mpc_irqtype == mp_ExtINT) &&
- (m->mpc_destapiclint != 0))
- BUG();
- if ((m->mpc_irqtype == mp_NMI) &&
- (m->mpc_destapiclint != 1))
- BUG();
+ Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+ " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+ /*
+ * Well it seems all SMP boards in existence
+ * use ExtINT/LVT1 == LINT0 and
+ * NMI/LVT2 == LINT1 - the following check
+ * will show us if this assumption is false.
+ * Until then we do not have to add baggage.
+ */
+ if ((m->mpc_irqtype == mp_ExtINT) &&
+ (m->mpc_destapiclint != 0))
+ BUG();
+ if ((m->mpc_irqtype == mp_NMI) &&
+ (m->mpc_destapiclint != 1))
+ BUG();
}
+static void __init MP_translation_info (struct mpc_config_translation *m)
+{
+ printk("Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
+
+ if (mpc_record >= MAX_MPC_ENTRY)
+ printk("MAX_MPC_ENTRY exceeded!\n");
+ else
+ translation_table[mpc_record] = m; /* stash this for later */
+ if (m->trans_quad+1 > numnodes)
+ numnodes = m->trans_quad+1;
+}
+
+/*
+ * Read/parse the MPC oem tables
+ */
+
+static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
+ unsigned short oemsize)
+{
+ int count = sizeof (*oemtable); /* the header size */
+ unsigned char *oemptr = ((unsigned char *)oemtable)+count;
+
+ printk("Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
+ if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
+ {
+ printk("SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+ oemtable->oem_signature[0],
+ oemtable->oem_signature[1],
+ oemtable->oem_signature[2],
+ oemtable->oem_signature[3]);
+ return;
+ }
+ if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
+ {
+ printk("SMP oem mptable: checksum error!\n");
+ return;
+ }
+ while (count < oemtable->oem_length) {
+ switch (*oemptr) {
+ case MP_TRANSLATION:
+ {
+ struct mpc_config_translation *m=
+ (struct mpc_config_translation *)oemptr;
+ MP_translation_info(m);
+ oemptr += sizeof(*m);
+ count += sizeof(*m);
+ ++mpc_record;
+ break;
+ }
+ default:
+ {
+ printk("Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
+ return;
+ }
+ }
+ }
+}
/*
* Read/parse the MPC
@@ -248,383 +403,542 @@ static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
static int __init smp_read_mpc(struct mp_config_table *mpc)
{
- char str[16];
- int count=sizeof(*mpc);
- unsigned char *mpt=((unsigned char *)mpc)+count;
-
- if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
- panic("SMP mptable: bad signature [%c%c%c%c]!\n",
- mpc->mpc_signature[0],
- mpc->mpc_signature[1],
- mpc->mpc_signature[2],
- mpc->mpc_signature[3]);
- return 0;
- }
- if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
- panic("SMP mptable: checksum error!\n");
- return 0;
- }
- if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
- printk("SMP mptable: bad table version (%d)!!\n",
- mpc->mpc_spec);
- return 0;
- }
- if (!mpc->mpc_lapic) {
- printk("SMP mptable: null local APIC address!\n");
- return 0;
- }
- memcpy(str,mpc->mpc_oem,8);
- str[8]=0;
- printk("OEM ID: %s ",str);
-
- memcpy(str,mpc->mpc_productid,12);
- str[12]=0;
- printk("Product ID: %s ",str);
-
- printk("APIC at: 0x%lX\n", mpc->mpc_lapic);
-
- /* save the local APIC address, it might be non-default. */
- mp_lapic_addr = mpc->mpc_lapic;
-
- /*
- * Now process the configuration blocks.
- */
- while (count < mpc->mpc_length) {
- switch(*mpt) {
- case MP_PROCESSOR:
- {
- struct mpc_config_processor *m=
- (struct mpc_config_processor *)mpt;
-
- MP_processor_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_BUS:
- {
- struct mpc_config_bus *m=
- (struct mpc_config_bus *)mpt;
- MP_bus_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_IOAPIC:
- {
- struct mpc_config_ioapic *m=
- (struct mpc_config_ioapic *)mpt;
- MP_ioapic_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
- break;
- }
- case MP_INTSRC:
- {
- struct mpc_config_intsrc *m=
- (struct mpc_config_intsrc *)mpt;
-
- MP_intsrc_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
- break;
- }
- case MP_LINTSRC:
- {
- struct mpc_config_lintsrc *m=
- (struct mpc_config_lintsrc *)mpt;
- MP_lintsrc_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
- break;
- }
- default:
- {
- count = mpc->mpc_length;
- break;
- }
- }
- ++mpc_record;
- }
-
- if (!num_processors)
- printk("SMP mptable: no processors registered!\n");
- return num_processors;
+ char oem[16], prod[14];
+ int count=sizeof(*mpc);
+ unsigned char *mpt=((unsigned char *)mpc)+count;
+ int num_bus = 0;
+ int num_irq = 0;
+ unsigned char *bus_data;
+
+ if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+ panic("SMP mptable: bad signature [%c%c%c%c]!\n",
+ mpc->mpc_signature[0],
+ mpc->mpc_signature[1],
+ mpc->mpc_signature[2],
+ mpc->mpc_signature[3]);
+ return 0;
+ }
+ if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+ panic("SMP mptable: checksum error!\n");
+ return 0;
+ }
+ if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+ printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
+ mpc->mpc_spec);
+ return 0;
+ }
+ if (!mpc->mpc_lapic) {
+ printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+ return 0;
+ }
+ memcpy(oem,mpc->mpc_oem,8);
+ oem[8]=0;
+ printk("OEM ID: %s ",oem);
+
+ memcpy(prod,mpc->mpc_productid,12);
+ prod[12]=0;
+ printk("Product ID: %s ",prod);
+
+ detect_clustered_apic(oem, prod);
+
+ printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
+
+ /* save the local APIC address, it might be non-default,
+ * but only if we're not using the ACPI tables
+ */
+ if (!have_acpi_tables)
+ mp_lapic_addr = mpc->mpc_lapic;
+
+ if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) && mpc->mpc_oemptr) {
+ /* We need to process the oem mpc tables to tell us which quad things are in ... */
+ mpc_record = 0;
+ smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, mpc->mpc_oemsize);
+ mpc_record = 0;
+ }
+
+ /* Pre-scan to determine the number of bus and
+ * interrupt records we have
+ */
+ while (count < mpc->mpc_length) {
+ switch (*mpt) {
+ case MP_PROCESSOR:
+ mpt += sizeof(struct mpc_config_processor);
+ count += sizeof(struct mpc_config_processor);
+ break;
+ case MP_BUS:
+ ++num_bus;
+ mpt += sizeof(struct mpc_config_bus);
+ count += sizeof(struct mpc_config_bus);
+ break;
+ case MP_INTSRC:
+ ++num_irq;
+ mpt += sizeof(struct mpc_config_intsrc);
+ count += sizeof(struct mpc_config_intsrc);
+ break;
+ case MP_IOAPIC:
+ mpt += sizeof(struct mpc_config_ioapic);
+ count += sizeof(struct mpc_config_ioapic);
+ break;
+ case MP_LINTSRC:
+ mpt += sizeof(struct mpc_config_lintsrc);
+ count += sizeof(struct mpc_config_lintsrc);
+ break;
+ default:
+ count = mpc->mpc_length;
+ break;
+ }
+ }
+ /*
+ * Paranoia: Allocate one extra of both the number of busses and number
+ * of irqs, and make sure that we have at least 4 interrupts per PCI
+ * slot. But some machines do not report very many busses, so we need
+ * to fall back on the older defaults.
+ */
+ ++num_bus;
+ max_mp_busses = max(num_bus, MAX_MP_BUSSES);
+ if (num_irq < (4 * max_mp_busses))
+ num_irq = 4 * num_bus; /* 4 intr/PCI slot */
+ ++num_irq;
+ max_irq_sources = max(num_irq, MAX_IRQ_SOURCES);
+
+ count = (max_mp_busses * sizeof(int)) * 4;
+ count += (max_irq_sources * sizeof(struct mpc_config_intsrc));
+
+ {
+ //bus_data = alloc_bootmem(count); XXX Xen
+ static char arr[4096];
+ if(count > 4096) BUG();
+ bus_data = (void*)arr;
+
+ }
+ if (!bus_data) {
+ printk(KERN_ERR "SMP mptable: out of memory!\n");
+ return 0;
+ }
+ mp_bus_id_to_type = (int *)&bus_data[0];
+ mp_bus_id_to_node = (int *)&bus_data[(max_mp_busses * sizeof(int))];
+ mp_bus_id_to_local = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 2];
+ mp_bus_id_to_pci_bus = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 3];
+ mp_irqs = (struct mpc_config_intsrc *)&bus_data[(max_mp_busses * sizeof(int)) * 4];
+ memset(mp_bus_id_to_pci_bus, -1, max_mp_busses);
+
+ /*
+ * Now process the configuration blocks.
+ */
+ count = sizeof(*mpc);
+ mpt = ((unsigned char *)mpc)+count;
+ while (count < mpc->mpc_length) {
+ switch(*mpt) {
+ case MP_PROCESSOR:
+ {
+ struct mpc_config_processor *m=
+ (struct mpc_config_processor *)mpt;
+
+ /* ACPI may already have provided this one for us */
+ if (!have_acpi_tables)
+ MP_processor_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+ }
+ case MP_BUS:
+ {
+ struct mpc_config_bus *m=
+ (struct mpc_config_bus *)mpt;
+ MP_bus_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+ }
+ case MP_IOAPIC:
+ {
+ struct mpc_config_ioapic *m=
+ (struct mpc_config_ioapic *)mpt;
+ MP_ioapic_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ case MP_INTSRC:
+ {
+ struct mpc_config_intsrc *m=
+ (struct mpc_config_intsrc *)mpt;
+
+ MP_intsrc_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ case MP_LINTSRC:
+ {
+ struct mpc_config_lintsrc *m=
+ (struct mpc_config_lintsrc *)mpt;
+ MP_lintsrc_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ default:
+ {
+ count = mpc->mpc_length;
+ break;
+ }
+ }
+ ++mpc_record;
+ }
+
+ if (clustered_apic_mode){
+ phys_cpu_present_map = logical_cpu_present_map;
+ }
+
+
+ printk("Enabling APIC mode: ");
+ if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ printk("Clustered Logical. ");
+ else if(clustered_apic_mode == CLUSTERED_APIC_XAPIC)
+ printk("Physical. ");
+ else
+ printk("Flat. ");
+ printk("Using %d I/O APICs\n",nr_ioapics);
+
+ if (!num_processors)
+ printk(KERN_ERR "SMP mptable: no processors registered!\n");
+ return num_processors;
}
static int __init ELCR_trigger(unsigned int irq)
{
- unsigned int port;
+ unsigned int port;
- port = 0x4d0 + (irq >> 3);
- return (inb(port) >> (irq & 7)) & 1;
+ port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
}
static void __init construct_default_ioirq_mptable(int mpc_default_type)
{
- struct mpc_config_intsrc intsrc;
- int i;
- int ELCR_fallback = 0;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqflag = 0; /* conforming */
- intsrc.mpc_srcbus = 0;
- intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
-
- intsrc.mpc_irqtype = mp_INT;
-
- /*
- * If true, we have an ISA/PCI system with no IRQ entries
- * in the MP table. To prevent the PCI interrupts from being set up
- * incorrectly, we try to use the ELCR. The sanity check to see if
- * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
- * never be level sensitive, so we simply see if the ELCR agrees.
- * If it does, we assume it's valid.
- */
- if (mpc_default_type == 5) {
- printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
-
- if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
- printk("ELCR contains invalid data... not using ELCR\n");
- else {
- printk("Using ELCR to identify PCI interrupts\n");
- ELCR_fallback = 1;
- }
- }
-
- for (i = 0; i < 16; i++) {
- switch (mpc_default_type) {
- case 2:
- if (i == 0 || i == 13)
- continue; /* IRQ0 & IRQ13 not connected */
- /* fall through */
- default:
- if (i == 2)
- continue; /* IRQ2 is never connected */
- }
-
- if (ELCR_fallback) {
- /*
- * If the ELCR indicates a level-sensitive interrupt, we
- * copy that information over to the MP table in the
- * irqflag field (level sensitive, active high polarity).
- */
- if (ELCR_trigger(i))
- intsrc.mpc_irqflag = 13;
- else
- intsrc.mpc_irqflag = 0;
- }
-
- intsrc.mpc_srcbusirq = i;
- intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
- MP_intsrc_info(&intsrc);
- }
-
- intsrc.mpc_irqtype = mp_ExtINT;
- intsrc.mpc_srcbusirq = 0;
- intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
- MP_intsrc_info(&intsrc);
+ struct mpc_config_intsrc intsrc;
+ int i;
+ int ELCR_fallback = 0;
+
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqflag = 0; /* conforming */
+ intsrc.mpc_srcbus = 0;
+ intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+
+ intsrc.mpc_irqtype = mp_INT;
+
+ /*
+ * If true, we have an ISA/PCI system with no IRQ entries
+ * in the MP table. To prevent the PCI interrupts from being set up
+ * incorrectly, we try to use the ELCR. The sanity check to see if
+ * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+ * never be level sensitive, so we simply see if the ELCR agrees.
+ * If it does, we assume it's valid.
+ */
+ if (mpc_default_type == 5) {
+ printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+
+ if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+ printk("ELCR contains invalid data... not using ELCR\n");
+ else {
+ printk("Using ELCR to identify PCI interrupts\n");
+ ELCR_fallback = 1;
+ }
+ }
+
+ for (i = 0; i < 16; i++) {
+ switch (mpc_default_type) {
+ case 2:
+ if (i == 0 || i == 13)
+ continue; /* IRQ0 & IRQ13 not connected */
+ /* fall through */
+ default:
+ if (i == 2)
+ continue; /* IRQ2 is never connected */
+ }
+
+ if (ELCR_fallback) {
+ /*
+ * If the ELCR indicates a level-sensitive interrupt, we
+ * copy that information over to the MP table in the
+ * irqflag field (level sensitive, active high polarity).
+ */
+ if (ELCR_trigger(i))
+ intsrc.mpc_irqflag = 13;
+ else
+ intsrc.mpc_irqflag = 0;
+ }
+
+ intsrc.mpc_srcbusirq = i;
+ intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
+ MP_intsrc_info(&intsrc);
+ }
+
+ intsrc.mpc_irqtype = mp_ExtINT;
+ intsrc.mpc_srcbusirq = 0;
+ intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
+ MP_intsrc_info(&intsrc);
}
static inline void __init construct_default_ISA_mptable(int mpc_default_type)
{
- struct mpc_config_processor processor;
- struct mpc_config_bus bus;
- struct mpc_config_ioapic ioapic;
- struct mpc_config_lintsrc lintsrc;
- int linttypes[2] = { mp_ExtINT, mp_NMI };
- int i;
-
- /*
- * local APIC has default address
- */
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
- /*
- * 2 CPUs, numbered 0 & 1.
- */
- processor.mpc_type = MP_PROCESSOR;
- /* Either an integrated APIC or a discrete 82489DX. */
- processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
- processor.mpc_cpuflag = CPU_ENABLED;
- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
- (boot_cpu_data.x86_model << 4) |
- boot_cpu_data.x86_mask;
- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
- processor.mpc_reserved[0] = 0;
- processor.mpc_reserved[1] = 0;
- for (i = 0; i < 2; i++) {
- processor.mpc_apicid = i;
- MP_processor_info(&processor);
- }
-
- bus.mpc_type = MP_BUS;
- bus.mpc_busid = 0;
- switch (mpc_default_type) {
- default:
- printk("???\nUnknown standard configuration %d\n",
- mpc_default_type);
- /* fall through */
- case 1:
- case 5:
- memcpy(bus.mpc_bustype, "ISA ", 6);
- break;
- case 2:
- case 6:
- case 3:
- memcpy(bus.mpc_bustype, "EISA ", 6);
- break;
- case 4:
- case 7:
- memcpy(bus.mpc_bustype, "MCA ", 6);
- }
- MP_bus_info(&bus);
- if (mpc_default_type > 4) {
- bus.mpc_busid = 1;
- memcpy(bus.mpc_bustype, "PCI ", 6);
- MP_bus_info(&bus);
- }
-
- ioapic.mpc_type = MP_IOAPIC;
- ioapic.mpc_apicid = 2;
- ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
- ioapic.mpc_flags = MPC_APIC_USABLE;
- ioapic.mpc_apicaddr = 0xFEC00000;
- MP_ioapic_info(&ioapic);
-
- /*
- * We set up most of the low 16 IO-APIC pins according to MPS rules.
- */
- construct_default_ioirq_mptable(mpc_default_type);
-
- lintsrc.mpc_type = MP_LINTSRC;
- lintsrc.mpc_irqflag = 0; /* conforming */
- lintsrc.mpc_srcbusid = 0;
- lintsrc.mpc_srcbusirq = 0;
- lintsrc.mpc_destapic = MP_APIC_ALL;
- for (i = 0; i < 2; i++) {
- lintsrc.mpc_irqtype = linttypes[i];
- lintsrc.mpc_destapiclint = i;
- MP_lintsrc_info(&lintsrc);
- }
+ struct mpc_config_processor processor;
+ struct mpc_config_bus bus;
+ struct mpc_config_ioapic ioapic;
+ struct mpc_config_lintsrc lintsrc;
+ int linttypes[2] = { mp_ExtINT, mp_NMI };
+ int i;
+
+ /*
+ * local APIC has default address
+ */
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+ /*
+ * 2 CPUs, numbered 0 & 1.
+ */
+ processor.mpc_type = MP_PROCESSOR;
+ /* Either an integrated APIC or a discrete 82489DX. */
+ processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ processor.mpc_cpuflag = CPU_ENABLED;
+ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+ (boot_cpu_data.x86_model << 4) |
+ boot_cpu_data.x86_mask;
+ processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+ processor.mpc_reserved[0] = 0;
+ processor.mpc_reserved[1] = 0;
+ for (i = 0; i < 2; i++) {
+ processor.mpc_apicid = i;
+ MP_processor_info(&processor);
+ }
+
+ bus.mpc_type = MP_BUS;
+ bus.mpc_busid = 0;
+ switch (mpc_default_type) {
+ default:
+ printk("???\nUnknown standard configuration %d\n",
+ mpc_default_type);
+ /* fall through */
+ case 1:
+ case 5:
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ break;
+ case 2:
+ case 6:
+ case 3:
+ memcpy(bus.mpc_bustype, "EISA ", 6);
+ break;
+ case 4:
+ case 7:
+ memcpy(bus.mpc_bustype, "MCA ", 6);
+ }
+ MP_bus_info(&bus);
+ if (mpc_default_type > 4) {
+ bus.mpc_busid = 1;
+ memcpy(bus.mpc_bustype, "PCI ", 6);
+ MP_bus_info(&bus);
+ }
+
+ ioapic.mpc_type = MP_IOAPIC;
+ ioapic.mpc_apicid = 2;
+ ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ ioapic.mpc_flags = MPC_APIC_USABLE;
+ ioapic.mpc_apicaddr = 0xFEC00000;
+ MP_ioapic_info(&ioapic);
+
+ /*
+ * We set up most of the low 16 IO-APIC pins according to MPS rules.
+ */
+ construct_default_ioirq_mptable(mpc_default_type);
+
+ lintsrc.mpc_type = MP_LINTSRC;
+ lintsrc.mpc_irqflag = 0; /* conforming */
+ lintsrc.mpc_srcbusid = 0;
+ lintsrc.mpc_srcbusirq = 0;
+ lintsrc.mpc_destapic = MP_APIC_ALL;
+ for (i = 0; i < 2; i++) {
+ lintsrc.mpc_irqtype = linttypes[i];
+ lintsrc.mpc_destapiclint = i;
+ MP_lintsrc_info(&lintsrc);
+ }
}
static struct intel_mp_floating *mpf_found;
+extern void config_acpi_tables(void);
/*
* Scan the memory blocks for an SMP configuration block.
*/
void __init get_smp_config (void)
{
- struct intel_mp_floating *mpf = mpf_found;
+ struct intel_mp_floating *mpf = mpf_found;
+
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * Check if the ACPI tables are provided. Use them only to get
+ * the processor information, mainly because they provide
+ * the info on the logical processor(s), rather than the physical
+ * processor(s) that are provided by the MPS. We attempt to
+ * check only if the user provided a commandline override.
+ */
+ //XXX Xen config_acpi_tables();
+#endif
- printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
- if (mpf->mpf_feature2 & (1<<7)) {
- printk(" IMCR and PIC compatibility mode.\n");
- pic_mode = 1;
- } else {
- printk(" Virtual Wire compatibility mode.\n");
- pic_mode = 0;
- }
-
- /*
- * Now see if we need to read further.
- */
- if (mpf->mpf_feature1 != 0) {
-
- printk("Default MP configuration #%d\n", mpf->mpf_feature1);
- construct_default_ISA_mptable(mpf->mpf_feature1);
-
- } else if (mpf->mpf_physptr) {
-
- /*
- * Read the physical hardware table. Anything here will
- * override the defaults.
- */
- if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
- smp_found_config = 0;
- printk("BIOS bug, MP table errors detected!...\n");
- printk("... disabling SMP support. (tell your hw vendor)\n");
- return;
- }
- /*
- * If there are no explicit MP IRQ entries, then we are
- * broken. We set up most of the low 16 IO-APIC pins to
- * ISA defaults and hope it will work.
- */
- if (!mp_irq_entries) {
- struct mpc_config_bus bus;
-
- printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
-
- bus.mpc_type = MP_BUS;
- bus.mpc_busid = 0;
- memcpy(bus.mpc_bustype, "ISA ", 6);
- MP_bus_info(&bus);
-
- construct_default_ioirq_mptable(0);
- }
-
- } else
- BUG();
-
- printk("Processors: %d\n", num_processors);
- /*
- * Only use the first configuration found.
- */
+ printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+ if (mpf->mpf_feature2 & (1<<7)) {
+ printk(" IMCR and PIC compatibility mode.\n");
+ pic_mode = 1;
+ } else {
+ printk(" Virtual Wire compatibility mode.\n");
+ pic_mode = 0;
+ }
+
+ /*
+ * Now see if we need to read further.
+ */
+ if (mpf->mpf_feature1 != 0) {
+
+ printk("Default MP configuration #%d\n", mpf->mpf_feature1);
+ construct_default_ISA_mptable(mpf->mpf_feature1);
+
+ } else if (mpf->mpf_physptr) {
+
+ /*
+ * Read the physical hardware table. Anything here will
+ * override the defaults.
+ */
+ if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
+ smp_found_config = 0;
+ printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+ printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+ return;
+ }
+ /*
+ * If there are no explicit MP IRQ entries, then we are
+ * broken. We set up most of the low 16 IO-APIC pins to
+ * ISA defaults and hope it will work.
+ */
+ if (!mp_irq_entries) {
+ struct mpc_config_bus bus;
+
+ printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+
+ bus.mpc_type = MP_BUS;
+ bus.mpc_busid = 0;
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ MP_bus_info(&bus);
+
+ construct_default_ioirq_mptable(0);
+ }
+
+ } else
+ BUG();
+
+ printk("Processors: %d\n", num_processors);
+ /*
+ * Only use the first configuration found.
+ */
}
static int __init smp_scan_config (unsigned long base, unsigned long length)
{
- unsigned long *bp = phys_to_virt(base);
- struct intel_mp_floating *mpf;
-
- Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
- if (sizeof(*mpf) != 16)
- printk("Error: MPF size\n");
-
- while (length > 0) {
- mpf = (struct intel_mp_floating *)bp;
- if ((*bp == SMP_MAGIC_IDENT) &&
- (mpf->mpf_length == 1) &&
- !mpf_checksum((unsigned char *)bp, 16) &&
- ((mpf->mpf_specification == 1)
- || (mpf->mpf_specification == 4)) ) {
-
- smp_found_config = 1;
- printk("found SMP MP-table at %08lx\n",
- virt_to_phys(mpf));
- reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
- if (mpf->mpf_physptr)
- reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE);
- mpf_found = mpf;
- return 1;
- }
- bp += 4;
- length -= 16;
- }
- return 0;
+ unsigned long *bp = phys_to_virt(base);
+ struct intel_mp_floating *mpf;
+
+ Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+ if (sizeof(*mpf) != 16)
+ printk("Error: MPF size\n");
+
+ while (length > 0) {
+ mpf = (struct intel_mp_floating *)bp;
+ if ((*bp == SMP_MAGIC_IDENT) &&
+ (mpf->mpf_length == 1) &&
+ !mpf_checksum((unsigned char *)bp, 16) &&
+ ((mpf->mpf_specification == 1)
+ || (mpf->mpf_specification == 4)) ) {
+
+ smp_found_config = 1;
+ printk("found SMP MP-table at %08lx\n",
+ virt_to_phys(mpf));
+ reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+ if (mpf->mpf_physptr)
+ reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE);
+ mpf_found = mpf;
+ return 1;
+ }
+ bp += 4;
+ length -= 16;
+ }
+ return 0;
}
void __init find_intel_smp (void)
{
- /*
- * 1) Scan the bottom 1K for a signature
- * 2) Scan the top 1K of base RAM
- * 3) Scan the 64K of bios
- */
- if (smp_scan_config(0x0,0x400) ||
- smp_scan_config(639*0x400,0x400) ||
- smp_scan_config(0xF0000,0x10000))
- return;
+ unsigned int address;
+
+ /*
+ * FIXME: Linux assumes you have 640K of base ram..
+ * this continues the error...
+ *
+ * 1) Scan the bottom 1K for a signature
+ * 2) Scan the top 1K of base RAM
+ * 3) Scan the 64K of bios
+ */
+ if (smp_scan_config(0x0,0x400) ||
+ smp_scan_config(639*0x400,0x400) ||
+ smp_scan_config(0xF0000,0x10000))
+ return;
+ /*
+ * If it is an SMP machine we should know now, unless the
+ * configuration is in an EISA/MCA bus machine with an
+ * extended bios data area.
+ *
+ * There is a real-mode segmented pointer pointing to the
+ * 4K EBDA area at 0x40E, calculate and scan it here.
+ *
+ * NOTE! There were Linux loaders that will corrupt the EBDA
+ * area, and as such this kind of SMP config may be less
+ * trustworthy, simply because the SMP table may have been
+ * stomped on during early boot. Thankfully the bootloaders
+ * now honour the EBDA.
+ */
+
+ address = *(unsigned short *)phys_to_virt(0x40E);
+ address <<= 4;
+ smp_scan_config(address, 0x1000);
+}
+
+#else
+
+/*
+ * The Visual Workstation is Intel MP compliant in the hardware
+ * sense, but it doesn't have a BIOS(-configuration table).
+ * No problem for Linux.
+ */
+void __init find_visws_smp(void)
+{
+ smp_found_config = 1;
+
+ phys_cpu_present_map |= 2; /* or in id 1 */
+ apic_version[1] |= 0x10; /* integrated APIC */
+ apic_version[0] |= 0x10;
+
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
}
+#endif
+
/*
* - Intel MP Configuration Table
* - or SGI Visual Workstation configuration
*/
void __init find_smp_config (void)
{
- find_intel_smp();
+#ifdef CONFIG_X86_LOCAL_APIC
+ find_intel_smp();
+#endif
+#ifdef CONFIG_VISWS
+ find_visws_smp();
+#endif
}
diff --git a/xen-2.4.16/arch/i386/process.c b/xen-2.4.16/arch/i386/process.c
index a23f4b1557..d3cedf4766 100644
--- a/xen-2.4.16/arch/i386/process.c
+++ b/xen-2.4.16/arch/i386/process.c
@@ -364,7 +364,6 @@ void new_thread(struct task_struct *p,
/* NB. prev_p passed in %eax, next_p passed in %edx */
void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
- extern struct desc_struct idt_table[];
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
struct tss_struct *tss = init_tss + smp_processor_id();
diff --git a/xen-2.4.16/arch/i386/setup.c b/xen-2.4.16/arch/i386/setup.c
index 924d0ce2a1..e81f2da3ff 100644
--- a/xen-2.4.16/arch/i386/setup.c
+++ b/xen-2.4.16/arch/i386/setup.c
@@ -283,6 +283,9 @@ void __init start_of_day(void)
extern void ac_timer_init(void);
extern int setup_network_devices(void);
extern void net_init(void);
+ extern void initialize_block_io(void);
+ extern void initialize_serial(void);
+ extern void initialize_keyboard(void);
unsigned long low_mem_size;
@@ -338,9 +341,15 @@ void __init start_of_day(void)
pci_init();
#endif
do_initcalls();
+
+ initialize_serial(); /* setup serial 'driver' (for debugging) */
+ initialize_keyboard(); /* setup keyboard (also for debugging) */
+
if ( !setup_network_devices() )
panic("Must have a network device!\n");
- net_init(); /* initializes virtual network system. */
+ net_init(); /* initializes virtual network system. */
+ initialize_block_io(); /* setup block devices */
+
#ifdef CONFIG_SMP
wait_init_idle = cpu_online_map;
diff --git a/xen-2.4.16/arch/i386/smpboot.c b/xen-2.4.16/arch/i386/smpboot.c
index dd0f94bd13..6afdd0ecfd 100644
--- a/xen-2.4.16/arch/i386/smpboot.c
+++ b/xen-2.4.16/arch/i386/smpboot.c
@@ -395,6 +395,10 @@ int cpucount;
*/
int __init start_secondary(void *unused)
{
+ unsigned int cpu = smp_processor_id();
+ /* 6 bytes suitable for passing to LIDT instruction. */
+ unsigned char idt_load[6];
+
extern void cpu_init(void);
/*
@@ -409,6 +413,16 @@ int __init start_secondary(void *unused)
rep_nop();
/*
+ * At this point, boot CPU has fully initialised the IDT. It is
+ * now safe to make ourselves a private copy.
+ */
+ idt_tables[cpu] = kmalloc(IDT_ENTRIES*8, GFP_KERNEL);
+ memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*8);
+ *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*8)-1;
+ *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
+ __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
+
+ /*
* low-memory mappings have been cleared, flush them from the local TLBs
* too.
*/
diff --git a/xen-2.4.16/arch/i386/traps.c b/xen-2.4.16/arch/i386/traps.c
index cdea19eaa6..b8297fe3eb 100644
--- a/xen-2.4.16/arch/i386/traps.c
+++ b/xen-2.4.16/arch/i386/traps.c
@@ -43,12 +43,10 @@ asmlinkage int hypervisor_call(void);
asmlinkage void lcall7(void);
asmlinkage void lcall27(void);
-/*
- * The IDT has to be page-aligned to simplify the Pentium
- * F0 0F bug workaround.. We have a special link segment
- * for this.
- */
-struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, };
+/* Master table, and the one used by CPU0. */
+struct desc_struct idt_table[256] = { {0, 0}, };
+/* All other CPUs have their own copy. */
+struct desc_struct *idt_tables[NR_CPUS] = { 0 };
asmlinkage void divide_error(void);
asmlinkage void debug(void);
@@ -299,7 +297,12 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
ti = current->thread.traps + (error_code>>3);
if ( ti->dpl >= (regs->xcs & 3) )
{
- if ( (error_code>>3)==0x80 ) { printk("!!!\n"); BUG(); }
+ /* XXX Kill next conditional soon :-) XXX */
+ if ( (error_code>>3)==0x80 )
+ {
+ printk("DIDN'T USE FAST-TRAP HANDLER FOR 0x80!!! :-(\n");
+ BUG();
+ }
gtb->flags = GTBF_TRAP_NOCODE;
gtb->cs = ti->cs;
gtb->eip = ti->address;
@@ -542,6 +545,9 @@ void __init trap_init(void)
/* Only ring 1 can access monitor services. */
_set_gate(idt_table+HYPERVISOR_CALL_VECTOR,15,1,&hypervisor_call);
+ /* CPU0 uses the master IDT. */
+ idt_tables[0] = idt_table;
+
/*
* Should be a barrier for any external CPU state.
*/
diff --git a/xen-2.4.16/common/event.c b/xen-2.4.16/common/event.c
index 6a81c63f8b..4514d02eb3 100644
--- a/xen-2.4.16/common/event.c
+++ b/xen-2.4.16/common/event.c
@@ -15,13 +15,15 @@ typedef void (*hyp_event_callback_fn_t)(void);
extern void schedule(void);
extern void flush_rx_queue(void);
+extern void flush_blk_queue(void);
/* Ordering must match definitions of _HYP_EVENT_* in xeno/sched.h */
static hyp_event_callback_fn_t event_call_fn[] =
{
schedule,
flush_rx_queue,
- kill_domain
+ kill_domain,
+ flush_blk_queue
};
/* Handle outstanding events for the currently-executing domain. */
diff --git a/xen-2.4.16/drivers/Makefile b/xen-2.4.16/drivers/Makefile
index 5aa320fcbe..bee17fa208 100644
--- a/xen-2.4.16/drivers/Makefile
+++ b/xen-2.4.16/drivers/Makefile
@@ -1,12 +1,16 @@
default:
+ $(MAKE) -C char
$(MAKE) -C pci
$(MAKE) -C net
$(MAKE) -C block
$(MAKE) -C ide
+# $(MAKE) -C scsi
clean:
+ $(MAKE) -C char clean
$(MAKE) -C pci clean
$(MAKE) -C net clean
$(MAKE) -C block clean
$(MAKE) -C ide clean
+# $(MAKE) -C scsi clean
diff --git a/xen-2.4.16/drivers/block/ll_rw_blk.c b/xen-2.4.16/drivers/block/ll_rw_blk.c
index 0ee8477c71..06d9fb72e9 100644
--- a/xen-2.4.16/drivers/block/ll_rw_blk.c
+++ b/xen-2.4.16/drivers/block/ll_rw_blk.c
@@ -31,8 +31,12 @@
#include <xeno/slab.h>
#include <xeno/module.h>
+static void end_buffer_dummy(struct buffer_head *bh, int uptodate)
+{
+ /* do nothing */
+}
+
/* This will die as all synchronous stuff is coming to an end */
-#define end_buffer_io_sync NULL
#define complete(_r) panic("completion.h stuff may be needed...")
/*
@@ -307,10 +311,14 @@ static void generic_plug_device(request_queue_t *q, kdev_t dev)
*/
static inline void __generic_unplug_device(request_queue_t *q)
{
+ /* printk(KERN_ALERT "__generic_unplug_device %p %d\n", q, q->plugged); */
if (q->plugged) {
q->plugged = 0;
if (!list_empty(&q->queue_head))
+ {
+ /* printk(KERN_ALERT " calling %p\n", q->request_fn); */
q->request_fn(q);
+ }
}
}
@@ -319,6 +327,8 @@ void generic_unplug_device(void *data)
request_queue_t *q = (request_queue_t *) data;
unsigned long flags;
+ /* printk(KERN_ALERT "generic_unplug_device\n"); */
+
spin_lock_irqsave(&io_request_lock, flags);
__generic_unplug_device(q);
spin_unlock_irqrestore(&io_request_lock, flags);
@@ -856,6 +866,8 @@ static int __make_request(request_queue_t * q, int rw,
int latency;
elevator_t *elevator = &q->elevator;
+ /* printk(KERN_ALERT "__make_request\n");*/
+
count = bh->b_size >> 9;
sector = bh->b_rsector;
@@ -1061,6 +1073,8 @@ void generic_make_request (int rw, struct buffer_head * bh)
int minorsize = 0;
request_queue_t *q;
+ /* printk(KERN_ALERT "generic_make_request\n"); */
+
if (!bh->b_end_io)
BUG();
@@ -1130,6 +1144,8 @@ void submit_bh(int rw, struct buffer_head * bh)
{
int count = bh->b_size >> 9;
+ /* printk(KERN_ALERT "submit_bh\n"); */
+
if (!test_bit(BH_Lock, &bh->b_state))
BUG();
@@ -1141,7 +1157,7 @@ void submit_bh(int rw, struct buffer_head * bh)
* further remap this.
*/
bh->b_rdev = bh->b_dev;
- bh->b_rsector = bh->b_blocknr * count;
+ /* bh->b_rsector = bh->b_blocknr * count; */
generic_make_request(rw, bh);
@@ -1194,6 +1210,8 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
int correct_size;
int i;
+ /* printk(KERN_ALERT "ll_rw_block %d %d\n", rw, nr); */
+
if (!nr)
return;
@@ -1229,14 +1247,14 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
/* We have the buffer lock */
atomic_inc(&bh->b_count);
- bh->b_end_io = end_buffer_io_sync;
+ bh->b_end_io = end_buffer_dummy;
switch(rw) {
case WRITE:
if (!atomic_set_buffer_clean(bh))
/* Hmmph! Nothing to write */
goto end_io;
- __mark_buffer_clean(bh);
+ /* __mark_buffer_clean(bh); */
break;
case READA:
@@ -1302,6 +1320,7 @@ int end_that_request_first (struct request *req, int uptodate, char *name)
req->bh = bh->b_reqnext;
bh->b_reqnext = NULL;
bh->b_end_io(bh, uptodate);
+ end_block_io_op(bh);
if ((bh = req->bh) != NULL) {
req->hard_sector += nsect;
req->hard_nr_sectors -= nsect;
diff --git a/xen-2.4.16/drivers/block/xen_block.c b/xen-2.4.16/drivers/block/xen_block.c
new file mode 100644
index 0000000000..b6d0e8993f
--- /dev/null
+++ b/xen-2.4.16/drivers/block/xen_block.c
@@ -0,0 +1,620 @@
+/*
+ * xen_block.c
+ *
+ * Process incoming block I/O requests from guest operating systems.
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/sched.h>
+#include <xeno/blkdev.h>
+#include <xeno/event.h> /* mark_hyp_event */
+#include <hypervisor-ifs/block.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+#include <asm-i386/io.h>
+#include <asm/spinlock.h>
+
+#define XEN_BLK_DEBUG 0
+#define XEN_BLK_DEBUG_LEVEL KERN_ALERT
+
+#define XEN_BLK_REQUEST_LIST_SIZE 256 /* very arbitrary */
+
+typedef struct blk_request
+{
+ struct list_head queue; /* links this slot into free_queue, pending_queue or io_done_queue */
+ struct buffer_head *bh; /* allocated in dispatch_rw_block_io, freed in flush_blk_queue */
+ blk_ring_entry_t request; /* copy of the tx ring entry; copied back onto the rx ring on completion */
+ struct task_struct *domain; /* requesting domain */
+} blk_request_t;
+
+static int pending_work; /* which domains have work for us? bit i set => domain i */
+blk_request_t blk_request_list[XEN_BLK_REQUEST_LIST_SIZE]; /* storage for every request slot; all start on free_queue */
+struct list_head free_queue; /* unused requests */
+struct list_head pending_queue; /* waiting for hardware */
+struct list_head io_done_queue; /* request completed. send to guest os */
+spinlock_t free_queue_lock; /* taken around every free_queue access */
+spinlock_t pending_queue_lock; /* taken around every pending_queue access */
+spinlock_t io_done_queue_lock; /* taken around every io_done_queue access */
+
+/* forward declarations */
+void dumpx (char *buffer, int count);
+void printx (char * string);
+long do_block_io_op_domain (struct task_struct* task);
+int dispatch_rw_block_io (int index);
+int dispatch_probe_block_io (int index);
+int dispatch_debug_block_io (int index);
+
+/*
+ * end_block_io_op
+ *
+ * IO has completed. Need to notify the guest operating system.
+ * Called from hardware interrupt.
+ *
+ * The request slot is taken from bh->b_xen_request, unlinked from
+ * pending_queue and appended to io_done_queue (each step under the
+ * matching queue lock). _HYP_EVENT_BLK_RX is then marked on the
+ * requesting domain; flush_blk_queue clears that event and drains
+ * io_done_queue. If bh has no request attached, an alert is printed
+ * and the function returns without touching any queue.
+ */
+
+void end_block_io_op(struct buffer_head * bh)
+{
+ unsigned long cpu_mask; /* receives mark_hyp_event's result; not read afterwards */
+ /* struct list_head *list;*/
+ blk_request_t *blk_request = NULL;
+ unsigned long flags; /* irq save */
+
+#if 0
+ printk("{E}");
+#endif
+ if (XEN_BLK_DEBUG) printk(XEN_BLK_DEBUG_LEVEL
+ "XEN end_block_io_op, bh: %lx\n",
+ (unsigned long)bh);
+
+ {
+ char temp[100];
+ sprintf(temp, "endio bh: 0x%p, blkno: 0x%lx",
+ bh, bh->b_blocknr);
+ printx(temp); /* NOTE(review): printx allocates with GFP_KERNEL; confirm that is acceptable in interrupt context */
+ }
+
+ spin_lock_irqsave(&pending_queue_lock, flags);
+ /* superseded linear search of pending_queue, kept for reference:
+ list_for_each (list, &pending_queue)
+ {
+ blk_request = list_entry(list, blk_request_t, queue);
+ if (blk_request->bh == bh)
+ {
+ break;
+ }
+ }
+ */
+ blk_request = (blk_request_t *)bh->b_xen_request; /* back-pointer stored by dispatch_rw_block_io */
+ if (blk_request == NULL)
+ {
+ printk (KERN_ALERT
+ " block io interrupt received for unknown buffer [0x%lx]\n",
+ (unsigned long) bh);
+ spin_unlock_irqrestore(&pending_queue_lock, flags);
+ return;
+ }
+ list_del(&blk_request->queue);
+ spin_unlock_irqrestore(&pending_queue_lock, flags);
+
+ spin_lock_irqsave(&io_done_queue_lock, flags);
+ list_add_tail(&blk_request->queue, &io_done_queue);
+ spin_unlock_irqrestore(&io_done_queue_lock, flags);
+
+ /* enqueue work */
+ cpu_mask = mark_hyp_event(blk_request->domain, _HYP_EVENT_BLK_RX);
+
+ return;
+}
+
+/*
+ * flush_blk_queue
+ *
+ * Called by the hypervisor synchronously when there is something to do
+ * (block transfers have completed).
+ *
+ * Phase 1 drains io_done_queue: each completed request is copied onto the
+ * rx ring of the domain that issued it, _EVENT_BLK_RX is set in that
+ * domain's shared_info, the buffer head is freed and the request slot is
+ * returned to free_queue.
+ *
+ * Phase 2 gives every domain flagged in pending_work another pass through
+ * do_block_io_op_domain(), now that request slots have been released.
+ */
+
+void flush_blk_queue(void)
+{
+    blk_request_t *blk_request;
+    int position = 0;
+    blk_ring_t *blk_ring;
+    unsigned long flags;
+    int loop;
+
+#if 0
+    printk("{F}");
+#endif
+
+    clear_bit(_HYP_EVENT_BLK_RX, &current->hyp_events);
+
+    /*
+     * end_block_io_op() appends to io_done_queue, so the queue is only
+     * inspected and modified with io_done_queue_lock held. The lock is
+     * dropped while a single request is being processed.
+     */
+    spin_lock_irqsave(&io_done_queue_lock, flags);
+    while (!list_empty(&io_done_queue))
+    {
+        blk_request = list_entry(io_done_queue.next, blk_request_t, queue);
+        list_del (&blk_request->queue);
+        spin_unlock_irqrestore(&io_done_queue_lock, flags);
+
+        /* place on ring for guest os */
+        blk_ring = blk_request->domain->blk_ring_base;
+        position = blk_ring->rx_prod;
+
+        if (XEN_BLK_DEBUG) printk(XEN_BLK_DEBUG_LEVEL
+                                  "XEN flush_blk_queue [%d]\n", position);
+
+        memcpy(&blk_ring->rx_ring[position], &blk_request->request,
+               sizeof(blk_ring_entry_t));
+        blk_ring->rx_prod = BLK_RX_RING_INC(blk_ring->rx_prod);
+
+        /* notify appropriate guest os */
+        set_bit(_EVENT_BLK_RX,
+                &blk_request->domain->shared_info->events);
+
+        /* disabled hex dump of the returned block, kept as a debug aid */
+        if (0)
+        {
+            int temp;
+            struct buffer_head *bh = blk_request->bh;
+            char * vbuffer = bh->b_data;
+
+            printk (KERN_ALERT "XEN return block 0x%lx\n", bh->b_blocknr);
+
+            for (temp = 0; temp < bh->b_size; temp++)
+            {
+                if (temp % 16 == 0) printk ("[%04x] ", temp);
+                else if (temp % 4 == 0) printk (" ");
+                printk ("%02x",
+                        vbuffer[temp] & 255);
+                if ((temp + 1) % 16 == 0) printk ("\n");
+            }
+            printk ("\n\n");
+        }
+
+        /* free the buffer header allocated in dispatch_rw_block_io */
+        if (blk_request->bh)
+        {
+            kfree(blk_request->bh);
+        }
+
+        spin_lock_irqsave(&free_queue_lock, flags);
+        list_add_tail(&blk_request->queue, &free_queue);
+        spin_unlock_irqrestore(&free_queue_lock, flags);
+
+        spin_lock_irqsave(&io_done_queue_lock, flags);
+    }
+    spin_unlock_irqrestore(&io_done_queue_lock, flags);
+
+    /*
+     * now check if there is any pending work from any domain
+     * that we were previously unable to process.
+     *
+     * NOTE: the current algorithm will check _every_ domain
+     * and wake up _every_ domain that has pending work.
+     * In the future, we should stop waking up domains once
+     * there isn't any space for their requests any more
+     * ALSO, we need to maintain a counter of the last domain
+     * that we woke up for fairness... we shouldn't restart
+     * at domain 0 every time (although we might want to special
+     * case domain 0);
+     */
+    for (loop = 0; loop < XEN_BLOCK_MAX_DOMAINS; loop++)
+    {
+        struct task_struct *mytask;
+
+        if (!(pending_work & (1 << loop)))
+        {
+            continue;
+        }
+
+        /*
+         * Clear this domain's bit with a bitwise complement. The previous
+         * logical-not, !(1 << loop), evaluates to 0 and therefore wiped
+         * the pending flag of every domain. Clear it BEFORE retrying so
+         * that do_block_io_op_domain() can set it again if the domain
+         * still cannot be served.
+         */
+        pending_work = pending_work & ~(1 << loop);
+
+        /*
+         * IS THERE A BETTER WAY OF FINDING THE TASK STRUCT FOR A
+         * PARTICULAR DOMAIN?
+         *
+         * Walk the circular task list once, starting at current. If we
+         * arrive back at current without a match the domain has gone
+         * away, and its pending flag is simply dropped instead of
+         * spinning forever.
+         */
+        mytask = current;
+        while (mytask->domain != loop)
+        {
+            mytask = mytask->next_task;
+            if (mytask == current)
+            {
+                break;
+            }
+        }
+        if (mytask->domain != loop)
+        {
+            continue;
+        }
+
+        do_block_io_op_domain(mytask);
+    }
+}
+
+/*
+ * do_block_io_op
+ *
+ * Accept a block io request from a guest operating system.
+ * There is an entry in the hypervisor_call_table (xen/arch/i386/entry.S).
+ *
+ * Thin wrapper: runs do_block_io_op_domain on the calling domain
+ * (current) and returns its result.
+ */
+
+long do_block_io_op (void)
+{
+ return do_block_io_op_domain(current);
+}
+
+/*
+ * do_block_io_op_domain
+ *
+ * handle the requests for a particular domain
+ *
+ * Walks task's tx ring from tx_cons up to tx_prod and hands each entry to
+ * the dispatch routine for its operation. A non-zero status from a
+ * dispatch routine stops the walk and sets the domain's bit in
+ * pending_work so that flush_blk_queue retries later. tx_cons is advanced
+ * to the first entry that was not consumed. Always returns 0.
+ */
+
+long do_block_io_op_domain (struct task_struct* task)
+{
+ blk_ring_t *blk_ring = task->blk_ring_base;
+ int loop; /* index of the tx ring entry being processed */
+
+#if 0
+ printk("{%d}", current->domain);
+#endif
+ if (XEN_BLK_DEBUG) printk(XEN_BLK_DEBUG_LEVEL
+ "XEN do_block_io_op %d %d\n",
+ blk_ring->tx_cons, blk_ring->tx_prod);
+
+ for (loop = blk_ring->tx_cons;
+ loop != blk_ring->tx_prod;
+ loop = BLK_TX_RING_INC(loop))
+ {
+ int status = 1; /* non-zero means "could not issue, retry later" */
+
+ /* NOTE(review): the dispatch_* helpers receive only the ring index and read current->blk_ring_base, not task's ring; verify the path where flush_blk_queue passes a task other than current */
+ switch (blk_ring->tx_ring[loop].operation)
+ {
+ case XEN_BLOCK_READ :
+ case XEN_BLOCK_WRITE :
+ {
+ status = dispatch_rw_block_io(loop);
+ break;
+ }
+ case XEN_BLOCK_PROBE :
+ {
+ status = dispatch_probe_block_io(loop);
+ break;
+ }
+ case XEN_BLOCK_DEBUG :
+ {
+ status = dispatch_debug_block_io(loop);
+ break;
+ }
+ default :
+ {
+ printk (KERN_ALERT "error: unknown block io operation [%d]\n",
+ blk_ring->tx_ring[loop].operation);
+ BUG();
+ }
+ }
+
+ if (status)
+ {
+ /* unable to successfully issue / complete command, maybe because
+ * another resource (e.g. disk request buffers) is unavailable.
+ * stop removing items from the communications ring and try
+ * again later
+ */
+
+ /*
+ printk ("do_block_io_op_domain domain:%d, pending_work: %x\n",
+ task->domain, pending_work);
+ */
+ pending_work = pending_work | (1 << task->domain);
+ /*
+ printk ("do_block_io_op_domain domain:%d, pending_work: %x\n",
+ task->domain, pending_work);
+ */
+ break;
+ }
+ }
+
+ blk_ring->tx_cons = loop; /* the entry that failed, if any, stays on the ring */
+
+ return 0L;
+}
+
+/*
+ * dispatch_debug_block_io
+ *
+ * Handle a XEN_BLOCK_DEBUG request: write a textual summary of the block
+ * rings of the current domain and of every other task into the buffer
+ * named by the ring entry, and dump the printx trace to the console.
+ * The entry's block_number is passed to dumpx as the number of trace
+ * lines to print.
+ *
+ * index: slot in the current domain's tx ring.
+ * Returns 0 (request consumed).
+ *
+ * NOTE(review): nothing here bounds the amount of text written to the
+ * guest-supplied buffer; confirm the guest provides enough room.
+ */
+int dispatch_debug_block_io (int index)
+{
+    struct task_struct *task;
+    blk_ring_t *blk_ring = current->blk_ring_base;
+    char * buffer;
+    char output[1000];
+    int len;        /* characters written to buffer so far */
+
+    int foobar = (unsigned long)blk_ring->tx_ring[index].block_number;
+
+    printk (KERN_ALERT "dispatch_debug_block_io %d\n", foobar);
+
+    buffer = phys_to_virt(blk_ring->tx_ring[index].buffer);
+
+    /*
+     * Append at buffer + len instead of formatting the buffer into itself
+     * with "%s": sprintf with overlapping source and destination is
+     * undefined behaviour.
+     */
+    len = sprintf (buffer, "current %d\n", current->domain);
+    len += sprintf (buffer + len, " tx: prod: %d, cons: %d, size: %d\n",
+                    blk_ring->tx_prod, blk_ring->tx_cons,
+                    blk_ring->tx_ring_size);
+    len += sprintf (buffer + len, " rx: prod: %d, cons: %d, size: %d\n",
+                    blk_ring->rx_prod, blk_ring->rx_cons,
+                    blk_ring->rx_ring_size);
+
+    /* the task list is circular: stop once we are back at current */
+    for (task = current->next_task; task != current; task = task->next_task)
+    {
+        blk_ring = task->blk_ring_base;
+        len += sprintf (buffer + len, "task %d\n", task->domain);
+        if (blk_ring != NULL)
+        {
+            len += sprintf (buffer + len,
+                            " tx: prod: %d, cons: %d, size: %d\n",
+                            blk_ring->tx_prod, blk_ring->tx_cons,
+                            blk_ring->tx_ring_size);
+            len += sprintf (buffer + len,
+                            " rx: prod: %d, cons: %d, size: %d\n",
+                            blk_ring->rx_prod, blk_ring->rx_cons,
+                            blk_ring->rx_ring_size);
+        }
+    }
+
+    dumpx(output, foobar);
+    sprintf (buffer + len, "%s\n", output);
+
+    return 0;
+}
+
+int dispatch_probe_block_io (int index) /* index: slot in current's tx ring; always returns 0 */
+{
+ blk_ring_t *blk_ring = current->blk_ring_base;
+ xen_disk_info_t *xdi; /* the request's buffer, viewed as a xen_disk_info_t */
+
+ xdi = phys_to_virt(blk_ring->tx_ring[index].buffer);
+
+ ide_probe_devices(xdi); /* presumably fills *xdi with the probed disks - TODO confirm in the ide code */
+
+ return 0;
+}
+
+/*
+ * dispatch_rw_block_io
+ *
+ * Issue one XEN_BLOCK_READ / XEN_BLOCK_WRITE request to the block layer.
+ *
+ * A request slot is taken from free_queue and put on pending_queue, a
+ * buffer head describing the transfer is built (freed again in
+ * flush_blk_queue) and submitted through ll_rw_block, and the device
+ * queue is unplugged.
+ *
+ * index: slot in the current domain's tx ring.
+ * Returns 0 when the request was issued, 1 when no request slot is free
+ * and the caller should retry later.
+ */
+int dispatch_rw_block_io (int index)
+{
+    blk_ring_t *blk_ring = current->blk_ring_base;
+    struct buffer_head *bh;
+    struct request_queue *rq;
+    int operation;
+    blk_request_t *blk_request;
+    unsigned long flags;
+
+    /*
+     * check to make sure that the block request seems at least
+     * a bit legitimate
+     */
+    if ((blk_ring->tx_ring[index].block_size & (0x200 - 1)) != 0)
+    {
+        printk(KERN_ALERT
+               " error: dodgy block size: %d\n",
+               blk_ring->tx_ring[index].block_size);
+        BUG();
+    }
+
+    if (XEN_BLK_DEBUG)
+    {
+        printk(XEN_BLK_DEBUG_LEVEL
+               " tx_cons: %d tx_prod %d index: %d op: %s, pri: %s\n",
+               blk_ring->tx_cons, blk_ring->tx_prod, index,
+               (blk_ring->tx_ring[index].operation == XEN_BLOCK_READ ? "read" : "write"),
+               (blk_ring->tx_ring[index].priority == XEN_BLOCK_SYNC ? "sync" : "async"));
+    }
+
+    /* find an empty request slot */
+    spin_lock_irqsave(&free_queue_lock, flags);
+    if (list_empty(&free_queue))
+    {
+        /* printk (KERN_ALERT "dispatch_rw_block_io EMPTY FREE LIST!! %d\n", index); */
+        spin_unlock_irqrestore(&free_queue_lock, flags);
+        return 1;
+    }
+    blk_request = list_entry(free_queue.next, blk_request_t, queue);
+    list_del(&blk_request->queue);
+    spin_unlock_irqrestore(&free_queue_lock, flags);
+
+    /* place request on pending list */
+    spin_lock_irqsave(&pending_queue_lock, flags);
+    list_add_tail(&blk_request->queue, &pending_queue);
+    spin_unlock_irqrestore(&pending_queue_lock, flags);
+
+    /* we'll be doing this frequently, would a cache be appropriate? */
+    /* free in flush_blk_queue */
+    bh = (struct buffer_head *) kmalloc(sizeof(struct buffer_head),
+                                        GFP_KERNEL);
+    if (!bh)
+    {
+        printk(KERN_ALERT "ERROR: bh is null\n");
+        BUG();
+    }
+
+    /*
+     * Record the trace entry only now: it prints bh, which used to be
+     * read before it had been assigned. As a side effect the "issue"
+     * trace is written only for requests that really obtained a slot.
+     */
+    {
+        char temp[100];
+        sprintf(temp, "issue buf: 0x%p, bh: 0x%p, blkno: 0x%lx",
+                blk_ring->tx_ring[index].buffer, bh,
+                (unsigned long)blk_ring->tx_ring[index].block_number);
+        printx(temp);
+    }
+
+    /* set just the important bits of the buffer header */
+    memset (bh, 0, sizeof (struct buffer_head));
+
+    bh->b_blocknr = blk_ring->tx_ring[index].block_number;  /* block number */
+    bh->b_size = blk_ring->tx_ring[index].block_size;       /* block size */
+    bh->b_dev = blk_ring->tx_ring[index].device;            /* device (B_FREE = free) */
+    bh->b_rsector = blk_ring->tx_ring[index].sector_number; /* sector number */
+
+    bh->b_data = phys_to_virt(blk_ring->tx_ring[index].buffer);
+                                                            /* ptr to data blk */
+    bh->b_count.counter = 1;                                /* users using this block */
+    bh->b_xen_request = (void *)blk_request;                /* save block request */
+
+    if (blk_ring->tx_ring[index].operation == XEN_BLOCK_WRITE)
+    {
+        bh->b_state = ((1 << BH_JBD) |                      /* buffer state bitmap */
+                       (1 << BH_Mapped) |
+                       (1 << BH_Req) |
+                       (1 << BH_Dirty) |
+                       (1 << BH_Uptodate));
+        operation = WRITE;
+    }
+    else
+    {
+        bh->b_state = (1 << BH_Mapped);                     /* buffer state bitmap */
+        operation = READ;
+    }
+
+    /* save meta data about request */
+    memcpy(&blk_request->request,                           /* NEED COPY_FROM_USER? */
+           &blk_ring->tx_ring[index], sizeof(blk_ring_entry_t));
+    blk_request->bh = bh;
+    blk_request->domain = current;                          /* save current domain */
+
+    /* dispatch single block request */
+    ll_rw_block(operation, 1, &bh);                         /* linux top half */
+    rq = blk_get_queue(bh->b_rdev);
+    generic_unplug_device(rq);                              /* linux bottom half */
+
+    return 0;
+}
+
+/*
+ * initialize_block_io
+ *
+ * initialize everything for block io
+ * called from arch/i386/setup.c::start_of_day
+ */
+
+void initialize_block_io ()
+{
+    blk_request_t *slot;
+    blk_request_t *const end = blk_request_list + XEN_BLK_REQUEST_LIST_SIZE;
+
+    /* bit i of pending_work set <=> domain i has work for us; none yet */
+    pending_work = 0;
+
+    /* one lock per queue */
+    spin_lock_init(&free_queue_lock);
+    spin_lock_init(&pending_queue_lock);
+    spin_lock_init(&io_done_queue_lock);
+
+    /* all three queues start out empty ... */
+    INIT_LIST_HEAD(&free_queue);
+    INIT_LIST_HEAD(&pending_queue);
+    INIT_LIST_HEAD(&io_done_queue);
+
+    /* ... and then every request slot is handed to the free queue */
+    for (slot = blk_request_list; slot != end; slot++)
+    {
+        list_add_tail(&slot->queue, &free_queue);
+    }
+}
+
+
+#ifdef DEBUG
+
+/*
+ * debug dump_queue
+ * arguments: queue head, name of queue
+ */
+void dump_queue(struct list_head *queue, char *name)
+{
+    struct list_head *node;
+    int position = 0;
+
+    /* the queue head itself first */
+    printk ("QUEUE %s %lx n: %lx, p: %lx\n", name, (unsigned long)queue,
+            (unsigned long) queue->next, (unsigned long) queue->prev);
+
+    /* then one line per element: address, next and prev pointers */
+    list_for_each (node, queue)
+    {
+        printk (" %s %d : %lx n: %lx, p: %lx\n", name, position,
+                (unsigned long)node,
+                (unsigned long)node->next, (unsigned long)node->prev);
+        position++;
+    }
+}
+
+void dump_queue_head(struct list_head *queue, char *name)
+{
+    struct list_head *node;
+    int printed = 0;
+
+    printk ("QUEUE %s %lx n: %lx, p: %lx\n", name, (unsigned long)queue,
+            (unsigned long) queue->next, (unsigned long) queue->prev);
+
+    /* like dump_queue, but stop after the first five elements */
+    list_for_each (node, queue)
+    {
+        printk (" %d : %lx n: %lx, p: %lx\n", printed,
+                (unsigned long)node,
+                (unsigned long)node->next, (unsigned long)node->prev);
+        printed++;
+        if (printed >= 5)
+        {
+            return;
+        }
+    }
+}
+
+#endif /* DEBUG */
+
+
+#define debug_block_size 200000
+#define debug_output_size 10
+
+static int countx = 0;
+static char * arrayx[debug_block_size];
+static int outputx = 0;
+
+/*
+ * printx
+ *
+ * Append a private copy of string to the arrayx trace ring. When the ring
+ * is full the write position wraps to slot 0 and "printx wrap" is logged.
+ * If no memory is available the message is silently dropped.
+ *
+ * NOTE(review): this is reached from end_block_io_op, which is documented
+ * as running in interrupt context; confirm GFP_KERNEL is acceptable there.
+ */
+void
+printx (char * string)
+{
+    char * s;
+
+    /* strlen() excludes the terminating NUL that strcpy() also writes */
+    s = (char *) kmalloc(strlen(string) + 1, GFP_KERNEL);
+    if (s == NULL)
+    {
+        return;
+    }
+    strcpy (s, string);
+
+    /* after a wrap the slot still owns an older message: release it */
+    if (arrayx[countx] != NULL)
+    {
+        kfree(arrayx[countx]);
+    }
+    arrayx[countx++] = s;
+
+    if (countx >= debug_block_size)
+    {
+        countx = 0;
+        printk (KERN_ALERT "printx wrap\n");
+    }
+
+}
+
+void
+dumpx (char *buffer, int count)
+{
+    int first = 0;
+    int idx;
+
+    /* the caller's buffer only ever receives this fixed banner */
+    sprintf (buffer, "debug dump\n");
+
+    /*
+     * Print the last `count` trace entries; a count of 0, or one larger
+     * than the number of entries recorded, means "everything".
+     */
+    if (count != 0 && count <= countx)
+    {
+        first = countx - count;
+    }
+
+    printk (KERN_ALERT "DUMPX BUFFER\n");
+    idx = first;
+    while (idx < countx)
+    {
+        printk (KERN_ALERT "%4d %s\n", idx, arrayx[idx]);
+        idx++;
+    }
+    printk (KERN_ALERT "DUMPX bye bye\n");
+}
+
diff --git a/xen-2.4.16/drivers/char/Makefile b/xen-2.4.16/drivers/char/Makefile
new file mode 100644
index 0000000000..574b7d2d79
--- /dev/null
+++ b/xen-2.4.16/drivers/char/Makefile
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+ $(LD) -r -o driver.o $(OBJS)
+
+clean:
+ rm -f *.o *~ core
diff --git a/xen-2.4.16/drivers/char/xen_kbd.c b/xen-2.4.16/drivers/char/xen_kbd.c
new file mode 100644
index 0000000000..780028ab69
--- /dev/null
+++ b/xen-2.4.16/drivers/char/xen_kbd.c
@@ -0,0 +1,111 @@
+#include <asm-i386/io.h>
+#include <xeno/sched.h> /* this has request_irq() proto for some reason */
+
+#define KEYBOARD_IRQ 1
+
+#define KBD_STATUS_REG 0x64 /* Status register (R) */
+#define KBD_CNTL_REG 0x64 /* Controller command register (W) */
+#define KBD_DATA_REG 0x60 /* Keyboard data register (R/W) */
+
+/* register status bits */
+#define KBD_STAT_OBF 0x01 /* Keyboard output buffer full */
+#define KBD_STAT_IBF 0x02 /* Keyboard input buffer full */
+#define KBD_STAT_SELFTEST 0x04 /* Self test successful */
+#define KBD_STAT_CMD 0x08 /* Last write was a command write (0=data) */
+
+#define KBD_STAT_UNLOCKED 0x10 /* Zero if keyboard locked */
+#define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */
+#define KBD_STAT_GTO 0x40 /* General receive/xmit timeout */
+#define KBD_STAT_PERR 0x80 /* Parity error */
+
+#define kbd_read_input() inb(KBD_DATA_REG)
+#define kbd_read_status() inb(KBD_STATUS_REG)
+
+
+
+/*
+ * dispatch_scancode
+ *
+ * Act on one raw keyboard scancode. The recognised codes just echo a tag
+ * to the console; <f5> is reserved for a block-layer state dump that is
+ * currently commented out. Every other code is ignored.
+ */
+static void
+dispatch_scancode (unsigned char scancode)
+{
+
+    /*
+     * we could be a bit more clever here, but why?
+     * just add a jump to your debug routine for the appropriate character.
+     */
+    switch (scancode)
+    {
+    case 0x01 : /* esc */
+        printk ("<esc>");
+        break;
+    case 0x9e : /* a */
+        printk ("a");
+        break;
+    case 0x9f : /* s */
+        printk ("s");
+        break;
+    case 0xae : /* c */
+        printk ("c");
+        break;
+    case 0xb0 : /* b */
+        printk ("b");
+        break;
+    case 0xbb : /* f1 */
+        printk ("<f1>");
+        break;
+    case 0xbc : /* f2 */
+        printk ("<f2>");
+        break;
+    case 0xbd : /* f3 */
+        printk ("<f3>");
+        break;
+    case 0xbe : /* f4 */
+        printk ("<f4>");
+        break;
+    case 0xbf : /* f5 */
+        /* xen_block_dump_state(); */
+        break;
+    default :
+        /* printk ("%x ", scancode); */
+        break;  /* a label must be followed by a statement */
+    }
+
+    return;
+}
+
+
+/* regs should be struct pt_regs */
+
+static void keyboard_interrupt(int irq, void *dev_id, void *regs)
+{
+    unsigned char status = kbd_read_status();
+    unsigned int budget = 10000;    /* upper bound on loop iterations */
+
+    for (;;)
+    {
+        unsigned char scancode;
+
+        /* the budget is charged before the status is looked at */
+        if (--budget == 0)
+        {
+            break;
+        }
+        if (!(status & KBD_STAT_OBF))
+        {
+            break;                  /* output buffer drained */
+        }
+
+        scancode = kbd_read_input();
+
+        /* bytes flagged with a timeout/parity error and mouse bytes are discarded */
+        if (!(status & (KBD_STAT_GTO | KBD_STAT_PERR)) &&
+            !(status & KBD_STAT_MOUSE_OBF))
+        {
+            dispatch_scancode (scancode);
+        }
+
+        status = kbd_read_status();
+    }
+
+    if (budget == 0)
+    {
+        printk(KERN_ERR "pc_keyb: controller jammed (0x%02X).\n", status);
+    }
+}
+
+
+void initialize_keyboard()
+{
+    /* hook keyboard_interrupt onto the keyboard IRQ; a failure is only reported */
+    int rc = request_irq(KEYBOARD_IRQ, keyboard_interrupt, 0, "keyboard", NULL);
+
+    if (rc != 0)
+    {
+        printk("initialize_keyboard: failed to alloc IRQ %d\n", KEYBOARD_IRQ);
+    }
+}
+
diff --git a/xen-2.4.16/drivers/char/xen_serial.c b/xen-2.4.16/drivers/char/xen_serial.c
new file mode 100644
index 0000000000..7c62567fa4
--- /dev/null
+++ b/xen-2.4.16/drivers/char/xen_serial.c
@@ -0,0 +1,140 @@
+#include <asm-i386/io.h>
+#include <xeno/sched.h> /* this has request_irq() proto for some reason */
+
+
+/* Register offsets */
+#define NS16550_RBR 0x00 /* receive buffer */
+#define NS16550_THR 0x00 /* transmit holding */
+#define NS16550_IER 0x01 /* interrupt enable */
+#define NS16550_IIR 0x02 /* interrupt identity */
+#define NS16550_FCR 0x02 /* FIFO control */
+#define NS16550_LCR 0x03 /* line control */
+#define NS16550_MCR 0x04 /* MODEM control */
+#define NS16550_LSR 0x05 /* line status */
+#define NS16550_MSR 0x06 /* MODEM status */
+#define NS16550_SCR 0x07 /* scratch */
+#define NS16550_DDL 0x00 /* divisor latch (ls) ( DLAB=1) */
+#define NS16550_DLM 0x01 /* divisor latch (ms) ( DLAB=1) */
+
+/* Interrupt enable register */
+#define NS16550_IER_ERDAI 0x01 /* rx data recv'd */
+#define NS16550_IER_ETHREI 0x02 /* tx reg. empty */
+#define NS16550_IER_ELSI 0x04 /* rx line status */
+#define NS16550_IER_EMSI 0x08 /* MODEM status */
+
+/* FIFO control register */
+#define NS16550_FCR_ENABLE 0x01 /* enable FIFO */
+#define NS16550_FCR_CLRX 0x02 /* clear Rx FIFO */
+#define NS16550_FCR_CLTX 0x04 /* clear Tx FIFO */
+#define NS16550_FCR_DMA 0x10 /* enter DMA mode */
+#define NS16550_FCR_TRG1 0x00 /* Rx FIFO trig lev 1 */
+#define NS16550_FCR_TRG4 0x40 /* Rx FIFO trig lev 4 */
+#define NS16550_FCR_TRG8 0x80 /* Rx FIFO trig lev 8 */
+#define NS16550_FCR_TRG14 0xc0 /* Rx FIFO trig lev 14 */
+
+/* MODEM control register */
+#define NS16550_MCR_DTR 0x01 /* Data Terminal Ready */
+#define NS16550_MCR_RTS 0x02 /* Request to Send */
+#define NS16550_MCR_OUT1 0x04 /* OUT1: unused */
+#define NS16550_MCR_OUT2 0x08 /* OUT2: interrupt mask */
+#define NS16550_MCR_LOOP 0x10 /* Loop */
+
+#define SERIAL_BASE 0x3f8 /* XXX SMH: horrible hardwired COM1 */
+
+
+
+/*
+** We keep an array of 'handlers' for each key code between 0 and 255;
+** this is intended to allow very simple debugging routines (toggle
+** debug flag, dump registers, reboot, etc) to be hooked in in a slightly
+** nicer fashion than just editing this file :-)
+*/
+
+#define KEY_MAX 256
+typedef void key_handler(u_char key);
+
+static key_handler *key_table[KEY_MAX];
+
+void add_key_handler(u_char key, key_handler *handler)
+{
+    key_handler **slot = &key_table[key];
+
+    /* replacing an existing handler is allowed, but worth a warning */
+    if (*slot != NULL)
+    {
+        printk("Warning: overwriting handler for key 0x%x\n", key);
+    }
+
+    *slot = handler;
+}
+
+
+
+static int serial_echo = 0; /* default is not to echo; change with 'e' */
+
+void toggle_echo(u_char key)
+{
+    /* key handler: flip the flag that makes serial_rx_int echo input */
+    serial_echo = serial_echo ? 0 : 1;
+}
+
+
+void halt_machine(u_char key)
+{
+    /*
+     * Key handler meant as 'debug me please' (dump info and halt the
+     * machine); for now it only announces itself and returns.
+     */
+    printk("serial_rx_int: got EOT => halting machine.\n");
+    printk("<not actually halting for now>\n");
+}
+
+
+
+static void serial_rx_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+    /* XXX SMH: should probably check this is an RX interrupt :-) */
+
+    /* reading the receive buffer both fetches the character and clears the interrupt */
+    u_char c = inb(SERIAL_BASE + NS16550_RBR );
+    key_handler *handler = key_table[c];
+
+    /* run the hook registered for this character, if any; we trust it */
+    if (handler != NULL)
+    {
+        handler(c);
+    }
+
+    /* echo is evaluated after the hook, so a toggle takes effect at once */
+    if (serial_echo != 0)
+    {
+        printk("%c", c);
+    }
+}
+
+
+void initialize_serial()
+{
+    int key = 0;
+    int rc;
+    int fifo = 1;   /* Should detect this, but must be a ns16550a at least, surely? */
+
+    /* start with an empty key handler table */
+    while (key < KEY_MAX)
+    {
+        key_table[key] = (key_handler *)NULL;
+        key++;
+    }
+
+    /* built-in handlers */
+    add_key_handler(0x01, toggle_echo);     /* <esc> to toggle echo */
+    add_key_handler(0x04, halt_machine);    /* CTRL-D to 'halt' */
+
+    if (fifo != 0)
+    {
+        /* Clear FIFOs, enable, trigger at 1 byte */
+        int fcr_bits = NS16550_FCR_TRG1 | NS16550_FCR_ENABLE |
+                       NS16550_FCR_CLRX | NS16550_FCR_CLTX;
+
+        outb(fcr_bits, SERIAL_BASE+NS16550_FCR);
+    }
+
+    outb(NS16550_MCR_OUT2, SERIAL_BASE + NS16550_MCR);      /* Modem control */
+    outb(NS16550_IER_ERDAI, SERIAL_BASE + NS16550_IER );    /* Setup interrupts */
+
+    /* XXX SMH: this is a hack; probably is IRQ4 but grab both anyway */
+    rc = request_irq(4, serial_rx_int, 0, "serial", (void *)0x1234);
+    if (rc != 0)
+    {
+        printk("initialize_serial: failed to get IRQ4, rc=%d\n", rc);
+    }
+
+    rc = request_irq(3, serial_rx_int, 0, "serial", (void *)0x1234);
+    if (rc != 0)
+    {
+        printk("initialize_serial: failed to get IRQ3, rc=%d\n", rc);
+    }
+}
diff --git a/xen-2.4.16/drivers/ide/ide-disk.c b/xen-2.4.16/drivers/ide/ide-disk.c
index 984e53cd67..0d1cd113cd 100644
--- a/xen-2.4.16/drivers/ide/ide-disk.c
+++ b/xen-2.4.16/drivers/ide/ide-disk.c
@@ -420,13 +420,13 @@ static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsi
taskfile.device_head |= drive->select.all;
taskfile.command = command;
-#ifdef DEBUG
+ /* #ifdef DEBUG */
printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ");
if (lba) printk("LBAsect=%lld, ", block);
else printk("CHS=%d/%d/%d, ", cyl, head, sect);
printk("sectors=%ld, ", rq->nr_sectors);
printk("buffer=0x%08lx\n", (unsigned long) rq->buffer);
-#endif
+ /* #endif*/
memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr));
memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr));
@@ -578,7 +578,8 @@ static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsig
tasklets[9] = (task_ioreg_t) 0;
// tasklets[8] = (task_ioreg_t) (block>>32);
// tasklets[9] = (task_ioreg_t) (block>>40);
-#ifdef DEBUG
+ /*#ifdef DEBUG */
+ printk("[A]\n");
printk("%s: %sing: LBAsect=%lu, sectors=%ld, buffer=0x%08lx, LBAsect=0x%012lx\n",
drive->name,
(rq->cmd==READ)?"read":"writ",
@@ -590,7 +591,7 @@ static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsig
drive->name, tasklets[3], tasklets[2],
tasklets[9], tasklets[8], tasklets[7],
tasklets[6], tasklets[5], tasklets[4]);
-#endif
+ /* #endif */
OUT_BYTE(tasklets[1], IDE_FEATURE_REG);
OUT_BYTE(tasklets[3], IDE_NSECTOR_REG);
OUT_BYTE(tasklets[7], IDE_SECTOR_REG);
@@ -1320,6 +1321,10 @@ static void idedisk_setup (ide_drive_t *drive)
struct hd_driveid *id = drive->id;
unsigned long capacity;
+
+ printk (KERN_ALERT
+ "ide-disk.c::idedisk_setup: chs %d %d %d\n",
+ drive->cyl, drive->head, drive->sect);
idedisk_add_settings(drive);
@@ -1383,7 +1388,7 @@ static void idedisk_setup (ide_drive_t *drive)
if ((capacity >= (drive->bios_cyl * drive->bios_sect * drive->bios_head)) &&
(!drive->forced_geom) && drive->bios_sect && drive->bios_head)
drive->bios_cyl = (capacity / drive->bios_sect) / drive->bios_head;
- printk (KERN_INFO "XEN %s: %ld sectors", drive->name, capacity);
+ printk (KERN_INFO "[XEN] %s: %ld sectors", drive->name, capacity);
/* Give size in megabytes (MB), not mebibytes (MiB). */
/* We compute the exact rounded value, avoiding overflow. */
diff --git a/xen-2.4.16/drivers/ide/ide-disk.c.orig b/xen-2.4.16/drivers/ide/ide-disk.c.orig
new file mode 100644
index 0000000000..984e53cd67
--- /dev/null
+++ b/xen-2.4.16/drivers/ide/ide-disk.c.orig
@@ -0,0 +1,1550 @@
+/*
+ * linux/drivers/ide/ide-disk.c Version 1.10 June 9, 2000
+ *
+ * Copyright (C) 1994-1998 Linus Torvalds & authors (see below)
+ */
+
+/*
+ * Mostly written by Mark Lord <mlord@pobox.com>
+ * and Gadi Oxman <gadio@netvision.net.il>
+ * and Andre Hedrick <andre@linux-ide.org>
+ *
+ * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c.
+ *
+ * Version 1.00 move disk only code from ide.c to ide-disk.c
+ * support optional byte-swapping of all data
+ * Version 1.01 fix previous byte-swapping code
+ * Version 1.02 remove ", LBA" from drive identification msgs
+ * Version 1.03 fix display of id->buf_size for big-endian
+ * Version 1.04 add /proc configurable settings and S.M.A.R.T support
+ * Version 1.05 add capacity support for ATA3 >= 8GB
+ * Version 1.06 get boot-up messages to show full cyl count
+ * Version 1.07 disable door-locking if it fails
+ * Version 1.08 fixed CHS/LBA translations for ATA4 > 8GB,
+ * process of adding new ATA4 compliance.
+ * fixed problems in allowing fdisk to see
+ * the entire disk.
+ * Version 1.09 added increment of rq->sector in ide_multwrite
+ * added UDMA 3/4 reporting
+ * Version 1.10 request queue changes, Ultra DMA 100
+ * Version 1.11 added 48-bit lba
+ * Version 1.12 adding taskfile io access method
+ */
+
+#define IDEDISK_VERSION "1.12"
+
+#undef REALLY_SLOW_IO /* most systems can safely undef this */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/slab.h>
+#include <xeno/delay.h>
+#include <xeno/ide.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+#define IS_PDC4030_DRIVE (HWIF(drive)->chipset == ide_pdc4030)
+#else
+#define IS_PDC4030_DRIVE (0) /* auto-NULLs out pdc4030 code */
+#endif
+
+#ifdef CONFIG_IDE_TASKFILE_IO
+# undef __TASKFILE__IO /* define __TASKFILE__IO */
+#else /* CONFIG_IDE_TASKFILE_IO */
+# undef __TASKFILE__IO
+#endif /* CONFIG_IDE_TASKFILE_IO */
+
+#ifndef __TASKFILE__IO
+
+static void idedisk_bswap_data (void *buffer, int wcount)
+{ /*
+ * Swap the two bytes of every 16-bit value in buffer, in place.
+ * Each loop pass handles two consecutive u16 values, so wcount counts
+ * pairs of u16 (presumably 32-bit words -- confirm against callers).
+ */
+ u16 *p = buffer;
+
+ while (wcount--) {
+ *p = *p << 8 | *p >> 8; p++; /* first u16 of the pair */
+ *p = *p << 8 | *p >> 8; p++; /* second u16 of the pair */
+ }
+}
+
+static inline void idedisk_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{ /*
+ * Hand buffer/wcount to ide_input_data() (presumably the raw transfer
+ * from the drive -- defined elsewhere), then byte-swap the buffer in
+ * place when drive->bswap is set.
+ */
+ ide_input_data(drive, buffer, wcount);
+ if (drive->bswap)
+ idedisk_bswap_data(buffer, wcount);
+}
+
+static inline void idedisk_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{ /*
+ * Hand buffer/wcount to ide_output_data(). When drive->bswap is set the
+ * buffer is byte-swapped for the duration of the call and swapped back
+ * afterwards, so the caller's data ends up unchanged.
+ */
+ if (drive->bswap) {
+ idedisk_bswap_data(buffer, wcount);
+ ide_output_data(drive, buffer, wcount);
+ idedisk_bswap_data(buffer, wcount); /* restore caller's byte order */
+ } else
+ ide_output_data(drive, buffer, wcount);
+}
+
+#endif /* __TASKFILE__IO */
+
+/*
+ * lba_capacity_is_ok() performs a sanity check on the claimed "lba_capacity"
+ * value for this drive (from its reported identification information).
+ *
+ * Returns: 1 if lba_capacity looks sensible
+ * 0 otherwise
+ *
+ * It is called only once for each drive.
+ *
+ * Side effects: prints a message for drives that have bit 0x0400 set in
+ * both command_set_2 and cfs_enable_2, and may rewrite id->lba_capacity
+ * with its 16-bit halves exchanged when that form passes the check.
+ */
+static int lba_capacity_is_ok (struct hd_driveid *id)
+{
+ unsigned long lba_sects, chs_sects, head, tail;
+
+ if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)) {
+ printk("48-bit Drive: %llu \n", id->lba_capacity_2);
+ return 1; /* accepted without any further check */
+ }
+
+ /*
+ * The ATA spec tells large drives to return
+ * C/H/S = 16383/16/63 independent of their size.
+ * Some drives can be jumpered to use 15 heads instead of 16.
+ * Some drives can be jumpered to use 4092 cyls instead of 16383.
+ */
+ if ((id->cyls == 16383
+ || (id->cyls == 4092 && id->cur_cyls == 16383)) &&
+ id->sectors == 63 &&
+ (id->heads == 15 || id->heads == 16) &&
+ id->lba_capacity >= 16383*63*id->heads)
+ return 1;
+
+ lba_sects = id->lba_capacity;
+ chs_sects = id->cyls * id->heads * id->sectors;
+
+ /* perform a rough sanity check on lba_sects: within 10% is OK
+ * (the operands are unsigned: when lba_sects < chs_sects the
+ * difference wraps to a very large value and the test fails) */
+ if ((lba_sects - chs_sects) < chs_sects/10)
+ return 1;
+
+ /* some drives have the word order reversed */
+ head = ((lba_sects >> 16) & 0xffff);
+ tail = (lba_sects & 0xffff);
+ lba_sects = (head | (tail << 16));
+ if ((lba_sects - chs_sects) < chs_sects/10) {
+ id->lba_capacity = lba_sects;
+ return 1; /* lba_capacity is (now) good */
+ }
+
+ return 0; /* lba_capacity value may be bad */
+}
+
+#ifndef __TASKFILE__IO
+
+/*
+ * read_intr() is the handler for disk read/multread interrupts
+ *
+ * Each invocation transfers up to drive->mult_count sectors (a single
+ * sector when mult_count is 0) into the current request's buffer,
+ * advances the request bookkeeping, and re-arms itself with
+ * ide_set_handler() while sectors remain.
+ *
+ * Returns ide_started when another interrupt is awaited, ide_stopped
+ * once rq->nr_sectors has dropped to 0 (or below), or whatever
+ * ide_error() returns when the status shows ERR or DRQ without the
+ * drive being ready.
+ */
+static ide_startstop_t read_intr (ide_drive_t *drive)
+{
+ byte stat;
+ int i;
+ unsigned int msect, nsect;
+ struct request *rq;
+
+ /* new way for dealing with premature shared PCI interrupts */
+ if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) {
+ if (stat & (ERR_STAT|DRQ_STAT)) {
+ return ide_error(drive, "read_intr", stat);
+ }
+ /* no data yet, so wait for another interrupt */
+ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ msect = drive->mult_count; /* sectors still allowed in this invocation */
+
+read_next:
+ rq = HWGROUP(drive)->rq; /* re-read: ide_end_request() below may have run */
+ if (msect) {
+ if ((nsect = rq->current_nr_sectors) > msect)
+ nsect = msect;
+ msect -= nsect;
+ } else
+ nsect = 1;
+ idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS);
+#ifdef DEBUG
+ printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n",
+ drive->name, rq->sector, rq->sector+nsect-1,
+ (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect);
+#endif
+ rq->sector += nsect;
+ rq->buffer += nsect<<9; /* advance by nsect * 512 bytes */
+ rq->errors = 0;
+ i = (rq->nr_sectors -= nsect); /* sectors left in the whole request */
+ if (((long)(rq->current_nr_sectors -= nsect)) <= 0)
+ ide_end_request(1, HWGROUP(drive));
+ if (i > 0) {
+ if (msect)
+ goto read_next;
+ ide_set_handler (drive, &read_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ return ide_stopped;
+}
+
+/*
+ * write_intr() is the handler for disk write interrupts
+ *
+ * On a good status it accounts for one 512-byte sector of the current
+ * request and, if sectors remain, outputs the next sector and re-arms
+ * itself. The progress step is taken only when exactly one of
+ * "rq->nr_sectors == 1" and "DRQ is set" holds (see the XOR below).
+ *
+ * Returns ide_started when another interrupt is awaited, ide_stopped
+ * otherwise on a good status, and the result of ide_error() when the
+ * status check fails.
+ */
+static ide_startstop_t write_intr (ide_drive_t *drive)
+{
+ byte stat;
+ int i;
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ struct request *rq = hwgroup->rq;
+
+ if (!OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) {
+ printk("%s: write_intr error1: nr_sectors=%ld, stat=0x%02x\n", drive->name, rq->nr_sectors, stat);
+ } else {
+#ifdef DEBUG
+ printk("%s: write: sector %ld, buffer=0x%08lx, remaining=%ld\n",
+ drive->name, rq->sector, (unsigned long) rq->buffer,
+ rq->nr_sectors-1);
+#endif
+ if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) {
+ rq->sector++;
+ rq->buffer += 512;
+ rq->errors = 0;
+ i = --rq->nr_sectors; /* sectors left in the whole request */
+ --rq->current_nr_sectors;
+ if (((long)rq->current_nr_sectors) <= 0)
+ ide_end_request(1, hwgroup);
+ if (i > 0) {
+ idedisk_output_data (drive, rq->buffer, SECTOR_WORDS);
+ ide_set_handler (drive, &write_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ return ide_stopped;
+ }
+ return ide_stopped; /* the original code did this here (?) */
+ }
+ return ide_error(drive, "write_intr", stat); /* reached only from the bad-status branch */
+}
+
+/*
+ * ide_multwrite() transfers a block of up to mcount sectors of data
+ * to a drive as part of a disk multiple-sector write operation.
+ *
+ * Returns 0 on success. (As written, every path returns 0.)
+ *
+ * Note that we may be called from two contexts - the do_rw_disk context
+ * and IRQ context. The IRQ can happen any time after we've output the
+ * full "mcount" number of sectors, so we must make sure we update the
+ * state _before_ we output the final part of the data!
+ *
+ * The function works on hwgroup->wrq (the scratch copy of the request),
+ * not on hwgroup->rq.
+ */
+int ide_multwrite (ide_drive_t *drive, unsigned int mcount)
+{
+ ide_hwgroup_t *hwgroup= HWGROUP(drive);
+ struct request *rq = &hwgroup->wrq;
+
+ do {
+ char *buffer;
+ int nsect = rq->current_nr_sectors;
+
+ if (nsect > mcount)
+ nsect = mcount;
+ mcount -= nsect;
+ buffer = rq->buffer; /* remember where this chunk starts */
+
+ rq->sector += nsect;
+ rq->buffer += nsect << 9;
+ rq->nr_sectors -= nsect;
+ rq->current_nr_sectors -= nsect;
+
+ /* Do we move to the next bh after this? */
+ if (!rq->current_nr_sectors) {
+ struct buffer_head *bh = rq->bh->b_reqnext;
+
+ /* end early if we ran out of requests */
+ if (!bh) {
+ mcount = 0;
+ } else {
+ rq->bh = bh;
+ rq->current_nr_sectors = bh->b_size >> 9;
+ rq->buffer = bh->b_data;
+ }
+ }
+
+ /*
+ * Ok, we're all setup for the interrupt
+ * re-entering us on the last transfer.
+ */
+ idedisk_output_data(drive, buffer, nsect<<7); /* nsect * 128 units per sector */
+ } while (mcount);
+
+ return 0;
+}
+
+/*
+ * multwrite_intr() is the handler for disk multwrite interrupts
+ *
+ * With a good status and DRQ set it sends the next batch through
+ * ide_multwrite() and re-arms itself; with DRQ clear and the scratch
+ * copy (hwgroup->wrq) fully consumed it completes the original request
+ * piece by piece. A bad status is passed to ide_error().
+ */
+static ide_startstop_t multwrite_intr (ide_drive_t *drive)
+{
+ byte stat;
+ int i;
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ struct request *rq = &hwgroup->wrq;
+
+ if (OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) {
+ if (stat & DRQ_STAT) {
+ /*
+ * The drive wants data. Remember rq is the copy
+ * of the request
+ */
+ if (rq->nr_sectors) {
+ if (ide_multwrite(drive, drive->mult_count))
+ return ide_stopped;
+ ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL);
+ return ide_started;
+ }
+ } else {
+ /*
+ * If the copy has all the blocks completed then
+ * we can end the original request.
+ */
+ if (!rq->nr_sectors) { /* all done? */
+ rq = hwgroup->rq; /* switch from the copy to the real request */
+ for (i = rq->nr_sectors; i > 0;){
+ i -= rq->current_nr_sectors;
+ ide_end_request(1, hwgroup);
+ }
+ return ide_stopped;
+ }
+ }
+ return ide_stopped; /* the original code did this here (?) */
+ }
+ return ide_error(drive, "multwrite_intr", stat);
+}
+#endif /* __TASKFILE__IO */
+
+#ifdef __TASKFILE__IO
+
+static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block);
+static ide_startstop_t lba_28_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block);
+static ide_startstop_t lba_48_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long long block);
+
+/*
+ * do_rw_disk() issues READ and WRITE commands to a disk,
+ * using LBA if supported, or CHS otherwise, to address sectors.
+ * It also takes care of issuing special DRIVE_CMDs.
+ *
+ * This is the __TASKFILE__IO variant: it validates rq->cmd and then
+ * dispatches to lba_48_rw_disk(), lba_28_rw_disk() or chs_rw_disk().
+ * Any command other than READ/WRITE is logged and the request is
+ * ended with ide_end_request(0, ...).
+ */
+static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+ if (rq->cmd == READ)
+ goto good_command;
+ if (rq->cmd == WRITE)
+ goto good_command;
+
+ printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd);
+ ide_end_request(0, HWGROUP(drive));
+ return ide_stopped;
+
+good_command:
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (IS_PDC4030_DRIVE) {
+ extern ide_startstop_t promise_rw_disk(ide_drive_t *, struct request *, unsigned long);
+ return promise_rw_disk(drive, rq, block);
+ }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+
+ if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) /* 48-bit LBA */
+ return lba_48_rw_disk(drive, rq, (unsigned long long) block);
+ if (drive->select.b.lba) /* 28-bit LBA */
+ return lba_28_rw_disk(drive, rq, (unsigned long) block);
+
+ /* 28-bit CHS : DIE DIE DIE piece of legacy crap!!! */
+ return chs_rw_disk(drive, rq, (unsigned long) block);
+}
+
+static task_ioreg_t get_command (ide_drive_t *drive, int cmd)
+{ /*
+ * Pick the ATA opcode for a READ or WRITE request.
+ * Preference order per direction: DMA (drive->using_dma), then
+ * multi-sector (drive->mult_count), then plain single-sector.
+ * The _EXT opcode is chosen when lba48bit is non-zero.
+ * Returns WIN_NOP for any cmd other than READ or WRITE.
+ */
+ int lba48bit = (drive->id->cfs_enable_2 & 0x0400) ? 1 : 0;
+
+#if 1
+ lba48bit = drive->addressing; /* unconditionally replaces the value computed above */
+#endif
+
+ if ((cmd == READ) && (drive->using_dma))
+ return (lba48bit) ? WIN_READDMA_EXT : WIN_READDMA;
+ else if ((cmd == READ) && (drive->mult_count))
+ return (lba48bit) ? WIN_MULTREAD_EXT : WIN_MULTREAD;
+ else if (cmd == READ)
+ return (lba48bit) ? WIN_READ_EXT : WIN_READ;
+ else if ((cmd == WRITE) && (drive->using_dma))
+ return (lba48bit) ? WIN_WRITEDMA_EXT : WIN_WRITEDMA;
+ else if ((cmd == WRITE) && (drive->mult_count))
+ return (lba48bit) ? WIN_MULTWRITE_EXT : WIN_MULTWRITE;
+ else if (cmd == WRITE)
+ return (lba48bit) ? WIN_WRITE_EXT : WIN_WRITE;
+ else
+ return WIN_NOP;
+}
+
+static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{ /*
+ * Build a taskfile for a CHS-addressed read/write of rq starting at
+ * linear sector "block" and pass it to do_rw_taskfile().
+ * block is converted to cylinder/head/sector using drive->sect and
+ * drive->head; a count of 256 sectors is encoded as 0.
+ *
+ * NOTE(review): memset() sizes use task_struct_t / hob_struct_t while
+ * the objects are declared with the struct tags -- presumably
+ * typedefs of the same types; confirm.
+ * NOTE(review): rq->special is left pointing at the stack-local
+ * "args"; confirm nothing dereferences it after this function returns.
+ */
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ ide_task_t args;
+
+ task_ioreg_t command = get_command(drive, rq->cmd);
+ unsigned int track = (block / drive->sect);
+ unsigned int sect = (block % drive->sect) + 1; /* sector numbers are 1-based here */
+ unsigned int head = (track % drive->head);
+ unsigned int cyl = (track / drive->head);
+
+ memset(&taskfile, 0, sizeof(task_struct_t));
+ memset(&hobfile, 0, sizeof(hob_struct_t));
+
+ taskfile.sector_count = (rq->nr_sectors==256)?0x00:rq->nr_sectors;
+ taskfile.sector_number = sect;
+ taskfile.low_cylinder = cyl;
+ taskfile.high_cylinder = (cyl>>8);
+ taskfile.device_head = head;
+ taskfile.device_head |= drive->select.all;
+ taskfile.command = command;
+
+#ifdef DEBUG
+ printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ");
+ if (lba) printk("LBAsect=%lld, ", block); /* NOTE(review): "lba" is not declared in this function */
+ else printk("CHS=%d/%d/%d, ", cyl, head, sect);
+ printk("sectors=%ld, ", rq->nr_sectors);
+ printk("buffer=0x%08lx\n", (unsigned long) rq->buffer);
+#endif
+
+ memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr));
+ memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr));
+ args.command_type = ide_cmd_type_parser(&args);
+ args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile);
+ args.handler = ide_handler_parser(&taskfile, &hobfile);
+ args.posthandler = NULL;
+ args.rq = (struct request *) rq;
+ args.block = block;
+ rq->special = NULL;
+ rq->special = (ide_task_t *)&args;
+
+ return do_rw_taskfile(drive, &args);
+}
+
+static ide_startstop_t lba_28_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{ /*
+ * Build a taskfile for a 28-bit LBA read/write of rq starting at
+ * sector "block" and pass it to do_rw_taskfile().
+ * The address is spread over sector_number, low_cylinder,
+ * high_cylinder and the low four bits of device_head; a count of
+ * 256 sectors is encoded as 0.
+ *
+ * NOTE(review): "block" is shifted in place while the registers are
+ * filled, so args.block below receives the original value >> 16.
+ * NOTE(review): rq->special is left pointing at the stack-local
+ * "args"; confirm nothing dereferences it after this function returns.
+ */
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ ide_task_t args;
+
+ task_ioreg_t command = get_command(drive, rq->cmd);
+
+ memset(&taskfile, 0, sizeof(task_struct_t));
+ memset(&hobfile, 0, sizeof(hob_struct_t));
+
+ taskfile.sector_count = (rq->nr_sectors==256)?0x00:rq->nr_sectors;
+ taskfile.sector_number = block;
+ taskfile.low_cylinder = (block>>=8);
+ taskfile.high_cylinder = (block>>=8);
+ taskfile.device_head = ((block>>8)&0x0f);
+ taskfile.device_head |= drive->select.all;
+ taskfile.command = command;
+
+
+#ifdef DEBUG
+ printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ");
+ if (lba) printk("LBAsect=%lld, ", block); /* NOTE(review): lba/cyl/head/sect are not declared in this function */
+ else printk("CHS=%d/%d/%d, ", cyl, head, sect);
+ printk("sectors=%ld, ", rq->nr_sectors);
+ printk("buffer=0x%08lx\n", (unsigned long) rq->buffer);
+#endif
+
+ memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr));
+ memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr));
+ args.command_type = ide_cmd_type_parser(&args);
+ args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile);
+ args.handler = ide_handler_parser(&taskfile, &hobfile);
+ args.posthandler = NULL;
+ args.rq = (struct request *) rq;
+ args.block = block;
+ rq->special = NULL;
+ rq->special = (ide_task_t *)&args;
+
+ return do_rw_taskfile(drive, &args);
+}
+
+/*
+ * 268435455 == 137439 MB or 28bit limit
+ * 320173056 == 163929 MB or 48bit addressing
+ * 1073741822 == 549756 MB or 48bit addressing fake drive
+ *
+ * lba_48_rw_disk() builds the taskfile/hob register pair for a 48-bit
+ * LBA read/write of rq starting at sector "block" and passes it to
+ * do_rw_taskfile(). The low three address bytes go into the taskfile
+ * registers, the next three into the hob registers; the 16-bit sector
+ * count is split the same way and 65536 sectors is encoded as 0/0.
+ *
+ * NOTE(review): "block" is shifted in place while the registers are
+ * filled, so args.block receives the original value >> 40.
+ * NOTE(review): rq->special is left pointing at the stack-local
+ * "args"; confirm nothing dereferences it after this function returns.
+ */
+
+static ide_startstop_t lba_48_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long long block)
+{
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ ide_task_t args;
+
+ task_ioreg_t command = get_command(drive, rq->cmd);
+
+ memset(&taskfile, 0, sizeof(task_struct_t));
+ memset(&hobfile, 0, sizeof(hob_struct_t));
+
+ taskfile.sector_count = rq->nr_sectors;
+ hobfile.sector_count = (rq->nr_sectors>>8);
+
+ if (rq->nr_sectors == 65536) {
+ taskfile.sector_count = 0x00;
+ hobfile.sector_count = 0x00;
+ }
+
+ taskfile.sector_number = block; /* low lba */
+ taskfile.low_cylinder = (block>>=8); /* mid lba */
+ taskfile.high_cylinder = (block>>=8); /* hi lba */
+ hobfile.sector_number = (block>>=8); /* low lba */
+ hobfile.low_cylinder = (block>>=8); /* mid lba */
+ hobfile.high_cylinder = (block>>=8); /* hi lba */
+ taskfile.device_head = drive->select.all;
+ hobfile.device_head = taskfile.device_head;
+ hobfile.control = (drive->ctl|0x80);
+ taskfile.command = command;
+
+#ifdef DEBUG
+ printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ");
+ if (lba) printk("LBAsect=%lld, ", block); /* NOTE(review): lba/cyl/head/sect are not declared in this function */
+ else printk("CHS=%d/%d/%d, ", cyl, head, sect);
+ printk("sectors=%ld, ", rq->nr_sectors);
+ printk("buffer=0x%08lx\n", (unsigned long) rq->buffer);
+#endif
+
+ memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr));
+ memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr));
+ args.command_type = ide_cmd_type_parser(&args);
+ args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile);
+ args.handler = ide_handler_parser(&taskfile, &hobfile);
+ args.posthandler = NULL;
+ args.rq = (struct request *) rq;
+ args.block = block;
+ rq->special = NULL;
+ rq->special = (ide_task_t *)&args;
+
+ return do_rw_taskfile(drive, &args);
+}
+
+#else /* !__TASKFILE__IO */
+/*
+ * do_rw_disk() issues READ and WRITE commands to a disk,
+ * using LBA if supported, or CHS otherwise, to address sectors.
+ * It also takes care of issuing special DRIVE_CMDs.
+ *
+ * This is the variant used when __TASKFILE__IO is not defined. It
+ * first programs the address/count registers (48-bit LBA, 28-bit LBA
+ * or CHS), then issues the command for rq->cmd:
+ * READ - optionally via DMA, otherwise installs read_intr and
+ * writes the (MULT)READ opcode.
+ * WRITE - optionally via DMA, otherwise writes the (MULT)WRITE
+ * opcode, waits for DRQ and outputs the first data.
+ * Any other command is logged and ended with ide_end_request(0, ...).
+ *
+ * Returns ide_started when an interrupt is awaited, ide_stopped when
+ * the request was rejected or the multwrite set-up failed, or the
+ * startstop value filled in by ide_wait_stat() when DRQ never arrives.
+ */
+static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+ if (IDE_CONTROL_REG)
+ OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (drive->select.b.lba || IS_PDC4030_DRIVE) {
+#else /* !CONFIG_BLK_DEV_PDC4030 */
+ if (drive->select.b.lba) {
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+
+ if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) {
+ task_ioreg_t tasklets[10];
+
+ tasklets[0] = 0;
+ tasklets[1] = 0;
+ tasklets[2] = rq->nr_sectors;
+ tasklets[3] = (rq->nr_sectors>>8);
+ if (rq->nr_sectors == 65536) {
+ tasklets[2] = 0x00;
+ tasklets[3] = 0x00;
+ }
+ tasklets[4] = (task_ioreg_t) block;
+ tasklets[5] = (task_ioreg_t) (block>>8);
+ tasklets[6] = (task_ioreg_t) (block>>16);
+ tasklets[7] = (task_ioreg_t) (block>>24);
+ tasklets[8] = (task_ioreg_t) 0; /* address bits 32 and up are always sent as 0 */
+ tasklets[9] = (task_ioreg_t) 0;
+// tasklets[8] = (task_ioreg_t) (block>>32);
+// tasklets[9] = (task_ioreg_t) (block>>40);
+#ifdef DEBUG
+ printk("%s: %sing: LBAsect=%lu, sectors=%ld, buffer=0x%08lx, LBAsect=0x%012lx\n",
+ drive->name,
+ (rq->cmd==READ)?"read":"writ",
+ block,
+ rq->nr_sectors,
+ (unsigned long) rq->buffer,
+ block);
+ printk("%s: 0x%02x%02x 0x%02x%02x%02x%02x%02x%02x\n",
+ drive->name, tasklets[3], tasklets[2],
+ tasklets[9], tasklets[8], tasklets[7],
+ tasklets[6], tasklets[5], tasklets[4]);
+#endif
+ OUT_BYTE(tasklets[1], IDE_FEATURE_REG); /* first pass: upper bytes of count and address */
+ OUT_BYTE(tasklets[3], IDE_NSECTOR_REG);
+ OUT_BYTE(tasklets[7], IDE_SECTOR_REG);
+ OUT_BYTE(tasklets[8], IDE_LCYL_REG);
+ OUT_BYTE(tasklets[9], IDE_HCYL_REG);
+
+ OUT_BYTE(tasklets[0], IDE_FEATURE_REG); /* second pass, same registers: lower bytes */
+ OUT_BYTE(tasklets[2], IDE_NSECTOR_REG);
+ OUT_BYTE(tasklets[4], IDE_SECTOR_REG);
+ OUT_BYTE(tasklets[5], IDE_LCYL_REG);
+ OUT_BYTE(tasklets[6], IDE_HCYL_REG);
+ OUT_BYTE(0x00|drive->select.all,IDE_SELECT_REG);
+ } else {
+#ifdef DEBUG
+ printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n",
+ drive->name, (rq->cmd==READ)?"read":"writ",
+ block, rq->nr_sectors, (unsigned long) rq->buffer);
+#endif
+ OUT_BYTE(0x00, IDE_FEATURE_REG);
+ OUT_BYTE((rq->nr_sectors==256)?0x00:rq->nr_sectors,IDE_NSECTOR_REG);
+ OUT_BYTE(block,IDE_SECTOR_REG);
+ OUT_BYTE(block>>=8,IDE_LCYL_REG); /* block is consumed byte by byte from here on */
+ OUT_BYTE(block>>=8,IDE_HCYL_REG);
+ OUT_BYTE(((block>>8)&0x0f)|drive->select.all,IDE_SELECT_REG);
+ }
+ } else {
+ unsigned int sect,head,cyl,track;
+ track = block / drive->sect;
+ sect = block % drive->sect + 1;
+ OUT_BYTE(sect,IDE_SECTOR_REG);
+ head = track % drive->head;
+ cyl = track / drive->head;
+
+ OUT_BYTE(0x00, IDE_FEATURE_REG);
+ OUT_BYTE((rq->nr_sectors==256)?0x00:rq->nr_sectors,IDE_NSECTOR_REG);
+ OUT_BYTE(cyl,IDE_LCYL_REG);
+ OUT_BYTE(cyl>>8,IDE_HCYL_REG);
+ OUT_BYTE(head|drive->select.all,IDE_SELECT_REG);
+#ifdef DEBUG
+ printk("%s: %sing: CHS=%d/%d/%d, sectors=%ld, buffer=0x%08lx\n",
+ drive->name, (rq->cmd==READ)?"read":"writ", cyl,
+ head, sect, rq->nr_sectors, (unsigned long) rq->buffer);
+#endif
+ }
+#ifdef CONFIG_BLK_DEV_PDC4030
+ if (IS_PDC4030_DRIVE) {
+ extern ide_startstop_t do_pdc4030_io(ide_drive_t *, struct request *);
+ return do_pdc4030_io (drive, rq);
+ }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+ if (rq->cmd == READ) {
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_read, drive)))
+ return ide_started;
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); /* handler is installed before the opcode is written */
+ if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) {
+ OUT_BYTE(drive->mult_count ? WIN_MULTREAD_EXT : WIN_READ_EXT, IDE_COMMAND_REG);
+ } else {
+ OUT_BYTE(drive->mult_count ? WIN_MULTREAD : WIN_READ, IDE_COMMAND_REG);
+ }
+ return ide_started;
+ }
+ if (rq->cmd == WRITE) {
+ ide_startstop_t startstop;
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_write, drive)))
+ return ide_started;
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+ if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) {
+ OUT_BYTE(drive->mult_count ? WIN_MULTWRITE_EXT : WIN_WRITE_EXT, IDE_COMMAND_REG);
+ } else {
+ OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, IDE_COMMAND_REG);
+ }
+ if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+ printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name,
+ drive->mult_count ? "MULTWRITE" : "WRITE");
+ return startstop;
+ }
+ if (!drive->unmask)
+ __cli(); /* local CPU only */
+ if (drive->mult_count) {
+ ide_hwgroup_t *hwgroup = HWGROUP(drive);
+ /*
+ * Ugh.. this part looks ugly because we MUST set up
+ * the interrupt handler before outputting the first block
+ * of data to be written. If we hit an error (corrupted buffer list)
+ * in ide_multwrite(), then we need to remove the handler/timer
+ * before returning. Fortunately, this NEVER happens (right?).
+ *
+ * Except when you get an error it seems...
+ */
+ hwgroup->wrq = *rq; /* scratchpad */
+ ide_set_handler(drive, &multwrite_intr, WAIT_CMD, NULL);
+ if (ide_multwrite(drive, drive->mult_count)) {
+ unsigned long flags;
+ spin_lock_irqsave(&io_request_lock, flags);
+ hwgroup->handler = NULL;
+ del_timer(&hwgroup->timer);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return ide_stopped;
+ }
+ } else {
+ ide_set_handler (drive, &write_intr, WAIT_CMD, NULL);
+ idedisk_output_data(drive, rq->buffer, SECTOR_WORDS);
+ }
+ return ide_started;
+ }
+ printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd);
+ ide_end_request(0, HWGROUP(drive));
+ return ide_stopped;
+}
+
+#endif /* __TASKFILE__IO */
+
+static int idedisk_open (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{ /*
+ * Open hook for the disk driver. Takes a module reference and, for a
+ * removable drive with drive->usage == 1, checks for a media change
+ * and tries to lock the door. A failing door-lock command clears
+ * drive->doorlocking so it is not attempted again. Always returns 0.
+ */
+ MOD_INC_USE_COUNT;
+ if (drive->removable && drive->usage == 1) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.command = WIN_DOORLOCK;
+ check_disk_change(inode->i_rdev);
+ /*
+ * Ignore the return code from door_lock,
+ * since the open() has already succeeded,
+ * and the door_lock is irrelevant at this point.
+ */
+ if (drive->doorlocking && ide_wait_taskfile(drive, &taskfile, &hobfile, NULL))
+ drive->doorlocking = 0;
+ }
+ return 0;
+}
+
+static int do_idedisk_flushcache(ide_drive_t *drive);
+
+static void idedisk_release (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{ /*
+ * Release hook for the disk driver. For a removable drive with no
+ * remaining users it invalidates the block device and issues a door
+ * unlock; a failing unlock clears drive->doorlocking. If bits 0x3000
+ * of cfs_enable_2 are set and drive->wcache is on, the write cache is
+ * flushed and a failure is logged. Finally drops the module reference.
+ */
+ if (drive->removable && !drive->usage) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.command = WIN_DOORUNLOCK;
+ invalidate_bdev(inode->i_bdev, 0);
+ if (drive->doorlocking && ide_wait_taskfile(drive, &taskfile, &hobfile, NULL))
+ drive->doorlocking = 0;
+ }
+ if ((drive->id->cfs_enable_2 & 0x3000) && drive->wcache)
+ if (do_idedisk_flushcache(drive))
+ printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n",
+ drive->name);
+ MOD_DEC_USE_COUNT;
+}
+
+static int idedisk_media_change (ide_drive_t *drive)
+{ /* Media-change query: no detection is done, the removable flag is the answer. */
+ return drive->removable; /* if removable, always assume it was changed */
+}
+
+static void idedisk_revalidate (ide_drive_t *drive)
+{ /*
+ * Ask grok_partitions() to re-examine this unit of the interface's
+ * gendisk, passing 1<<PARTN_BITS and the value of
+ * current_capacity(drive).
+ */
+ grok_partitions(HWIF(drive)->gd, drive->select.b.unit,
+ 1<<PARTN_BITS,
+ current_capacity(drive));
+}
+
+/*
+ * Queries for true maximum capacity of the drive.
+ * Returns maximum LBA address (> 0) of the drive, 0 if failed.
+ *
+ * 0 is returned both when the identify data lacks the feature bits
+ * tested below and when the command completes with the error bit
+ * (0x01) set in the status register. On success the 28-bit address
+ * read back from the taskfile registers is returned plus one.
+ */
+static unsigned long idedisk_read_native_max_address(ide_drive_t *drive)
+{
+ ide_task_t args;
+ unsigned long addr = 0;
+
+ if (!(drive->id->command_set_1 & 0x0400) &&
+ !(drive->id->cfs_enable_2 & 0x0100))
+ return addr;
+
+ /* Create IDE/ATA command request structure */
+ memset(&args, 0, sizeof(ide_task_t));
+ args.tfRegister[IDE_SELECT_OFFSET] = 0x40;
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_READ_NATIVE_MAX;
+ args.handler = task_no_data_intr;
+
+ /* submit command request */
+ ide_raw_taskfile(drive, &args, NULL);
+
+ /* if OK, compute maximum address value */
+ if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+ addr = ((args.tfRegister[IDE_SELECT_OFFSET] & 0x0f) << 24)
+ | ((args.tfRegister[ IDE_HCYL_OFFSET] ) << 16)
+ | ((args.tfRegister[ IDE_LCYL_OFFSET] ) << 8)
+ | ((args.tfRegister[IDE_SECTOR_OFFSET] ));
+ /* add 1 only on success, so a failed command still yields 0 */
+ addr++; /* since the return value is (maxlba - 1), we add 1 */
+ }
+ return addr;
+}
+
+/*
+ * 48-bit counterpart of idedisk_read_native_max_address(): issues
+ * WIN_READ_NATIVE_MAX_EXT and assembles the result from the three hob
+ * registers (upper 24 bits) and the three taskfile registers (lower
+ * 24 bits). Returns that address plus one, or 0 when the command
+ * completes with the error bit (0x01) set in the status register.
+ */
+static unsigned long long idedisk_read_native_max_address_ext(ide_drive_t *drive)
+{
+ ide_task_t args;
+ unsigned long long addr = 0;
+
+ /* Create IDE/ATA command request structure */
+ memset(&args, 0, sizeof(ide_task_t));
+
+ args.tfRegister[IDE_SELECT_OFFSET] = 0x40;
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_READ_NATIVE_MAX_EXT;
+ args.handler = task_no_data_intr;
+
+ /* submit command request */
+ ide_raw_taskfile(drive, &args, NULL);
+
+ /* if OK, compute maximum address value */
+ if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+ u32 high = ((args.hobRegister[IDE_HCYL_OFFSET_HOB])<<16) |
+ ((args.hobRegister[IDE_LCYL_OFFSET_HOB])<<8) |
+ (args.hobRegister[IDE_SECTOR_OFFSET_HOB]);
+ u32 low = ((args.tfRegister[IDE_HCYL_OFFSET])<<16) |
+ ((args.tfRegister[IDE_LCYL_OFFSET])<<8) |
+ (args.tfRegister[IDE_SECTOR_OFFSET]);
+ addr = ((__u64)high << 24) | low;
+ /* add 1 only on success, so a failed command still yields 0 */
+ addr++; /* since the return value is (maxlba - 1), we add 1 */
+ }
+ return addr;
+}
+
+#ifdef CONFIG_IDEDISK_STROKE
+/*
+ * Sets maximum virtual LBA address of the drive.
+ * Returns new maximum virtual LBA address (> 0) or 0 on failure.
+ *
+ * addr_req is decremented before being split over the taskfile
+ * registers. On success the address read back from the registers is
+ * returned plus one; when the command completes with the error bit
+ * (0x01) set in the status register the result is 0.
+ */
+static unsigned long idedisk_set_max_address(ide_drive_t *drive, unsigned long addr_req)
+{
+ ide_task_t args;
+ unsigned long addr_set = 0;
+
+ addr_req--;
+ /* Create IDE/ATA command request structure */
+ memset(&args, 0, sizeof(ide_task_t));
+ args.tfRegister[IDE_SECTOR_OFFSET] = ((addr_req >> 0) & 0xff);
+ args.tfRegister[IDE_LCYL_OFFSET] = ((addr_req >> 8) & 0xff);
+ args.tfRegister[IDE_HCYL_OFFSET] = ((addr_req >> 16) & 0xff);
+ args.tfRegister[IDE_SELECT_OFFSET] = ((addr_req >> 24) & 0x0f) | 0x40;
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_SET_MAX;
+ args.handler = task_no_data_intr;
+ /* submit command request */
+ ide_raw_taskfile(drive, &args, NULL);
+ /* if OK, read new maximum address value */
+ if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+ addr_set = ((args.tfRegister[IDE_SELECT_OFFSET] & 0x0f) << 24)
+ | ((args.tfRegister[ IDE_HCYL_OFFSET] ) << 16)
+ | ((args.tfRegister[ IDE_LCYL_OFFSET] ) << 8)
+ | ((args.tfRegister[IDE_SECTOR_OFFSET] ));
+ /* add 1 only on success: callers treat a non-zero result as success */
+ addr_set++;
+ }
+ return addr_set;
+}
+
+static unsigned long long idedisk_set_max_address_ext(ide_drive_t *drive, unsigned long long addr_req)
+{ /*
+ * 48-bit SET MAX: writes (addr_req - 1) into the taskfile registers
+ * (low three bytes) and hob registers (next three bytes), issues
+ * WIN_SET_MAX_EXT and returns the address read back from the
+ * registers, or 0 when the status error bit (0x01) is set.
+ *
+ * NOTE(review): the value read back is returned as-is, without the
+ * +1 that idedisk_set_max_address() applies -- confirm intent.
+ */
+ ide_task_t args;
+ unsigned long long addr_set = 0;
+
+ addr_req--;
+ /* Create IDE/ATA command request structure */
+ memset(&args, 0, sizeof(ide_task_t));
+ args.tfRegister[IDE_SECTOR_OFFSET] = ((addr_req >> 0) & 0xff);
+ args.tfRegister[IDE_LCYL_OFFSET] = ((addr_req >>= 8) & 0xff); /* addr_req is shifted in place from here on */
+ args.tfRegister[IDE_HCYL_OFFSET] = ((addr_req >>= 8) & 0xff);
+ args.tfRegister[IDE_SELECT_OFFSET] = 0x40;
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_SET_MAX_EXT;
+ args.hobRegister[IDE_SECTOR_OFFSET_HOB] = ((addr_req >>= 8) & 0xff);
+ args.hobRegister[IDE_LCYL_OFFSET_HOB] = ((addr_req >>= 8) & 0xff);
+ args.hobRegister[IDE_HCYL_OFFSET_HOB] = ((addr_req >>= 8) & 0xff);
+ args.hobRegister[IDE_SELECT_OFFSET_HOB] = 0x40;
+ args.hobRegister[IDE_CONTROL_OFFSET_HOB]= (drive->ctl|0x80);
+ args.handler = task_no_data_intr;
+ /* submit command request */
+ ide_raw_taskfile(drive, &args, NULL);
+ /* if OK, compute maximum address value */
+ if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+ u32 high = ((args.hobRegister[IDE_HCYL_OFFSET_HOB])<<16) |
+ ((args.hobRegister[IDE_LCYL_OFFSET_HOB])<<8) |
+ (args.hobRegister[IDE_SECTOR_OFFSET_HOB]);
+ u32 low = ((args.tfRegister[IDE_HCYL_OFFSET])<<16) |
+ ((args.tfRegister[IDE_LCYL_OFFSET])<<8) |
+ (args.tfRegister[IDE_SECTOR_OFFSET]);
+ addr_set = ((__u64)high << 24) | low;
+ }
+ return addr_set;
+}
+
+/*
+ * Tests if the drive supports Host Protected Area feature.
+ * Returns true if supported, false otherwise.
+ *
+ * The test is bit 0x0400 of id->cfs_enable_1; the result (0 or 1) is
+ * also printed on every call.
+ */
+static inline int idedisk_supports_host_protected_area(ide_drive_t *drive)
+{
+ int flag = (drive->id->cfs_enable_1 & 0x0400) ? 1 : 0;
+ printk("%s: host protected area => %d\n", drive->name, flag);
+ return flag;
+}
+
+#endif /* CONFIG_IDEDISK_STROKE */
+
+/*
+ * Compute drive->capacity, the full capacity of the drive
+ * Called with drive->id != NULL.
+ *
+ * To compute capacity, this uses either of
+ *
+ * 1. CHS value set by user (whatever user sets will be trusted)
+ * 2. LBA value from target drive (require new ATA feature)
+ * 3. LBA value from system BIOS (new one is OK, old one may break)
+ * 4. CHS value from system BIOS (traditional style)
+ *
+ * in above order (i.e., if value of higher priority is available,
+ * the rest will be ignored).
+ *
+ * Besides drive->capacity this also sets drive->capacity48,
+ * drive->select.b.lba and, on the LBA paths, the cyl/head/sect
+ * geometry fields. Drives with bit 0x0400 set in cfs_enable_2 take
+ * the first branch and return from inside it.
+ *
+ * NOTE(review): because that first branch returns, the final
+ * "if (... cfs_enable_2 & 0x0400)" block at the bottom appears to be
+ * unreachable with a true condition -- confirm.
+ */
+static void init_idedisk_capacity (ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+ unsigned long capacity = drive->cyl * drive->head * drive->sect;
+ unsigned long set_max = idedisk_read_native_max_address(drive);
+ unsigned long long capacity_2 = capacity;
+ unsigned long long set_max_ext;
+
+ drive->capacity48 = 0;
+ drive->select.b.lba = 0;
+
+ if (id->cfs_enable_2 & 0x0400) {
+ capacity_2 = id->lba_capacity_2;
+ drive->head = drive->bios_head = 255;
+ drive->sect = drive->bios_sect = 63;
+ drive->cyl = (unsigned int) capacity_2 / (drive->head * drive->sect); /* cast binds first: capacity_2 is narrowed to unsigned int before dividing */
+ drive->select.b.lba = 1;
+ set_max_ext = idedisk_read_native_max_address_ext(drive);
+ if (set_max_ext > capacity_2) {
+#ifdef CONFIG_IDEDISK_STROKE
+ set_max_ext = idedisk_read_native_max_address_ext(drive);
+ set_max_ext = idedisk_set_max_address_ext(drive, set_max_ext);
+ if (set_max_ext) {
+ drive->capacity48 = capacity_2 = set_max_ext;
+ drive->cyl = (unsigned int) set_max_ext / (drive->head * drive->sect);
+ drive->select.b.lba = 1;
+ drive->id->lba_capacity_2 = capacity_2;
+ }
+#else /* !CONFIG_IDEDISK_STROKE */
+ printk("%s: setmax_ext LBA %llu, native %llu\n",
+ drive->name, set_max_ext, capacity_2);
+#endif /* CONFIG_IDEDISK_STROKE */
+ }
+ drive->bios_cyl = drive->cyl;
+ drive->capacity48 = capacity_2;
+ drive->capacity = (unsigned long) capacity_2;
+ return;
+ /* Determine capacity, and use LBA if the drive properly supports it */
+ } else if ((id->capability & 2) && lba_capacity_is_ok(id)) {
+ capacity = id->lba_capacity;
+ drive->cyl = capacity / (drive->head * drive->sect);
+ drive->select.b.lba = 1;
+ }
+
+ if (set_max > capacity) {
+#ifdef CONFIG_IDEDISK_STROKE
+ set_max = idedisk_read_native_max_address(drive);
+ set_max = idedisk_set_max_address(drive, set_max);
+ if (set_max) {
+ drive->capacity = capacity = set_max;
+ drive->cyl = set_max / (drive->head * drive->sect);
+ drive->select.b.lba = 1;
+ drive->id->lba_capacity = capacity;
+ }
+#else /* !CONFIG_IDEDISK_STROKE */
+ printk("%s: setmax LBA %lu, native %lu\n",
+ drive->name, set_max, capacity);
+#endif /* CONFIG_IDEDISK_STROKE */
+ }
+
+ drive->capacity = capacity;
+
+ if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)) {
+ drive->capacity48 = id->lba_capacity_2;
+ drive->head = 255;
+ drive->sect = 63;
+ drive->cyl = (unsigned long)(drive->capacity48) / (drive->head * drive->sect);
+ }
+}
+
+static unsigned long idedisk_capacity (ide_drive_t *drive)
+{ /*
+ * Usable capacity of the drive: capacity48 when bit 0x0400 of
+ * cfs_enable_2 is set, otherwise capacity, in both cases minus
+ * drive->sect0. The result is returned as unsigned long
+ * (NOTE(review): capacity48 looks wider than that elsewhere in this
+ * file, so large values would be narrowed here -- confirm).
+ */
+ if (drive->id->cfs_enable_2 & 0x0400)
+ return (drive->capacity48 - drive->sect0);
+ return (drive->capacity - drive->sect0);
+}
+
+/*
+ * Service the pending request recorded in drive->special. At most one request
+ * is handled per call, tested in this order: set_geometry, recalibrate,
+ * set_multmode. Any other bit still set in s->all is logged as a bad flag and
+ * cleared.
+ *
+ * Returns ide_stopped for a bad flag or when IS_PDC4030_DRIVE is true, and
+ * ide_started otherwise.
+ */
+static ide_startstop_t idedisk_special (ide_drive_t *drive)
+{
+ special_t *s = &drive->special;
+
+ if (s->b.set_geometry) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ ide_handler_t *handler = NULL;
+
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+
+ /* Clear the flag first, then load the geometry the drive should use. */
+ s->b.set_geometry = 0;
+ taskfile.sector_number = drive->sect;
+ taskfile.low_cylinder = drive->cyl;
+ taskfile.high_cylinder = drive->cyl>>8;
+ /* The 0xBF mask clears bit 6 of the combined head/select value. */
+ taskfile.device_head = ((drive->head-1)|drive->select.all)&0xBF;
+ if (!IS_PDC4030_DRIVE) {
+ taskfile.sector_count = drive->sect;
+ taskfile.command = WIN_SPECIFY;
+ handler = ide_handler_parser(&taskfile, &hobfile);
+ }
+ /*
+ * NOTE(review): for a PDC4030 drive the taskfile is still submitted, with
+ * a zero command byte and a NULL handler -- confirm this is intended.
+ */
+ do_taskfile(drive, &taskfile, &hobfile, handler);
+ } else if (s->b.recalibrate) {
+ s->b.recalibrate = 0;
+ /* WIN_RESTORE is only issued to non-PDC4030 drives. */
+ if (!IS_PDC4030_DRIVE) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.sector_count = drive->sect;
+ taskfile.command = WIN_RESTORE;
+ do_taskfile(drive, &taskfile, &hobfile, ide_handler_parser(&taskfile, &hobfile));
+ }
+ } else if (s->b.set_multmode) {
+ s->b.set_multmode = 0;
+ /* Never request more sectors per interrupt than the drive reports. */
+ if (drive->id && drive->mult_req > drive->id->max_multsect)
+ drive->mult_req = drive->id->max_multsect;
+ if (!IS_PDC4030_DRIVE) {
+ struct hd_drive_task_hdr taskfile;
+ struct hd_drive_hob_hdr hobfile;
+ memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+ memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+ taskfile.sector_count = drive->mult_req;
+ taskfile.command = WIN_SETMULT;
+ do_taskfile(drive, &taskfile, &hobfile, ide_handler_parser(&taskfile, &hobfile));
+ }
+ } else if (s->all) {
+ /* Unknown request bits: report them, drop them, and stop. */
+ int special = s->all;
+ s->all = 0;
+ printk(KERN_ERR "%s: bad special flag: 0x%02x\n", drive->name, special);
+ return ide_stopped;
+ }
+ return IS_PDC4030_DRIVE ? ide_stopped : ide_started;
+}
+
+/*
+ * Rebuild drive->special ahead of a reset. Geometry setup and recalibration
+ * are requested only when bit 0x0400 of id->cfs_enable_2 is clear; multiple
+ * mode is re-requested whenever mult_req and mult_count disagree afterwards.
+ */
+static void idedisk_pre_reset (ide_drive_t *drive)
+{
+    special_t *pending = &drive->special;
+    int wants_chs_setup = !(drive->id->cfs_enable_2 & 0x0400);
+
+    pending->all = 0;
+    pending->b.set_geometry = wants_chs_setup;
+    pending->b.recalibrate = wants_chs_setup;
+
+    if (OK_TO_RESET_CONTROLLER)
+        drive->mult_count = 0;
+    if (!(drive->keep_settings || drive->using_dma))
+        drive->mult_req = 0;
+    if (drive->mult_count != drive->mult_req)
+        pending->b.set_multmode = 1;
+}
+
+#ifdef CONFIG_PROC_FS
+
+/*
+ * Issue a WIN_SMART command with the SMART_ENABLE feature and the SMART
+ * pass values in the cylinder registers. Returns whatever
+ * ide_wait_taskfile() returns.
+ */
+static int smart_enable(ide_drive_t *drive)
+{
+    struct hd_drive_task_hdr tf;
+    struct hd_drive_hob_hdr hob;
+
+    memset(&hob, 0, sizeof(hob));
+    memset(&tf, 0, sizeof(tf));
+
+    tf.command = WIN_SMART;
+    tf.feature = SMART_ENABLE;
+    tf.high_cylinder = SMART_HCYL_PASS;
+    tf.low_cylinder = SMART_LCYL_PASS;
+
+    return ide_wait_taskfile(drive, &tf, &hob, NULL);
+}
+
+/*
+ * Enable SMART (result ignored), then issue WIN_SMART / SMART_READ_VALUES
+ * for one sector with buf as the data buffer. Returns the result of
+ * ide_wait_taskfile() for the read command.
+ */
+static int get_smart_values(ide_drive_t *drive, byte *buf)
+{
+    struct hd_drive_task_hdr tf;
+    struct hd_drive_hob_hdr hob;
+
+    (void) smart_enable(drive);
+
+    memset(&hob, 0, sizeof(hob));
+    memset(&tf, 0, sizeof(tf));
+
+    tf.command = WIN_SMART;
+    tf.feature = SMART_READ_VALUES;
+    tf.sector_count = 0x01;
+    tf.high_cylinder = SMART_HCYL_PASS;
+    tf.low_cylinder = SMART_LCYL_PASS;
+
+    return ide_wait_taskfile(drive, &tf, &hob, buf);
+}
+
+/*
+ * Enable SMART (result ignored), then issue WIN_SMART /
+ * SMART_READ_THRESHOLDS for one sector with buf as the data buffer.
+ * Returns the result of ide_wait_taskfile() for the read command.
+ */
+static int get_smart_thresholds(ide_drive_t *drive, byte *buf)
+{
+    struct hd_drive_task_hdr tf;
+    struct hd_drive_hob_hdr hob;
+
+    (void) smart_enable(drive);
+
+    memset(&hob, 0, sizeof(hob));
+    memset(&tf, 0, sizeof(tf));
+
+    tf.command = WIN_SMART;
+    tf.feature = SMART_READ_THRESHOLDS;
+    tf.sector_count = 0x01;
+    tf.high_cylinder = SMART_HCYL_PASS;
+    tf.low_cylinder = SMART_LCYL_PASS;
+
+    return ide_wait_taskfile(drive, &tf, &hob, buf);
+}
+
+/*
+ * /proc read handler: writes half of id->buf_size as a decimal line into
+ * page, or "(none)" when the drive has no id block.
+ */
+static int proc_idedisk_read_cache
+    (char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+    ide_drive_t *drive = (ide_drive_t *) data;
+    int len = drive->id ? sprintf(page,"%i\n", drive->id->buf_size / 2)
+                        : sprintf(page,"(none)\n");
+
+    PROC_IDE_READ_RETURN(page,start,off,count,eof,len);
+}
+
+/*
+ * /proc read handler for the SMART threshold sector. The raw sector is read
+ * into the start of page; on success (zero return) it is rendered as
+ * SECTOR_WORDS * 2 four-digit hex words, eight per line, starting
+ * SECTOR_WORDS * 4 bytes into the same buffer. page is then moved to the
+ * start of that text.
+ */
+static int proc_idedisk_read_smart_thresholds
+    (char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+    ide_drive_t *drive = (ide_drive_t *)data;
+    int len = 0;
+
+    if (get_smart_thresholds(drive, page) == 0) {
+        unsigned short *word = (unsigned short *) page;
+        char *cursor = page + (SECTOR_WORDS * 4);
+        int emitted = 0;
+
+        page = cursor;
+        do {
+            char sep;
+
+            emitted++;
+            /* newline after every eighth word, space otherwise */
+            sep = (emitted & 7) ? ' ' : '\n';
+            cursor += sprintf(cursor, "%04x%c", le16_to_cpu(*word), sep);
+            word++;
+        } while (emitted < (SECTOR_WORDS * 2));
+        len = cursor - page;
+    }
+    PROC_IDE_READ_RETURN(page,start,off,count,eof,len);
+}
+
+/*
+ * /proc read handler for the SMART values sector. The raw sector is read
+ * into the start of page; on success (zero return) it is rendered as
+ * SECTOR_WORDS * 2 four-digit hex words, eight per line, starting
+ * SECTOR_WORDS * 4 bytes into the same buffer. page is then moved to the
+ * start of that text.
+ */
+static int proc_idedisk_read_smart_values
+    (char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+    ide_drive_t *drive = (ide_drive_t *)data;
+    int len = 0;
+
+    if (get_smart_values(drive, page) == 0) {
+        unsigned short *word = (unsigned short *) page;
+        char *cursor = page + (SECTOR_WORDS * 4);
+        int emitted = 0;
+
+        page = cursor;
+        do {
+            char sep;
+
+            emitted++;
+            /* newline after every eighth word, space otherwise */
+            sep = (emitted & 7) ? ' ' : '\n';
+            cursor += sprintf(cursor, "%04x%c", le16_to_cpu(*word), sep);
+            word++;
+        } while (emitted < (SECTOR_WORDS * 2));
+        len = cursor - page;
+    }
+    PROC_IDE_READ_RETURN(page,start,off,count,eof,len);
+}
+
+/*
+ * /proc entries provided by this driver, terminated by an all-NULL entry.
+ * The two SMART entries are readable by the owner only (S_IRUSR); "cache"
+ * and "geometry" are world-readable (S_IRUGO). None has a write handler.
+ */
+static ide_proc_entry_t idedisk_proc[] = {
+ { "cache", S_IFREG|S_IRUGO, proc_idedisk_read_cache, NULL },
+ { "geometry", S_IFREG|S_IRUGO, proc_ide_read_geometry, NULL },
+ { "smart_values", S_IFREG|S_IRUSR, proc_idedisk_read_smart_values, NULL },
+ { "smart_thresholds", S_IFREG|S_IRUSR, proc_idedisk_read_smart_thresholds, NULL },
+ { NULL, 0, NULL, NULL }
+};
+
+#else
+
+#define idedisk_proc NULL
+
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Request a new multiple-sector count for the drive.
+ *
+ * Returns -EBUSY when a multmode change is already pending, 0 when
+ * drive->mult_count equals arg after the command has completed, and -EIO
+ * otherwise.
+ */
+static int set_multcount(ide_drive_t *drive, int arg)
+{
+#ifdef __TASKFILE__IO
+    struct hd_drive_task_hdr taskfile;
+    struct hd_drive_hob_hdr hobfile;
+
+    if (drive->special.b.set_multmode)
+        return -EBUSY;
+
+    memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+    memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+    /*
+     * Record the new request before building the taskfile, so the SETMULT
+     * command carries the requested count rather than the previous one.
+     */
+    drive->mult_req = arg;
+    taskfile.sector_count = drive->mult_req;
+    taskfile.command = WIN_SETMULT;
+    drive->special.b.set_multmode = 1;
+    ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+#else /* !__TASKFILE__IO */
+    struct request rq;
+
+    if (drive->special.b.set_multmode)
+        return -EBUSY;
+    ide_init_drive_cmd (&rq);
+    rq.cmd = IDE_DRIVE_CMD;
+    drive->mult_req = arg;
+    drive->special.b.set_multmode = 1;
+    (void) ide_do_drive_cmd (drive, &rq, ide_wait);
+#endif /* __TASKFILE__IO */
+    return (drive->mult_count == arg) ? 0 : -EIO;
+}
+
+/*
+ * Store the "nowerr" setting and pick the matching bad-status mask:
+ * BAD_R_STAT when arg is non-zero, BAD_W_STAT when it is zero.
+ *
+ * Returns -EBUSY when ide_spin_wait_hwgroup() fails, 0 otherwise.
+ * NOTE(review): the unlock below presumably pairs with io_request_lock
+ * being held on successful return from ide_spin_wait_hwgroup() -- confirm.
+ */
+static int set_nowerr(ide_drive_t *drive, int arg)
+{
+    if (ide_spin_wait_hwgroup(drive) != 0)
+        return -EBUSY;
+
+    drive->nowerr = arg;
+    if (arg)
+        drive->bad_wstat = BAD_R_STAT;
+    else
+        drive->bad_wstat = BAD_W_STAT;
+
+    spin_unlock_irq(&io_request_lock);
+    return 0;
+}
+
+/*
+ * Enable (arg non-zero) or disable (arg zero) the drive's write cache with a
+ * WIN_SETFEATURES command and record arg in drive->wcache.
+ *
+ * Returns 1 without touching the drive when neither bit of 0x3000 is set in
+ * id->cfs_enable_2, 0 otherwise. The command's own result is ignored.
+ */
+static int write_cache (ide_drive_t *drive, int arg)
+{
+    struct hd_drive_task_hdr tf;
+    struct hd_drive_hob_hdr hob;
+
+    if ((drive->id->cfs_enable_2 & 0x3000) == 0)
+        return 1;
+
+    memset(&hob, 0, sizeof(hob));
+    memset(&tf, 0, sizeof(tf));
+    tf.command = WIN_SETFEATURES;
+    if (arg)
+        tf.feature = SETFEATURES_EN_WCACHE;
+    else
+        tf.feature = SETFEATURES_DIS_WCACHE;
+
+    (void) ide_wait_taskfile(drive, &tf, &hob, NULL);
+    drive->wcache = arg;
+    return 0;
+}
+
+/*
+ * Send WIN_STANDBYNOW1 to the drive and return the result of
+ * ide_wait_taskfile().
+ */
+static int do_idedisk_standby (ide_drive_t *drive)
+{
+    struct hd_drive_task_hdr tf;
+    struct hd_drive_hob_hdr hob;
+
+    memset(&hob, 0, sizeof(hob));
+    memset(&tf, 0, sizeof(tf));
+    tf.command = WIN_STANDBYNOW1;
+
+    return ide_wait_taskfile(drive, &tf, &hob, NULL);
+}
+
+/*
+ * Flush the drive's cache. WIN_FLUSH_CACHE_EXT is used when any bit of
+ * 0x2400 is set in id->cfs_enable_2, WIN_FLUSH_CACHE otherwise. Returns the
+ * result of ide_wait_taskfile().
+ */
+static int do_idedisk_flushcache (ide_drive_t *drive)
+{
+    struct hd_drive_task_hdr tf;
+    struct hd_drive_hob_hdr hob;
+
+    memset(&hob, 0, sizeof(hob));
+    memset(&tf, 0, sizeof(tf));
+    tf.command = (drive->id->cfs_enable_2 & 0x2400) ? WIN_FLUSH_CACHE_EXT
+                                                    : WIN_FLUSH_CACHE;
+
+    return ide_wait_taskfile(drive, &tf, &hob, NULL);
+}
+
+/*
+ * Program acoustic management through WIN_SETFEATURES: the EN_AAM feature
+ * with arg in the sector count when arg is non-zero, DIS_AAM when it is
+ * zero. The command result is ignored; drive->acoustic is set to arg and 0
+ * is always returned.
+ */
+static int set_acoustic (ide_drive_t *drive, int arg)
+{
+    struct hd_drive_task_hdr tf;
+    struct hd_drive_hob_hdr hob;
+
+    memset(&hob, 0, sizeof(hob));
+    memset(&tf, 0, sizeof(tf));
+
+    tf.command = WIN_SETFEATURES;
+    tf.sector_count = arg;
+    if (arg)
+        tf.feature = SETFEATURES_EN_AAM;
+    else
+        tf.feature = SETFEATURES_DIS_AAM;
+
+    (void) ide_wait_taskfile(drive, &tf, &hob, NULL);
+    drive->acoustic = arg;
+    return 0;
+}
+
+/*
+ * Set drive->addressing to arg when bit 0x0400 of id->cfs_enable_2 is set
+ * (returns 0); otherwise leave it at 0 and return -EIO.
+ */
+static int probe_lba_addressing (ide_drive_t *drive, int arg)
+{
+    int supported;
+
+    drive->addressing = 0;
+
+    supported = (drive->id->cfs_enable_2 & 0x0400) != 0;
+    if (supported) {
+        drive->addressing = arg;
+        return 0;
+    }
+    return -EIO;
+}
+
+/*
+ * Setter used for the "address" setting; defers entirely to
+ * probe_lba_addressing().
+ */
+static int set_lba_addressing (ide_drive_t *drive, int arg)
+{
+    int rc = probe_lba_addressing(drive, arg);
+
+    return rc;
+}
+
+/*
+ * Register this driver's tunable settings for the drive through
+ * ide_add_setting(). Settings with ioctl numbers of -1 have no ioctl
+ * interface; those with a non-NULL last argument are changed through that
+ * setter function.
+ */
+static void idedisk_add_settings(ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+ /* Only needed by the read-ahead settings that are compiled out below. */
+#if 0
+ int major = HWIF(drive)->major;
+ int minor = drive->select.b.unit << PARTN_BITS;
+#endif
+
+ ide_add_setting(drive, "bios_cyl", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->bios_cyl, NULL);
+ ide_add_setting(drive, "bios_head", SETTING_RW, -1, -1, TYPE_BYTE, 0, 255, 1, 1, &drive->bios_head, NULL);
+ ide_add_setting(drive, "bios_sect", SETTING_RW, -1, -1, TYPE_BYTE, 0, 63, 1, 1, &drive->bios_sect, NULL);
+ ide_add_setting(drive, "address", SETTING_RW, HDIO_GET_ADDRESS, HDIO_SET_ADDRESS, TYPE_INTA, 0, 2, 1, 1, &drive->addressing, set_lba_addressing);
+ ide_add_setting(drive, "bswap", SETTING_READ, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->bswap, NULL);
+ /* Without an id block, multcount is read-only with a maximum of 0. */
+ ide_add_setting(drive, "multcount", id ? SETTING_RW : SETTING_READ, HDIO_GET_MULTCOUNT, HDIO_SET_MULTCOUNT, TYPE_BYTE, 0, id ? id->max_multsect : 0, 1, 1, &drive->mult_count, set_multcount);
+ ide_add_setting(drive, "nowerr", SETTING_RW, HDIO_GET_NOWERR, HDIO_SET_NOWERR, TYPE_BYTE, 0, 1, 1, 1, &drive->nowerr, set_nowerr);
+ /* Read-ahead and request-size settings are disabled in this build. */
+#if 0
+ ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 1, &read_ahead[major], NULL);
+ ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, 4096, PAGE_SIZE, 1024, &max_readahead[major][minor], NULL);
+ ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 1, &max_sectors[major][minor], NULL);
+#endif
+ ide_add_setting(drive, "lun", SETTING_RW, -1, -1, TYPE_INT, 0, 7, 1, 1, &drive->lun, NULL);
+ ide_add_setting(drive, "wcache", SETTING_RW, HDIO_GET_WCACHE, HDIO_SET_WCACHE, TYPE_BYTE, 0, 1, 1, 1, &drive->wcache, write_cache);
+ ide_add_setting(drive, "acoustic", SETTING_RW, HDIO_GET_ACOUSTIC, HDIO_SET_ACOUSTIC, TYPE_BYTE, 0, 254, 1, 1, &drive->acoustic, set_acoustic);
+ ide_add_setting(drive, "failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->failures, NULL);
+ ide_add_setting(drive, "max_failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->max_failures, NULL);
+}
+
+/*
+ * Set up a drive that has just been attached to this subdriver: register its
+ * settings, derive geometry and capacity from drive->id, print a one-line
+ * summary, and schedule multiple-mode, write-cache and addressing setup.
+ *
+ * When drive->id is NULL only the settings are registered.
+ */
+static void idedisk_setup (ide_drive_t *drive)
+{
+    int i;
+
+    struct hd_driveid *id = drive->id;
+    unsigned long capacity;
+
+    idedisk_add_settings(drive);
+
+    if (id == NULL)
+        return;
+
+    /*
+     * CompactFlash cards and their brethren look just like hard drives
+     * to us, but they are removable and don't have a doorlock mechanism.
+     */
+    if (drive->removable && !drive_is_flashcard(drive)) {
+        /*
+         * Removable disks (eg. SYQUEST); ignore 'WD' drives
+         */
+        if (id->model[0] != 'W' || id->model[1] != 'D') {
+            drive->doorlocking = 1;
+        }
+    }
+    /* Find this drive's slot on its interface and tag the gendisk entry. */
+    for (i = 0; i < MAX_DRIVES; ++i) {
+        ide_hwif_t *hwif = HWIF(drive);
+
+        if (drive != &hwif->drives[i]) continue;
+#ifdef DEVFS_MUST_DIE
+        hwif->gd->de_arr[i] = drive->de;
+#endif
+        if (drive->removable)
+            hwif->gd->flags[i] |= GENHD_FL_REMOVABLE;
+        break;
+    }
+
+    /* Extract geometry if we did not already have one for the drive */
+    if (!drive->cyl || !drive->head || !drive->sect) {
+        drive->cyl = drive->bios_cyl = id->cyls;
+        drive->head = drive->bios_head = id->heads;
+        drive->sect = drive->bios_sect = id->sectors;
+    }
+
+    /* Handle logical geometry translation by the drive */
+    if ((id->field_valid & 1) && id->cur_cyls &&
+        id->cur_heads && (id->cur_heads <= 16) && id->cur_sectors) {
+        drive->cyl = id->cur_cyls;
+        drive->head = id->cur_heads;
+        drive->sect = id->cur_sectors;
+    }
+
+    /* Use physical geometry if what we have still makes no sense */
+    if (drive->head > 16 && id->heads && id->heads <= 16) {
+        drive->cyl = id->cyls;
+        drive->head = id->heads;
+        drive->sect = id->sectors;
+    }
+
+    /* calculate drive capacity, and select LBA if possible */
+    init_idedisk_capacity (drive);
+
+    /*
+     * if possible, give fdisk access to more of the drive,
+     * by correcting bios_cyls:
+     */
+    capacity = idedisk_capacity (drive);
+    if ((capacity >= (drive->bios_cyl * drive->bios_sect * drive->bios_head)) &&
+        (!drive->forced_geom) && drive->bios_sect && drive->bios_head)
+        drive->bios_cyl = (capacity / drive->bios_sect) / drive->bios_head;
+    /* capacity is unsigned long, so it is printed with %lu. */
+    printk (KERN_INFO "XEN %s: %lu sectors", drive->name, capacity);
+
+    /* Give size in megabytes (MB), not mebibytes (MiB). */
+    /* We compute the exact rounded value, avoiding overflow. */
+    printk (" (%lu MB)", (capacity - capacity/625 + 974)/1950);
+
+    /* Only print cache size when it was specified */
+    if (id->buf_size)
+        printk (" w/%dKiB Cache", id->buf_size/2);
+
+    printk(", CHS=%d/%d/%d",
+           drive->bios_cyl, drive->bios_head, drive->bios_sect);
+#ifdef CONFIG_BLK_DEV_IDEDMA
+    if (drive->using_dma)
+        (void) HWIF(drive)->dmaproc(ide_dma_verbose, drive);
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+    printk("\n");
+
+    drive->mult_count = 0;
+    if (id->max_multsect) {
+#ifdef CONFIG_IDEDISK_MULTI_MODE
+        id->multsect = ((id->max_multsect/2) > 1) ? id->max_multsect : 0;
+        id->multsect_valid = id->multsect ? 1 : 0;
+        drive->mult_req = id->multsect_valid ? id->max_multsect : INITIAL_MULT_COUNT;
+        drive->special.b.set_multmode = drive->mult_req ? 1 : 0;
+#else /* original, pre IDE-NFG, per request of AC */
+        drive->mult_req = INITIAL_MULT_COUNT;
+        if (drive->mult_req > id->max_multsect)
+            drive->mult_req = id->max_multsect;
+        if (drive->mult_req || ((id->multsect_valid & 1) && id->multsect))
+            drive->special.b.set_multmode = 1;
+#endif /* CONFIG_IDEDISK_MULTI_MODE */
+    }
+    drive->no_io_32bit = id->dword_io ? 1 : 0;
+    /*
+     * NOTE(review): the masked value (0x1000/0x2000/0x3000) is passed as
+     * the write_cache() argument and stored in drive->wcache; confirm the
+     * field is wide enough to keep it non-zero.
+     */
+    if (drive->id->cfs_enable_2 & 0x3000)
+        write_cache(drive, (id->cfs_enable_2 & 0x3000));
+    (void) probe_lba_addressing(drive, 1);
+}
+
+/*
+ * Detach the drive from this subdriver. When the drive has an id block
+ * advertising a write cache (any bit of 0x3000 in cfs_enable_2) and
+ * drive->wcache is set, the cache is flushed first and a failure is logged.
+ *
+ * drive->id may be NULL: idedisk_setup() accepts such drives and the
+ * invalid-geometry path calls this function right after it, so the id
+ * pointer is checked before being dereferenced.
+ *
+ * Returns the result of ide_unregister_subdriver().
+ */
+static int idedisk_cleanup (ide_drive_t *drive)
+{
+    if (drive->id && (drive->id->cfs_enable_2 & 0x3000) && drive->wcache)
+        if (do_idedisk_flushcache(drive))
+            printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n",
+                    drive->name);
+    return ide_unregister_subdriver(drive);
+}
+
+int idedisk_reinit(ide_drive_t *drive);
+
+/*
+ * IDE subdriver functions, registered with ide.c
+ *
+ * Hooks left NULL (end_request, ioctl, ata_prebuilder, atapi_prebuilder)
+ * are not provided by this driver. The proc table entry is commented out
+ * in this build, so idedisk_proc is not handed over through this
+ * structure.
+ */
+static ide_driver_t idedisk_driver = {
+ name: "ide-disk",
+ version: IDEDISK_VERSION,
+ media: ide_disk,
+ busy: 0,
+ supports_dma: 1,
+ supports_dsc_overlap: 0,
+ cleanup: idedisk_cleanup,
+ standby: do_idedisk_standby,
+ flushcache: do_idedisk_flushcache,
+ do_request: do_rw_disk,
+ end_request: NULL,
+ ioctl: NULL,
+ open: idedisk_open,
+ release: idedisk_release,
+ media_change: idedisk_media_change,
+ revalidate: idedisk_revalidate,
+ pre_reset: idedisk_pre_reset,
+ capacity: idedisk_capacity,
+ special: idedisk_special,
+ /*proc: idedisk_proc,*/
+ reinit: idedisk_reinit,
+ ata_prebuilder: NULL,
+ atapi_prebuilder: NULL,
+};
+
+int idedisk_init (void);
+/*
+ * Module descriptor passed to ide_register_module() and
+ * ide_unregister_module(): ties idedisk_init to idedisk_driver.
+ */
+static ide_module_t idedisk_module = {
+ IDE_DRIVER_MODULE,
+ idedisk_init,
+ &idedisk_driver,
+ NULL
+};
+
+MODULE_DESCRIPTION("ATA DISK Driver");
+
+/*
+ * Attach a single drive to this subdriver and set it up.
+ *
+ * Returns 0 on success and 1 when the subdriver cannot be registered or the
+ * drive ends up with an unusable CHS geometry and no LBA. The module use
+ * count taken on entry is dropped again on every exit path.
+ */
+int idedisk_reinit (ide_drive_t *drive)
+{
+    int rc = 1;
+
+    MOD_INC_USE_COUNT;
+
+    if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION)) {
+        printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name);
+        goto out;
+    }
+    DRIVER(drive)->busy++;
+    idedisk_setup(drive);
+    if ((!drive->head || drive->head > 16) && !drive->select.b.lba) {
+        printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head);
+        (void) idedisk_cleanup(drive);
+        DRIVER(drive)->busy--;
+        goto out;
+    }
+    DRIVER(drive)->busy--;
+
+    ide_register_module(&idedisk_module);
+    rc = 0;
+out:
+    MOD_DEC_USE_COUNT;
+    return rc;
+}
+
+/*
+ * Module exit: run idedisk_cleanup() on every drive that ide_scan_devices()
+ * reports for this driver, remove its proc entries when CONFIG_PROC_FS is
+ * set, and finally unregister the module. Drives whose cleanup fails are
+ * counted, and the count is passed back into the next scan.
+ */
+static void __exit idedisk_exit (void)
+{
+    ide_drive_t *drive;
+    int still_busy = 0;
+
+    for (;;) {
+        drive = ide_scan_devices (ide_disk, idedisk_driver.name, &idedisk_driver, still_busy);
+        if (drive == NULL)
+            break;
+
+        if (idedisk_cleanup (drive) != 0) {
+            printk (KERN_ERR "%s: cleanup_module() called while still busy\n", drive->name);
+            still_busy++;
+        }
+        /* Proc entries defined by this module must go, or later
+           accesses to them would oops. */
+#ifdef CONFIG_PROC_FS
+        if (drive->proc)
+            ide_remove_proc_entries(drive->proc, idedisk_proc);
+#endif
+    }
+    ide_unregister_module(&idedisk_module);
+}
+
+/*
+ * Module init: attach every unclaimed disk reported by ide_scan_devices()
+ * to this subdriver and set it up. Drives that fail registration or end up
+ * with an unusable CHS geometry and no LBA are left behind; the running
+ * count of such drives is what gets passed to the next scan. Always
+ * returns 0.
+ */
+int idedisk_init (void)
+{
+    ide_drive_t *drive;
+    int skipped = 0;
+
+    MOD_INC_USE_COUNT;
+    for (;;) {
+        drive = ide_scan_devices (ide_disk, idedisk_driver.name, NULL, skipped);
+        /* Assume failure; undone at the bottom of the loop on success. */
+        skipped++;
+        if (drive == NULL)
+            break;
+
+        if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION) != 0) {
+            printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name);
+            continue;
+        }
+
+        DRIVER(drive)->busy++;
+        idedisk_setup(drive);
+        if (!drive->select.b.lba && (!drive->head || drive->head > 16)) {
+            printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head);
+            (void) idedisk_cleanup(drive);
+            DRIVER(drive)->busy--;
+            continue;
+        }
+        DRIVER(drive)->busy--;
+        skipped--;
+    }
+    ide_register_module(&idedisk_module);
+    MOD_DEC_USE_COUNT;
+    return 0;
+}
+
+module_init(idedisk_init);
+module_exit(idedisk_exit);
+MODULE_LICENSE("GPL");
diff --git a/xen-2.4.16/drivers/ide/ide-probe.c b/xen-2.4.16/drivers/ide/ide-probe.c
index e83157ec01..99f38dfcb8 100644
--- a/xen-2.4.16/drivers/ide/ide-probe.c
+++ b/xen-2.4.16/drivers/ide/ide-probe.c
@@ -51,11 +51,18 @@
#include <asm/uaccess.h>
#include <asm/io.h>
+#define IDE_PROBE_TRACE 0
+
static inline void do_identify (ide_drive_t *drive, byte cmd)
{
int bswap = 1;
struct hd_driveid *id;
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::do_identify\n");
+ }
+
id = drive->id = kmalloc (SECTOR_WORDS*4, GFP_ATOMIC); /* called with interrupts disabled! */
if (!id) {
printk(KERN_WARNING "(ide-probe::do_identify) Out of memory.\n");
@@ -201,6 +208,11 @@ static int actual_try_to_identify (ide_drive_t *drive, byte cmd)
unsigned long timeout;
byte s, a;
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::actual_try_to_identify\n");
+ }
+
if (IDE_CONTROL_REG) {
/* take a deep breath */
ide_delay_50ms();
@@ -260,6 +272,11 @@ static int try_to_identify (ide_drive_t *drive, byte cmd)
int autoprobe = 0;
unsigned long cookie = 0;
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::try_to_identify\n");
+ }
+
if (IDE_CONTROL_REG && !HWIF(drive)->irq) {
autoprobe = 1;
cookie = probe_irq_on();
@@ -314,6 +331,12 @@ static int do_probe (ide_drive_t *drive, byte cmd)
{
int rc;
ide_hwif_t *hwif = HWIF(drive);
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::do_probe\n");
+ }
+
if (drive->present) { /* avoid waiting for inappropriate probes */
if ((drive->media != ide_disk) && (cmd == WIN_IDENTIFY))
return 4;
@@ -372,6 +395,11 @@ static void enable_nest (ide_drive_t *drive)
{
unsigned long timeout;
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::enable_nest\n");
+ }
+
printk("%s: enabling %s -- ", HWIF(drive)->name, drive->id->model);
SELECT_DRIVE(HWIF(drive), drive);
ide_delay_50ms();
@@ -402,6 +430,11 @@ static void enable_nest (ide_drive_t *drive)
*/
static inline byte probe_for_drive (ide_drive_t *drive)
{
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::probe_for_drive\n");
+ }
+
if (drive->noprobe) /* skip probing? */
return drive->present;
if (do_probe(drive, WIN_IDENTIFY) >= 2) { /* if !(success||timed-out) */
@@ -500,6 +533,11 @@ static void probe_hwif (ide_hwif_t *hwif)
unsigned int unit;
unsigned long flags;
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::probe_hwif\n");
+ }
+
if (hwif->noprobe)
return;
#ifdef CONFIG_BLK_DEV_IDE
@@ -978,6 +1016,11 @@ int ideprobe_init (void)
{
unsigned int index;
int probe[MAX_HWIFS];
+
+ if (IDE_PROBE_TRACE)
+ {
+ printk (KERN_ALERT "ide-probe::ideprobe_init\n");
+ }
MOD_INC_USE_COUNT;
memset(probe, 0, MAX_HWIFS * sizeof(int));
diff --git a/xen-2.4.16/drivers/ide/ide-taskfile.c b/xen-2.4.16/drivers/ide/ide-taskfile.c
index 34bfacebfe..578af55156 100644
--- a/xen-2.4.16/drivers/ide/ide-taskfile.c
+++ b/xen-2.4.16/drivers/ide/ide-taskfile.c
@@ -171,6 +171,8 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task)
struct hd_driveid *id = drive->id;
byte HIHI = (drive->addressing) ? 0xE0 : 0xEF;
+ printk(KERN_ALERT "do_rw_taskfile\n");
+
/* (ks/hs): Moved to start, do not use for multiple out commands */
if (task->handler != task_mulout_intr) {
if (IDE_CONTROL_REG)
diff --git a/xen-2.4.16/drivers/ide/ide-xeno.c b/xen-2.4.16/drivers/ide/ide-xeno.c
new file mode 100644
index 0000000000..eb7e6cab58
--- /dev/null
+++ b/xen-2.4.16/drivers/ide/ide-xeno.c
@@ -0,0 +1,46 @@
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/ide.h>
+#include <hypervisor-ifs/block.h>
+
+
+/*
+ * Append one entry to xdi->disks for every present drive on every present
+ * IDE interface, recording its type (XEN_DISK_IDE) and current capacity,
+ * and log the drive's capacity and BIOS geometry.
+ *
+ * NOTE(review): xdi->count is not checked against the size of xdi->disks
+ * here; the array bound is declared outside this file -- confirm that the
+ * caller or the array size guarantees room for every drive.
+ */
+void
+ide_probe_devices (xen_disk_info_t* xdi)
+{
+    int loop;
+
+    for (loop = 0; loop < MAX_HWIFS; ++loop)
+    {
+        ide_hwif_t *hwif = &ide_hwifs[loop];
+        unsigned int unit;
+
+        if (!hwif->present)
+            continue;
+
+        for (unit = 0; unit < MAX_DRIVES; ++unit)
+        {
+            ide_drive_t *drive = &hwif->drives[unit];
+            unsigned long capacity;
+
+            if (!drive->present)
+                continue;
+
+            capacity = current_capacity (drive);
+
+            xdi->disks[xdi->count].type = XEN_DISK_IDE;
+            xdi->disks[xdi->count].capacity = capacity;
+            xdi->count++;
+
+            printk (KERN_ALERT "IDE-XENO %d\n", xdi->count);
+            /* capacity is unsigned long, so it needs the 'l' modifier. */
+            printk (KERN_ALERT " capacity 0x%lx\n", capacity);
+            printk (KERN_ALERT " head 0x%x\n", drive->bios_head);
+            printk (KERN_ALERT " sector 0x%x\n", drive->bios_sect);
+            printk (KERN_ALERT " cylinder 0x%x\n", drive->bios_cyl);
+        }
+    }
+}
diff --git a/xen-2.4.16/drivers/ide/ide.c b/xen-2.4.16/drivers/ide/ide.c
index af3694bf85..51cee21f77 100644
--- a/xen-2.4.16/drivers/ide/ide.c
+++ b/xen-2.4.16/drivers/ide/ide.c
@@ -1391,6 +1391,8 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
block = rq->sector;
blockend = block + rq->nr_sectors;
+
+#ifdef NEVER
if ((rq->cmd == READ || rq->cmd == WRITE) &&
(drive->media == ide_disk || drive->media == ide_floppy)) {
if ((blockend < block) || (blockend > drive->part[minor&PARTN_MASK].nr_sects)) {
@@ -1404,6 +1406,15 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
possibly killing some innocent following sector */
if (block == 0 && drive->remap_0_to_1 == 1)
block = 1; /* redirect MBR access to EZ-Drive partn table */
+#endif
+
+#ifdef NEVER_DEBUG
+ {
+ printk(" ide::start_request %lx %lx %lx %lx %lx\n",
+ rq->sector, rq->nr_sectors, block,
+ drive->part[minor&PARTN_MASK].start_sect, drive->sect0);
+ }
+#endif
#if (DISK_RECOVERY_TIME > 0)
while ((read_timer() - hwif->last_time) < DISK_RECOVERY_TIME);
@@ -1414,6 +1425,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
printk("%s: drive not ready for command\n", drive->name);
return startstop;
}
+ drive->special.all = 0;
if (!drive->special.all) {
switch(rq->cmd) {
case IDE_DRIVE_CMD:
diff --git a/xen-2.4.16/drivers/ide/piix.c b/xen-2.4.16/drivers/ide/piix.c
new file mode 100644
index 0000000000..ca6629e9ef
--- /dev/null
+++ b/xen-2.4.16/drivers/ide/piix.c
@@ -0,0 +1,536 @@
+/*
+ * linux/drivers/ide/piix.c Version 0.32 June 9, 2000
+ *
+ * Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer
+ * Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org>
+ * May be copied or modified under the terms of the GNU General Public License
+ *
+ * PIO mode setting function for Intel chipsets.
+ * For use instead of BIOS settings.
+ *
+ * 40-41
+ * 42-43
+ *
+ * 41
+ * 43
+ *
+ * | PIO 0 | c0 | 80 | 0 | piix_tune_drive(drive, 0);
+ * | PIO 2 | SW2 | d0 | 90 | 4 | piix_tune_drive(drive, 2);
+ * | PIO 3 | MW1 | e1 | a1 | 9 | piix_tune_drive(drive, 3);
+ * | PIO 4 | MW2 | e3 | a3 | b | piix_tune_drive(drive, 4);
+ *
+ * sitre = word40 & 0x4000; primary
+ * sitre = word42 & 0x4000; secondary
+ *
+ * 44 8421|8421 hdd|hdb
+ *
+ * 48 8421 hdd|hdc|hdb|hda udma enabled
+ *
+ * 0001 hda
+ * 0010 hdb
+ * 0100 hdc
+ * 1000 hdd
+ *
+ * 4a 84|21 hdb|hda
+ * 4b 84|21 hdd|hdc
+ *
+ * ata-33/82371AB
+ * ata-33/82371EB
+ * ata-33/82801AB ata-66/82801AA
+ * 00|00 udma 0 00|00 reserved
+ * 01|01 udma 1 01|01 udma 3
+ * 10|10 udma 2 10|10 udma 4
+ * 11|11 reserved 11|11 reserved
+ *
+ * 54 8421|8421 ata66 drive|ata66 enable
+ *
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x40, &reg40);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x42, &reg42);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x44, &reg44);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x48, &reg48);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x4a, &reg4a);
+ * pci_read_config_word(HWIF(drive)->pci_dev, 0x54, &reg54);
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/hdreg.h>
+#include <linux/ide.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+
+#include "ide_modes.h"
+
+#define PIIX_DEBUG_DRIVE_INFO 0
+
+#define DISPLAY_PIIX_TIMINGS
+
+#if defined(DISPLAY_PIIX_TIMINGS) && defined(CONFIG_PROC_FS)
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+
+static int piix_get_info(char *, char **, off_t, int);
+extern int (*piix_display_info)(char *, char **, off_t, int); /* ide-proc.c */
+extern char *ide_media_verbose(ide_drive_t *);
+static struct pci_dev *bmide_dev;
+
+/*
+ * Format a text report about the PIIX IDE controller held in bmide_dev into
+ * buffer: a chipset banner chosen by PCI device id, then per-channel and
+ * per-drive DMA/UDMA status derived from PCI config registers 0x40-0x55 and
+ * from two bus-master I/O ports.
+ *
+ * Returns the number of bytes written. For the MPIIX device only the banner
+ * is produced. The addr, offset and count arguments are not used.
+ */
+static int piix_get_info (char *buffer, char **addr, off_t offset, int count)
+{
+ char *p = buffer;
+ u32 bibma = pci_resource_start(bmide_dev, 4);
+ u16 reg40 = 0, psitre = 0, reg42 = 0, ssitre = 0;
+ u8 c0 = 0, c1 = 0;
+ u8 reg44 = 0, reg48 = 0, reg4a = 0, reg4b = 0, reg54 = 0, reg55 = 0;
+
+ /* Banner line: pick the chipset name from the PCI device id. */
+ switch(bmide_dev->device) {
+ case PCI_DEVICE_ID_INTEL_82801BA_8:
+ case PCI_DEVICE_ID_INTEL_82801BA_9:
+ case PCI_DEVICE_ID_INTEL_82801CA_10:
+ p += sprintf(p, "\n Intel PIIX4 Ultra 100 Chipset.\n");
+ break;
+ case PCI_DEVICE_ID_INTEL_82372FB_1:
+ case PCI_DEVICE_ID_INTEL_82801AA_1:
+ p += sprintf(p, "\n Intel PIIX4 Ultra 66 Chipset.\n");
+ break;
+ case PCI_DEVICE_ID_INTEL_82451NX:
+ case PCI_DEVICE_ID_INTEL_82801AB_1:
+ case PCI_DEVICE_ID_INTEL_82443MX_1:
+ case PCI_DEVICE_ID_INTEL_82371AB:
+ p += sprintf(p, "\n Intel PIIX4 Ultra 33 Chipset.\n");
+ break;
+ case PCI_DEVICE_ID_INTEL_82371SB_1:
+ p += sprintf(p, "\n Intel PIIX3 Chipset.\n");
+ break;
+ case PCI_DEVICE_ID_INTEL_82371MX:
+ p += sprintf(p, "\n Intel MPIIX Chipset.\n");
+ return p-buffer; /* => must be less than 4k! */
+ case PCI_DEVICE_ID_INTEL_82371FB_1:
+ case PCI_DEVICE_ID_INTEL_82371FB_0:
+ default:
+ p += sprintf(p, "\n Intel PIIX Chipset.\n");
+ break;
+ }
+
+ pci_read_config_word(bmide_dev, 0x40, &reg40);
+ pci_read_config_word(bmide_dev, 0x42, &reg42);
+ pci_read_config_byte(bmide_dev, 0x44, &reg44);
+ pci_read_config_byte(bmide_dev, 0x48, &reg48);
+ pci_read_config_byte(bmide_dev, 0x4a, &reg4a);
+ pci_read_config_byte(bmide_dev, 0x4b, &reg4b);
+ pci_read_config_byte(bmide_dev, 0x54, &reg54);
+ pci_read_config_byte(bmide_dev, 0x55, &reg55);
+
+ /* NOTE(review): psitre, ssitre and reg44 are computed or read but not used below. */
+ psitre = (reg40 & 0x4000) ? 1 : 0;
+ ssitre = (reg42 & 0x4000) ? 1 : 0;
+
+ /*
+ * at that point bibma+0x2 and bibma+0xa are byte registers
+ * to investigate:
+ */
+ c0 = inb_p((unsigned short)bibma + 0x02);
+ c1 = inb_p((unsigned short)bibma + 0x0a);
+
+ p += sprintf(p, "--------------- Primary Channel ---------------- Secondary Channel -------------\n");
+ p += sprintf(p, " %sabled %sabled\n",
+ (c0&0x80) ? "dis" : " en",
+ (c1&0x80) ? "dis" : " en");
+ p += sprintf(p, "--------------- drive0 --------- drive1 -------- drive0 ---------- drive1 ------\n");
+ p += sprintf(p, "DMA enabled: %s %s %s %s\n",
+ (c0&0x20) ? "yes" : "no ",
+ (c0&0x40) ? "yes" : "no ",
+ (c1&0x20) ? "yes" : "no ",
+ (c1&0x40) ? "yes" : "no " );
+ p += sprintf(p, "UDMA enabled: %s %s %s %s\n",
+ (reg48&0x01) ? "yes" : "no ",
+ (reg48&0x02) ? "yes" : "no ",
+ (reg48&0x04) ? "yes" : "no ",
+ (reg48&0x08) ? "yes" : "no " );
+ /*
+ * NOTE(review): this row prints a mode digit per drive but reuses the
+ * "UDMA enabled:" label, and every (reg & 0x00) test is always false, so
+ * "0" can never be printed -- the fallback is always "X".
+ */
+ p += sprintf(p, "UDMA enabled: %s %s %s %s\n",
+ ((reg54&0x11) && (reg55&0x10) && (reg4a&0x01)) ? "5" :
+ ((reg54&0x11) && (reg4a&0x02)) ? "4" :
+ ((reg54&0x11) && (reg4a&0x01)) ? "3" :
+ (reg4a&0x02) ? "2" :
+ (reg4a&0x01) ? "1" :
+ (reg4a&0x00) ? "0" : "X",
+ ((reg54&0x22) && (reg55&0x20) && (reg4a&0x10)) ? "5" :
+ ((reg54&0x22) && (reg4a&0x20)) ? "4" :
+ ((reg54&0x22) && (reg4a&0x10)) ? "3" :
+ (reg4a&0x20) ? "2" :
+ (reg4a&0x10) ? "1" :
+ (reg4a&0x00) ? "0" : "X",
+ ((reg54&0x44) && (reg55&0x40) && (reg4b&0x03)) ? "5" :
+ ((reg54&0x44) && (reg4b&0x02)) ? "4" :
+ ((reg54&0x44) && (reg4b&0x01)) ? "3" :
+ (reg4b&0x02) ? "2" :
+ (reg4b&0x01) ? "1" :
+ (reg4b&0x00) ? "0" : "X",
+ ((reg54&0x88) && (reg55&0x80) && (reg4b&0x30)) ? "5" :
+ ((reg54&0x88) && (reg4b&0x20)) ? "4" :
+ ((reg54&0x88) && (reg4b&0x10)) ? "3" :
+ (reg4b&0x20) ? "2" :
+ (reg4b&0x10) ? "1" :
+ (reg4b&0x00) ? "0" : "X");
+
+ /* Placeholder rows: only the labels are printed, no values yet. */
+ p += sprintf(p, "UDMA\n");
+ p += sprintf(p, "DMA\n");
+ p += sprintf(p, "PIO\n");
+
+/*
+ * FIXME.... Add configuration junk data....blah blah......
+ */
+
+ return p-buffer; /* => must be less than 4k! */
+}
+#endif /* defined(DISPLAY_PIIX_TIMINGS) && defined(CONFIG_PROC_FS) */
+
+/*
+ * Used to set Fifo configuration via kernel command line:
+ */
+
+byte piix_proc = 0;
+
+extern char *ide_xfer_verbose (byte xfer_rate);
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && defined(CONFIG_PIIX_TUNING)
+/*
+ * Map a transfer-rate code to a PIO mode number:
+ *   4 for PIO 4, MW DMA 2 and every UDMA rate,
+ *   3 for PIO 3 and MW DMA 1,
+ *   2 for PIO 2 and SW DMA 2,
+ *   0 for everything else.
+ */
+static byte piix_dma_2_pio (byte xfer_rate) {
+    byte pio_mode = 0;
+
+    switch(xfer_rate) {
+        case XFER_PIO_4:
+        case XFER_MW_DMA_2:
+        case XFER_UDMA_0:
+        case XFER_UDMA_1:
+        case XFER_UDMA_2:
+        case XFER_UDMA_3:
+        case XFER_UDMA_4:
+        case XFER_UDMA_5:
+            pio_mode = 4;
+            break;
+        case XFER_PIO_3:
+        case XFER_MW_DMA_1:
+            pio_mode = 3;
+            break;
+        case XFER_PIO_2:
+        case XFER_SW_DMA_2:
+            pio_mode = 2;
+            break;
+        default:
+            /* XFER_MW_DMA_0, XFER_SW_DMA_1/0, XFER_PIO_1/0,
+               XFER_PIO_SLOW and any unknown rate */
+            break;
+    }
+    return pio_mode;
+}
+#endif /* defined(CONFIG_BLK_DEV_IDEDMA) && (CONFIG_PIIX_TUNING) */
+
+/*
+ * Based on settings done by AMI BIOS
+ * (might be useful if drive is not registered in CMOS for any reason).
+ *
+ * Program the PIO timing for one drive. The master drive's ISP/RTC timing
+ * lives in the channel's 16-bit register (0x40 primary, 0x42 secondary).
+ * The slave drive's timing lives in byte register 0x44, which holds one
+ * nibble per channel: low nibble primary, high nibble secondary.
+ *
+ * pio is first passed through ide_get_best_pio_mode() and then limited to
+ * the last row of the timing table.
+ */
+static void piix_tune_drive (ide_drive_t *drive, byte pio)
+{
+    unsigned long flags;
+    u16 master_data;
+    byte slave_data;
+    int is_slave = (&HWIF(drive)->drives[1] == drive);
+    int master_port = HWIF(drive)->index ? 0x42 : 0x40;
+    int slave_port = 0x44;
+    /* ISP RTC */
+    byte timings[][2] = { { 0, 0 },
+                          { 0, 0 },
+                          { 1, 0 },
+                          { 2, 1 },
+                          { 2, 3 }, };
+
+    pio = ide_get_best_pio_mode(drive, pio, 5, NULL);
+    /* The helper is asked for modes up to 5, but the table stops at 4. */
+    if (pio >= sizeof(timings) / sizeof(timings[0]))
+        pio = sizeof(timings) / sizeof(timings[0]) - 1;
+
+    pci_read_config_word(HWIF(drive)->pci_dev, master_port, &master_data);
+    if (is_slave) {
+        master_data = master_data | 0x4000;
+        if (pio > 1)
+            /* enable PPE, IE and TIME */
+            master_data = master_data | 0x0070;
+        pci_read_config_byte(HWIF(drive)->pci_dev, slave_port, &slave_data);
+        /* Keep the other channel's nibble, clear this channel's. */
+        slave_data = slave_data & (HWIF(drive)->index ? 0x0f : 0xf0);
+        /*
+         * Build the ISP/RTC nibble first, then shift the whole nibble into
+         * this channel's half. Shifting only the RTC bits would drop the
+         * ISP bits into the primary nibble on the secondary channel.
+         */
+        slave_data = slave_data | (((timings[pio][0] << 2) | timings[pio][1])
+                                   << (HWIF(drive)->index ? 4 : 0));
+    } else {
+        master_data = master_data & 0xccf8;
+        if (pio > 1)
+            /* enable PPE, IE and TIME */
+            master_data = master_data | 0x0007;
+        master_data = master_data | (timings[pio][0] << 12) |
+                      (timings[pio][1] << 8);
+    }
+    /* Write both registers with interrupts off so they change together. */
+    save_flags(flags);
+    cli();
+    pci_write_config_word(HWIF(drive)->pci_dev, master_port, master_data);
+    if (is_slave)
+        pci_write_config_byte(HWIF(drive)->pci_dev, slave_port, slave_data);
+    restore_flags(flags);
+}
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && defined(CONFIG_PIIX_TUNING)
+/*
+ * Program the chipset's Ultra DMA configuration for the given transfer
+ * speed, retune the PIO timings to match, and pass the speed to the drive.
+ *
+ * drive: drive being configured; drive->dn selects its bits/field in each
+ *        register.
+ * speed: XFER_* code.  Only UDMA 0-5, MW DMA 1-2 and SW DMA 2 are accepted.
+ *
+ * Returns -1 for any other speed (no register is written in that case),
+ * otherwise the result of ide_config_drive_speed().
+ */
+static int piix_tune_chipset (ide_drive_t *drive, byte speed)
+{
+	ide_hwif_t *hwif = HWIF(drive);
+	struct pci_dev *dev = hwif->pci_dev;
+	byte maslave = hwif->channel ? 0x42 : 0x40;
+	int a_speed = 3 << (drive->dn * 4);	/* this drive's field in 0x4a */
+	int u_flag = 1 << drive->dn;		/* this drive's bit in 0x48 */
+	int v_flag = 0x01 << drive->dn;		/* this drive's bit in 0x54 */
+	int w_flag = 0x10 << drive->dn;		/* this drive's bit in 0x55 */
+	int u_speed = 0;
+	int err = 0;
+	int sitre;
+	/* u16 to match what pci_read_config_word() fills in */
+	u16 reg4042, reg44, reg48, reg4a, reg54;
+	byte reg55;
+
+	pci_read_config_word(dev, maslave, &reg4042);
+	sitre = (reg4042 & 0x4000) ? 1 : 0;	/* read but not used below */
+	pci_read_config_word(dev, 0x44, &reg44);	/* read but not used below */
+	pci_read_config_word(dev, 0x48, &reg48);
+	pci_read_config_word(dev, 0x4a, &reg4a);
+	pci_read_config_word(dev, 0x54, &reg54);
+	pci_read_config_byte(dev, 0x55, &reg55);
+
+	switch(speed) {
+	case XFER_UDMA_4:
+	case XFER_UDMA_2:	u_speed = 2 << (drive->dn * 4); break;
+	case XFER_UDMA_5:
+	case XFER_UDMA_3:
+	case XFER_UDMA_1:	u_speed = 1 << (drive->dn * 4); break;
+	case XFER_UDMA_0:	u_speed = 0 << (drive->dn * 4); break;
+	case XFER_MW_DMA_2:
+	case XFER_MW_DMA_1:
+	case XFER_SW_DMA_2:	break;
+	default:		return -1;
+	}
+
+	if (speed >= XFER_UDMA_0) {
+		if (!(reg48 & u_flag))
+			pci_write_config_word(dev, 0x48, reg48|u_flag);
+		if (speed == XFER_UDMA_5) {
+			pci_write_config_byte(dev, 0x55, (byte) reg55|w_flag);
+		} else {
+			pci_write_config_byte(dev, 0x55, (byte) reg55 & ~w_flag);
+		}
+		/*
+		 * Replace this drive's field in one write.  Two successive
+		 * writes derived from the same stale reg4a would let the
+		 * second one restore the bits the first had cleared.
+		 */
+		if ((reg4a & a_speed) != u_speed)
+			pci_write_config_word(dev, 0x4a, (reg4a & ~a_speed) | u_speed);
+		/*
+		 * Touch only the low byte at 0x54: a word write would put the
+		 * previously read value back into 0x55 and undo the w_flag
+		 * update made just above.
+		 */
+		if (speed > XFER_UDMA_2) {
+			if (!(reg54 & v_flag)) {
+				pci_write_config_byte(dev, 0x54, (byte) (reg54 | v_flag));
+			}
+		} else {
+			pci_write_config_byte(dev, 0x54, (byte) (reg54 & ~v_flag));
+		}
+	}
+	if (speed < XFER_UDMA_0) {
+		/* non-Ultra mode: clear every Ultra DMA setting for this drive */
+		if (reg48 & u_flag)
+			pci_write_config_word(dev, 0x48, reg48 & ~u_flag);
+		if (reg4a & a_speed)
+			pci_write_config_word(dev, 0x4a, reg4a & ~a_speed);
+		if (reg54 & v_flag)
+			pci_write_config_byte(dev, 0x54, (byte) (reg54 & ~v_flag));
+		if (reg55 & w_flag)
+			pci_write_config_byte(dev, 0x55, (byte) reg55 & ~w_flag);
+	}
+
+	piix_tune_drive(drive, piix_dma_2_pio(speed));
+
+#if PIIX_DEBUG_DRIVE_INFO
+	printk("%s: %s drive%d\n", drive->name, ide_xfer_verbose(speed), drive->dn);
+#endif /* PIIX_DEBUG_DRIVE_INFO */
+	if (!drive->init_speed)
+		drive->init_speed = speed;
+	err = ide_config_drive_speed(drive, speed);
+	drive->current_speed = speed;
+	return err;
+}
+
+/*
+ * Choose a transfer speed for the drive from its identify data and the
+ * chipset's PCI device ID, hand it to piix_tune_chipset(), and report
+ * whether DMA should be switched on.
+ *
+ * Returns ide_dma_on when any of bits 8-13 of id->dma_ultra, or bits 8-10
+ * of id->dma_mword or id->dma_1word, are set after tuning; otherwise
+ * ide_dma_off_quietly.
+ */
+static int piix_config_drive_for_dma (ide_drive_t *drive)
+{
+ struct hd_driveid *id = drive->id;
+ ide_hwif_t *hwif = HWIF(drive);
+ struct pci_dev *dev = hwif->pci_dev;
+ byte speed;
+
+ /* NOTE(review): presumably non-zero when an 80-conductor cable is present - confirm against eighty_ninty_three() */
+ byte udma_66 = eighty_ninty_three(drive);
+ /* Capability tiers by PCI device ID; each tier includes the one above it. */
+ int ultra100 = ((dev->device == PCI_DEVICE_ID_INTEL_82801BA_8) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801BA_9) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801CA_10)) ? 1 : 0;
+ int ultra66 = ((ultra100) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801AA_1) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82372FB_1)) ? 1 : 0;
+ int ultra = ((ultra66) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82371AB) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82443MX_1) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82451NX) ||
+ (dev->device == PCI_DEVICE_ID_INTEL_82801AB_1)) ? 1 : 0;
+
+ /* Try the drive's advertised modes from fastest to slowest. */
+ if ((id->dma_ultra & 0x0020) && (udma_66) && (ultra100)) {
+ speed = XFER_UDMA_5;
+ } else if ((id->dma_ultra & 0x0010) && (ultra)) {
+ /* fall back to UDMA 2 when udma_66 or the ultra66 tier is missing */
+ speed = ((udma_66) && (ultra66)) ? XFER_UDMA_4 : XFER_UDMA_2;
+ } else if ((id->dma_ultra & 0x0008) && (ultra)) {
+ /* likewise UDMA 3 degrades to UDMA 1 */
+ speed = ((udma_66) && (ultra66)) ? XFER_UDMA_3 : XFER_UDMA_1;
+ } else if ((id->dma_ultra & 0x0004) && (ultra)) {
+ speed = XFER_UDMA_2;
+ } else if ((id->dma_ultra & 0x0002) && (ultra)) {
+ speed = XFER_UDMA_1;
+ } else if ((id->dma_ultra & 0x0001) && (ultra)) {
+ speed = XFER_UDMA_0;
+ } else if (id->dma_mword & 0x0004) {
+ speed = XFER_MW_DMA_2;
+ } else if (id->dma_mword & 0x0002) {
+ speed = XFER_MW_DMA_1;
+ } else if (id->dma_1word & 0x0004) {
+ speed = XFER_SW_DMA_2;
+ } else {
+ /* no usable DMA mode: pick a PIO speed code instead */
+ speed = XFER_PIO_0 + ide_get_best_pio_mode(drive, 255, 5, NULL);
+ }
+
+ /* Result deliberately ignored; piix_tune_chipset() returns -1 for speeds it does not handle. */
+ (void) piix_tune_chipset(drive, speed);
+
+ return ((int) ((id->dma_ultra >> 11) & 7) ? ide_dma_on :
+ ((id->dma_ultra >> 8) & 7) ? ide_dma_on :
+ ((id->dma_mword >> 8) & 7) ? ide_dma_on :
+ ((id->dma_1word >> 8) & 7) ? ide_dma_on :
+ ide_dma_off_quietly);
+}
+
+/*
+ * Tune the drive for the PIO mode that ide_get_best_pio_mode() selects
+ * (requested mode 255, maximum mode 5).
+ */
+static void config_chipset_for_pio (ide_drive_t *drive)
+{
+	byte best_pio = ide_get_best_pio_mode(drive, 255, 5, NULL);
+
+	piix_tune_drive(drive, best_pio);
+}
+
+/*
+ * Decide which DMA action applies to the drive and hand that decision to
+ * HWIF(drive)->dmaproc(), whose result is returned.
+ *
+ * DMA is only considered when identify data is present, bit 0 of
+ * id->capability is set and the interface has autodma enabled.  In that
+ * case piix_config_drive_for_dma() is tried according to which
+ * id->field_valid bits are set, or for drives on the "good drive" list.
+ * Paths that reach the fast_ata_pio / no_dma_set labels additionally tune
+ * the drive for PIO through config_chipset_for_pio().
+ */
+static int config_drive_xfer_rate (ide_drive_t *drive)
+{
+	struct hd_driveid *id = drive->id;
+	ide_dma_action_t dma_func = ide_dma_on;
+
+	if (id && (id->capability & 1) && HWIF(drive)->autodma) {
+		/* Consult the list of known "bad" drives */
+		if (ide_dmaproc(ide_dma_bad_drive, drive)) {
+			dma_func = ide_dma_off;
+			goto fast_ata_pio;
+		}
+		dma_func = ide_dma_off_quietly;
+		if (id->field_valid & 4) {
+			/* NOTE(review): 0x002F leaves out bit 4 (0x0010) - confirm whether 0x003F was intended */
+			if (id->dma_ultra & 0x002F) {
+				/* Force if Capable UltraDMA */
+				dma_func = piix_config_drive_for_dma(drive);
+				if ((id->field_valid & 2) &&
+				    (dma_func != ide_dma_on))
+					goto try_dma_modes;
+			}
+		} else if (id->field_valid & 2) {
+try_dma_modes:
+			if ((id->dma_mword & 0x0007) ||
+			    (id->dma_1word & 0x007)) {
+				/* Force if Capable regular DMA modes */
+				dma_func = piix_config_drive_for_dma(drive);
+				if (dma_func != ide_dma_on)
+					goto no_dma_set;
+			}
+		} else if (ide_dmaproc(ide_dma_good_drive, drive)) {
+			if (id->eide_dma_time > 150) {
+				goto no_dma_set;
+			}
+			/* Consult the list of known "good" drives */
+			dma_func = piix_config_drive_for_dma(drive);
+			if (dma_func != ide_dma_on)
+				goto no_dma_set;
+		} else {
+			goto fast_ata_pio;
+		}
+	/*
+	 * The first test can fail because id is NULL, so check it again
+	 * before dereferencing it here.
+	 */
+	} else if (id && ((id->capability & 8) || (id->field_valid & 2))) {
+fast_ata_pio:
+		dma_func = ide_dma_off_quietly;
+no_dma_set:
+		config_chipset_for_pio(drive);
+	}
+	return HWIF(drive)->dmaproc(dma_func, drive);
+}
+
+/*
+ * DMA action handler that ide_init_piix() installs as hwif->dmaproc.
+ * ide_dma_check is answered by config_drive_xfer_rate(); every other
+ * action is forwarded unchanged to ide_dmaproc().
+ */
+static int piix_dmaproc(ide_dma_action_t func, ide_drive_t *drive)
+{
+	if (func == ide_dma_check)
+		return config_drive_xfer_rate(drive);
+
+	/* Other cases are done by generic IDE-DMA code. */
+	return ide_dmaproc(func, drive);
+}
+#endif /* defined(CONFIG_BLK_DEV_IDEDMA) && (CONFIG_PIIX_TUNING) */
+
+/*
+ * PCI init hook for the PIIX.  When timing display is compiled in, the
+ * first device seen is recorded in bmide_dev and piix_get_info is
+ * published through piix_display_info; piix_proc marks that as done.
+ * Always returns 0.
+ */
+unsigned int __init pci_init_piix (struct pci_dev *dev, const char *name)
+{
+#if defined(DISPLAY_PIIX_TIMINGS) && defined(CONFIG_PROC_FS)
+	if (piix_proc == 0) {
+		bmide_dev = dev;
+		piix_display_info = &piix_get_info;
+		piix_proc = 1;
+	}
+#endif /* DISPLAY_PIIX_TIMINGS && CONFIG_PROC_FS */
+	return 0;
+}
+
+/*
+ * Sheesh, someone at Intel needs to go read the ATA-4/5 T13 standards.
+ * It does not specify device detection, but channel!!!
+ * You determine later if bit 13 of word93 is set...
+ *
+ * Returns 1 when either of this channel's two bits in PCI config byte
+ * 0x54 is set (mask 0x30 for channel 0, 0xc0 otherwise), else 0.
+ */
+unsigned int __init ata66_piix (ide_hwif_t *hwif)
+{
+	byte reg54h = 0, reg55h = 0;
+	byte mask;
+
+	if (hwif->channel)
+		mask = 0xc0;
+	else
+		mask = 0x30;
+
+	pci_read_config_byte(hwif->pci_dev, 0x54, &reg54h);
+	/* 0x55 is read as well, but its value is not used */
+	pci_read_config_byte(hwif->pci_dev, 0x55, &reg55h);
+
+	return (reg54h & mask) ? 1 : 0;
+}
+
+/*
+ * Per-interface setup for a PIIX IDE channel.
+ *
+ * Unless built for IA64, an unset IRQ defaults to 14 for channel 0 and 15
+ * otherwise.  The 82371MX is left untouched beyond that.  For all other
+ * devices piix_tune_drive() becomes the tuning hook and both drives are
+ * marked for autotune; if the interface has a DMA base, the DMA hooks are
+ * installed as far as the kernel configuration allows.
+ */
+void __init ide_init_piix (ide_hwif_t *hwif)
+{
+#ifndef CONFIG_IA64
+ if (!hwif->irq)
+ hwif->irq = hwif->channel ? 15 : 14;
+#endif /* CONFIG_IA64 */
+
+ if (hwif->pci_dev->device == PCI_DEVICE_ID_INTEL_82371MX) {
+ /* This is a painful system best to let it self tune for now */
+ return;
+ }
+
+ hwif->tuneproc = &piix_tune_drive;
+ hwif->drives[0].autotune = 1;
+ hwif->drives[1].autotune = 1;
+
+ /* nothing more to do without a DMA base */
+ if (!hwif->dma_base)
+ return;
+
+#ifndef CONFIG_BLK_DEV_IDEDMA
+ hwif->autodma = 0;
+#else /* CONFIG_BLK_DEV_IDEDMA */
+#ifdef CONFIG_PIIX_TUNING
+ /* autodma stays as it was when noautodma is set */
+ if (!noautodma)
+ hwif->autodma = 1;
+ hwif->dmaproc = &piix_dmaproc;
+ hwif->speedproc = &piix_tune_chipset;
+#endif /* CONFIG_PIIX_TUNING */
+#endif /* !CONFIG_BLK_DEV_IDEDMA */
+}
diff --git a/xen-2.4.16/drivers/net/Makefile b/xen-2.4.16/drivers/net/Makefile
index 8b752b9712..11fecb571b 100644
--- a/xen-2.4.16/drivers/net/Makefile
+++ b/xen-2.4.16/drivers/net/Makefile
@@ -5,12 +5,11 @@ default: $(OBJS)
$(MAKE) -C ne
$(MAKE) -C tulip
$(MAKE) -C e1000
- $(LD) -r -o driver.o $(OBJS) tulip/tulip.o e1000/e1000.o ne/ne_drv.o
+ $(LD) -r -o driver.o e1000/e1000.o $(OBJS) tulip/tulip.o ne/ne_drv.o
clean:
$(MAKE) -C ne clean
$(MAKE) -C tulip clean
- $(MAKE) -C e1000 clean
rm -f *.o *~ core
.PHONY: default clean
diff --git a/xen-2.4.16/drivers/net/e1000/e1000.h b/xen-2.4.16/drivers/net/e1000/e1000.h
index ec7c27aa7e..d94e390ba3 100644
--- a/xen-2.4.16/drivers/net/e1000/e1000.h
+++ b/xen-2.4.16/drivers/net/e1000/e1000.h
@@ -77,14 +77,13 @@
struct e1000_adapter;
// XEN XXX
-#define DBG 1
+// #define DBG 1
#include "e1000_hw.h"
#if DBG
#define E1000_DBG(args...) printk(KERN_DEBUG "e1000: " args)
#else
-XXX
#define E1000_DBG(args...)
#endif
diff --git a/xen-2.4.16/drivers/net/e1000/e1000_hw.c b/xen-2.4.16/drivers/net/e1000/e1000_hw.c
index 91053751c7..1d70dab937 100644
--- a/xen-2.4.16/drivers/net/e1000/e1000_hw.c
+++ b/xen-2.4.16/drivers/net/e1000/e1000_hw.c
@@ -1879,7 +1879,7 @@ e1000_read_phy_reg(struct e1000_hw *hw,
uint32_t mdic = 0;
const uint32_t phy_addr = 1;
- DEBUGFUNC("e1000_read_phy_reg");
+ DEBUGFUNC("XXXXe1000_read_phy_reg");
if(reg_addr > MAX_PHY_REG_ADDRESS) {
DEBUGOUT1("PHY Address %d is out of range\n", reg_addr);
diff --git a/xen-2.4.16/drivers/net/e1000/e1000_main.c b/xen-2.4.16/drivers/net/e1000/e1000_main.c
index 0d3b62505a..8afbe394c2 100644
--- a/xen-2.4.16/drivers/net/e1000/e1000_main.c
+++ b/xen-2.4.16/drivers/net/e1000/e1000_main.c
@@ -301,11 +301,9 @@ e1000_reset(struct e1000_adapter *adapter)
adapter->hw.fc = adapter->hw.original_fc;
e1000_reset_hw(&adapter->hw);
-printk("RESET_H/W\n");
if(adapter->hw.mac_type >= e1000_82544)
E1000_WRITE_REG(&adapter->hw, WUC, 0);
e1000_init_hw(&adapter->hw);
-printk("INIT H/W\n");
e1000_reset_adaptive(&adapter->hw);
e1000_phy_get_info(&adapter->hw, &adapter->phy_info);
}
@@ -470,14 +468,12 @@ e1000_probe(struct pci_dev *pdev,
printk(KERN_INFO "%s: %s\n", netdev->name, adapter->id_string);
e1000_check_options(adapter);
-printk("OPTIONS OVER\n");
/* Initial Wake on LAN setting
* If APM wake is enabled in the EEPROM,
* enable the ACPI Magic Packet filter
*/
e1000_read_eeprom(&adapter->hw, EEPROM_INIT_CONTROL2_REG, &eeprom_data);
-printk("EPROM OVER\n");
if((adapter->hw.mac_type >= e1000_82544) &&
(eeprom_data & E1000_EEPROM_APME))
adapter->wol |= E1000_WUFC_MAG;
@@ -485,7 +481,6 @@ printk("EPROM OVER\n");
/* reset the hardware with the new settings */
e1000_reset(adapter);
-printk("PROBE OVER\n");
cards_found++;
return 0;
diff --git a/xen-2.4.16/drivers/net/e1000/e1000_osdep.h b/xen-2.4.16/drivers/net/e1000/e1000_osdep.h
index e51e083472..fe3fff2af7 100644
--- a/xen-2.4.16/drivers/net/e1000/e1000_osdep.h
+++ b/xen-2.4.16/drivers/net/e1000/e1000_osdep.h
@@ -45,7 +45,7 @@
#define msec_delay(x) {\
int s=jiffies+1+((x*HZ)/1000); \
printk("mdelay(%d) called -- spin\n",x); \
- while(jiffies<s); printk("mdelay over\n");}
+ while(jiffies<s); }
#if 0
/******************** NOT in XEN ! *******/
@@ -73,7 +73,7 @@ typedef enum {
#define ASSERT(x) if(!(x)) BUG()
#define MSGOUT(S, A, B) printk(KERN_DEBUG S "\n", A, B)
-#define DBG 1
+//#define DBG 1
#if DBG
#define DEBUGOUT(S) printk(KERN_DEBUG S "\n")
diff --git a/xen-2.4.16/include/asm-i386/apic.h b/xen-2.4.16/include/asm-i386/apic.h
index 1f5670943b..574cc23203 100644
--- a/xen-2.4.16/include/asm-i386/apic.h
+++ b/xen-2.4.16/include/asm-i386/apic.h
@@ -1,9 +1,12 @@
#ifndef __ASM_APIC_H
#define __ASM_APIC_H
-#include <asm/system.h>
-#include <asm/ptrace.h>
+//#include <linux/config.h>
+//#include <linux/pm.h>
#include <asm/apicdef.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
#define APIC_DEBUG 0
@@ -37,9 +40,15 @@ static __inline__ void apic_wait_icr_idle(void)
do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY );
}
-#define FORCE_READ_AROUND_WRITE 0
-#define apic_read_around(x)
-#define apic_write_around(x,y) apic_write((x),(y))
+#ifdef CONFIG_X86_GOOD_APIC
+# define FORCE_READ_AROUND_WRITE 0
+# define apic_read_around(x)
+# define apic_write_around(x,y) apic_write((x),(y))
+#else
+# define FORCE_READ_AROUND_WRITE 1
+# define apic_read_around(x) apic_read(x)
+# define apic_write_around(x,y) apic_write_atomic((x),(y))
+#endif
static inline void ack_APIC_irq(void)
{
@@ -64,8 +73,24 @@ extern void init_bsp_APIC (void);
extern void setup_local_APIC (void);
extern void init_apic_mappings (void);
extern void setup_APIC_clocks (void);
+extern void setup_apic_nmi_watchdog (void);
+extern inline void nmi_watchdog_tick (struct pt_regs * regs);
extern int APIC_init_uniprocessor (void);
+extern void disable_APIC_timer(void);
+extern void enable_APIC_timer(void);
+
+//extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned long, pm_callback);
+//extern void apic_pm_unregister(struct pm_dev*);
extern unsigned int apic_timer_irqs [NR_CPUS];
+extern int check_nmi_watchdog (void);
+
+extern unsigned int nmi_watchdog;
+#define NMI_NONE 0
+#define NMI_IO_APIC 1
+#define NMI_LOCAL_APIC 2
+#define NMI_INVALID 3
+
+#endif /* CONFIG_X86_LOCAL_APIC */
#endif /* __ASM_APIC_H */
diff --git a/xen-2.4.16/include/asm-i386/apicdef.h b/xen-2.4.16/include/asm-i386/apicdef.h
index f855a7d88d..227bfca652 100644
--- a/xen-2.4.16/include/asm-i386/apicdef.h
+++ b/xen-2.4.16/include/asm-i386/apicdef.h
@@ -32,6 +32,8 @@
#define SET_APIC_LOGICAL_ID(x) (((x)<<24))
#define APIC_ALL_CPUS 0xFF
#define APIC_DFR 0xE0
+#define APIC_DFR_CLUSTER 0x0FFFFFFFul /* Clustered */
+#define APIC_DFR_FLAT 0xFFFFFFFFul /* Flat mode */
#define APIC_SPIV 0xF0
#define APIC_SPIV_FOCUS_DISABLED (1<<9)
#define APIC_SPIV_APIC_ENABLED (1<<8)
@@ -57,6 +59,7 @@
#define APIC_INT_LEVELTRIG 0x08000
#define APIC_INT_ASSERT 0x04000
#define APIC_ICR_BUSY 0x01000
+#define APIC_DEST_PHYSICAL 0x00000
#define APIC_DEST_LOGICAL 0x00800
#define APIC_DM_FIXED 0x00000
#define APIC_DM_LOWEST 0x00100
@@ -107,7 +110,19 @@
#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
+#ifdef CONFIG_X86_CLUSTERED_APIC
+#define MAX_IO_APICS 32
+#else
#define MAX_IO_APICS 8
+#endif
+
+
+/*
+ * The broadcast ID is 0xF for old APICs and 0xFF for xAPICs. SAPICs
+ * don't broadcast (yet?), but if they did, they might use 0xFFFF.
+ */
+#define APIC_BROADCAST_ID_XAPIC (0xFF)
+#define APIC_BROADCAST_ID_APIC (0x0F)
/*
* the local APIC register structure, memory mapped. Not terribly well
diff --git a/xen-2.4.16/include/asm-i386/io_apic.h b/xen-2.4.16/include/asm-i386/io_apic.h
index a03be4d733..44916209a8 100644
--- a/xen-2.4.16/include/asm-i386/io_apic.h
+++ b/xen-2.4.16/include/asm-i386/io_apic.h
@@ -15,7 +15,8 @@
#define APIC_MISMATCH_DEBUG
#define IO_APIC_BASE(idx) \
- ((volatile int *)__fix_to_virt(FIX_IO_APIC_BASE_0 + idx))
+ ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \
+ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK)))
/*
* The structure of the IO-APIC:
@@ -96,7 +97,7 @@ extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
extern int mp_irq_entries;
/* MP IRQ source entries */
-extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+extern struct mpc_config_intsrc *mp_irqs;
/* non-0 if default (table-less) MP configuration */
extern int mpc_default_type;
@@ -132,8 +133,7 @@ static inline void io_apic_sync(unsigned int apic)
}
/* 1 if "noapic" boot option passed */
-//extern int skip_ioapic_setup;
-#define skip_ioapic_setup 0
+extern int skip_ioapic_setup;
/*
* If we use the IO-APIC for IRQ routing, disable automatic
diff --git a/xen-2.4.16/include/asm-i386/mpspec.h b/xen-2.4.16/include/asm-i386/mpspec.h
index 2598ea02db..2829cb54a3 100644
--- a/xen-2.4.16/include/asm-i386/mpspec.h
+++ b/xen-2.4.16/include/asm-i386/mpspec.h
@@ -1,6 +1,7 @@
#ifndef __ASM_MPSPEC_H
#define __ASM_MPSPEC_H
+
/*
* Structure definitions for SMP machines following the
* Intel Multiprocessing Specification 1.1 and 1.4.
@@ -13,8 +14,15 @@
#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_')
-/* Maximum of 16 APICs with the current APIC ID architecture. */
+/*
+ * a maximum of 16 APICs with the current APIC ID architecture.
+ * xAPICs can have up to 256. SAPICs have 16 ID bits.
+ */
+#ifdef CONFIG_X86_CLUSTERED_APIC
+#define MAX_APICS 256
+#else
#define MAX_APICS 16
+#endif
#define MAX_MPC_ENTRY 1024
@@ -178,7 +186,11 @@ struct mpc_config_translation
* 7 2 CPU MCA+PCI
*/
+#ifdef CONFIG_MULTIQUAD
+#define MAX_IRQ_SOURCES 512
+#else /* !CONFIG_MULTIQUAD */
#define MAX_IRQ_SOURCES 256
+#endif /* CONFIG_MULTIQUAD */
#define MAX_MP_BUSSES 32
enum mp_bustype {
@@ -187,8 +199,11 @@ enum mp_bustype {
MP_BUS_PCI,
MP_BUS_MCA
};
-extern int mp_bus_id_to_type [MAX_MP_BUSSES];
-extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
+extern int *mp_bus_id_to_type;
+extern int *mp_bus_id_to_node;
+extern int *mp_bus_id_to_local;
+extern int *mp_bus_id_to_pci_bus;
+extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
extern unsigned int boot_cpu_physical_apicid;
extern unsigned long phys_cpu_present_map;
@@ -197,11 +212,9 @@ extern void find_smp_config (void);
extern void get_smp_config (void);
extern int nr_ioapics;
extern int apic_version [MAX_APICS];
-extern int mp_bus_id_to_type [MAX_MP_BUSSES];
extern int mp_irq_entries;
-extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
+extern struct mpc_config_intsrc *mp_irqs;
extern int mpc_default_type;
-extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
extern int mp_current_pci_id;
extern unsigned long mp_lapic_addr;
extern int pic_mode;
diff --git a/xen-2.4.16/include/asm-i386/processor.h b/xen-2.4.16/include/asm-i386/processor.h
index f7f949d82b..36a50b2976 100644
--- a/xen-2.4.16/include/asm-i386/processor.h
+++ b/xen-2.4.16/include/asm-i386/processor.h
@@ -358,16 +358,22 @@ struct thread_struct {
trap_info_t traps[256];
};
+#define IDT_ENTRIES 256
+extern struct desc_struct idt_table[];
+extern struct desc_struct *idt_tables[];
+
#define SET_DEFAULT_FAST_TRAP(_p) \
(_p)->fast_trap_idx = 0x20; \
(_p)->fast_trap_desc.a = 0; \
(_p)->fast_trap_desc.b = 0;
#define CLEAR_FAST_TRAP(_p) \
- (memset(idt_table + (_p)->fast_trap_idx, 0, 8))
+ (memset(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
+ 0, 8))
#define SET_FAST_TRAP(_p) \
- (memcpy(idt_table + (_p)->fast_trap_idx, &((_p)->fast_trap_desc), 8))
+ (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
+ &((_p)->fast_trap_desc), 8))
#define INIT_THREAD { \
sizeof(idle0_stack) + (long) &idle0_stack, /* esp0 */ \
diff --git a/xen-2.4.16/include/asm-i386/smpboot.h b/xen-2.4.16/include/asm-i386/smpboot.h
index 67bbedbd08..ece215fab0 100644
--- a/xen-2.4.16/include/asm-i386/smpboot.h
+++ b/xen-2.4.16/include/asm-i386/smpboot.h
@@ -1,13 +1,82 @@
#ifndef __ASM_SMPBOOT_H
#define __ASM_SMPBOOT_H
-#define TRAMPOLINE_LOW phys_to_virt(0x467)
-#define TRAMPOLINE_HIGH phys_to_virt(0x469)
+/* enum for clustered_apic_mode values */
+enum{
+ CLUSTERED_APIC_NONE = 0,
+ CLUSTERED_APIC_XAPIC,
+ CLUSTERED_APIC_NUMAQ
+};
-#define boot_cpu_apicid boot_cpu_physical_apicid
+#ifdef CONFIG_X86_CLUSTERED_APIC
+extern unsigned int apic_broadcast_id;
+extern unsigned char clustered_apic_mode;
+extern unsigned char esr_disable;
+extern unsigned char int_delivery_mode;
+extern unsigned int int_dest_addr_mode;
+extern int cyclone_setup(char*);
-/* How to map from the cpu_present_map. */
-#define cpu_present_to_apicid(apicid) (apicid)
+static inline void detect_clustered_apic(char* oem, char* prod)
+{
+ /*
+ * Can't recognize Summit xAPICs at present, so use the OEM ID.
+ */
+ if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(prod, "VIGIL SMP", 9)){
+ clustered_apic_mode = CLUSTERED_APIC_XAPIC;
+ apic_broadcast_id = APIC_BROADCAST_ID_XAPIC;
+ int_dest_addr_mode = APIC_DEST_PHYSICAL;
+ int_delivery_mode = dest_Fixed;
+ esr_disable = 1;
+ /*Start cyclone clock*/
+ cyclone_setup(0);
+ }
+ else if (!strncmp(oem, "IBM NUMA", 8)){
+ clustered_apic_mode = CLUSTERED_APIC_NUMAQ;
+ apic_broadcast_id = APIC_BROADCAST_ID_APIC;
+ int_dest_addr_mode = APIC_DEST_LOGICAL;
+ int_delivery_mode = dest_LowestPrio;
+ esr_disable = 1;
+ }
+}
+#define INT_DEST_ADDR_MODE (int_dest_addr_mode)
+#define INT_DELIVERY_MODE (int_delivery_mode)
+#else /* CONFIG_X86_CLUSTERED_APIC */
+#define apic_broadcast_id (APIC_BROADCAST_ID_APIC)
+#define clustered_apic_mode (CLUSTERED_APIC_NONE)
+#define esr_disable (0)
+#define detect_clustered_apic(x,y)
+#define INT_DEST_ADDR_MODE (APIC_DEST_LOGICAL) /* logical delivery */
+#define INT_DELIVERY_MODE (dest_LowestPrio)
+#endif /* CONFIG_X86_CLUSTERED_APIC */
+#define BAD_APICID 0xFFu
+
+#define TRAMPOLINE_LOW phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0x8:0x467)
+#define TRAMPOLINE_HIGH phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0xa:0x469)
+
+#define boot_cpu_apicid ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?boot_cpu_logical_apicid:boot_cpu_physical_apicid)
+
+extern unsigned char raw_phys_apicid[NR_CPUS];
+
+/*
+ * How to map from the cpu_present_map
+ */
+static inline int cpu_present_to_apicid(int mps_cpu)
+{
+ if (clustered_apic_mode == CLUSTERED_APIC_XAPIC)
+ return raw_phys_apicid[mps_cpu];
+ if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ return (mps_cpu/4)*16 + (1<<(mps_cpu%4));
+ return mps_cpu;
+}
+
+static inline unsigned long apicid_to_phys_cpu_present(int apicid)
+{
+ if(clustered_apic_mode)
+ return 1UL << (((apicid >> 4) << 2) + (apicid & 0x3));
+ return 1UL << apicid;
+}
+
+#define physical_to_logical_apicid(phys_apic) ( (1ul << (phys_apic & 0x3)) | (phys_apic & 0xF0u) )
/*
* Mappings between logical cpu number and logical / physical apicid
@@ -22,7 +91,31 @@ extern volatile int cpu_2_physical_apicid[];
#define cpu_to_logical_apicid(cpu) cpu_2_logical_apicid[cpu]
#define physical_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
#define cpu_to_physical_apicid(cpu) cpu_2_physical_apicid[cpu]
+#ifdef CONFIG_MULTIQUAD /* use logical IDs to bootstrap */
+#define boot_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
+#define cpu_to_boot_apicid(cpu) cpu_2_logical_apicid[cpu]
+#else /* !CONFIG_MULTIQUAD */ /* use physical IDs to bootstrap */
#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
#define cpu_to_boot_apicid(cpu) cpu_2_physical_apicid[cpu]
+#endif /* CONFIG_MULTIQUAD */
+#ifdef CONFIG_X86_CLUSTERED_APIC
+/*
+ * Value to use as the interrupt destination for the current
+ * clustered_apic_mode:
+ *  - CLUSTERED_APIC_NUMAQ: APIC_BROADCAST_ID_APIC;
+ *  - CLUSTERED_APIC_XAPIC: the physical APIC ID of the next CPU, chosen
+ *    by stepping a static counter modulo smp_num_cpus on every call;
+ *  - anything else: cpu_online_map.
+ */
+static inline int target_cpus(void)
+{
+	static int cpu;
+	switch(clustered_apic_mode){
+		case CLUSTERED_APIC_NUMAQ:
+			/* Broadcast intrs to local quad only. */
+			return APIC_BROADCAST_ID_APIC;
+		case CLUSTERED_APIC_XAPIC:
+			/*round robin the interrupts*/
+			cpu = (cpu+1)%smp_num_cpus;
+			return cpu_to_physical_apicid(cpu);
+		default:
+			/* a label must be followed by a statement */
+			break;
+	}
+	return cpu_online_map;
+}
+#else
+#define target_cpus() (0x01)
+#endif
#endif
diff --git a/xen-2.4.16/include/hypervisor-ifs/block.h b/xen-2.4.16/include/hypervisor-ifs/block.h
index 55f7a33ce2..1722a6c288 100644
--- a/xen-2.4.16/include/hypervisor-ifs/block.h
+++ b/xen-2.4.16/include/hypervisor-ifs/block.h
@@ -6,35 +6,80 @@
* These are the ring data structures for buffering messages between
* the hypervisor and guestos's.
*
- * For now we'll start with our own rings for the block IO code instead
- * of using the network rings. Hopefully, this will give us additional
- * flexibility in the future should we choose to move away from a
- * ring producer consumer communication model.
*/
#ifndef __BLOCK_H__
#define __BLOCK_H__
-typedef struct blk_tx_entry_st {
- unsigned long addr; /* virtual address */
- unsigned long size; /* in bytes */
-} blk_tx_entry_t;
+#include <linux/kdev_t.h>
-typedef struct blk_rx_entry_st {
- unsigned long addr; /* virtual address */
- unsigned long size; /* in bytes */
-} blk_rx_entry_t;
+/* the first four definitions match fs.h */
+#define XEN_BLOCK_READ 0
+#define XEN_BLOCK_WRITE 1
+#define XEN_BLOCK_READA 2 /* currently unused */
+#define XEN_BLOCK_SPECIAL 4 /* currently unused */
+#define XEN_BLOCK_PROBE 8 /* determine io configuration from hypervisor */
+#define XEN_BLOCK_DEBUG 16 /* debug */
-typedef struct blk_ring_st {
- blk_tx_entry_t *tx_ring;
- unsigned int tx_prod, tx_cons, tx_event;
- unsigned int tx_ring_size;
+#define XEN_BLOCK_SYNC 2
+#define XEN_BLOCK_ASYNC 3
- blk_rx_entry_t *rx_ring;
- unsigned int rx_prod, rx_cons, rx_event;
- unsigned int rx_ring_size;
+#define XEN_BLOCK_MAX_DOMAINS 32 /* NOTE: FIX THIS. VALUE SHOULD COME FROM? */
+
+#define BLK_TX_RING_SIZE 256
+#define BLK_RX_RING_SIZE 256
+
+#define BLK_TX_RING_MAX_ENTRIES (BLK_TX_RING_SIZE - 2)
+#define BLK_RX_RING_MAX_ENTRIES (BLK_RX_RING_SIZE - 2)
+
+#define BLK_TX_RING_INC(_i) (((_i)+1) & (BLK_TX_RING_SIZE-1))
+#define BLK_RX_RING_INC(_i) (((_i)+1) & (BLK_RX_RING_SIZE-1))
+#define BLK_TX_RING_ADD(_i,_j) (((_i)+(_j)) & (BLK_TX_RING_SIZE-1))
+#define BLK_RX_RING_ADD(_i,_j) (((_i)+(_j)) & (BLK_RX_RING_SIZE-1))
+
+typedef struct blk_ring_entry
+{
+ void * id; /* for guest os use; used for the bh */
+ int priority; /* orig sched pri, SYNC or ASYNC for now */
+ int operation; /* XEN_BLOCK_READ or XEN_BLOCK_WRITE */
+ char * buffer;
+ unsigned long block_number; /* block number */
+ unsigned short block_size; /* block size */
+ kdev_t device;
+ unsigned long sector_number; /* real buffer location on disk */
+} blk_ring_entry_t;
+
+typedef struct blk_ring_st
+{
+ blk_ring_entry_t *tx_ring;
+ unsigned int tx_prod, tx_cons;
+ unsigned int tx_ring_size;
+
+ blk_ring_entry_t *rx_ring;
+ unsigned int rx_prod, rx_cons;
+ unsigned int rx_ring_size;
} blk_ring_t;
-int blk_create_ring(int domain, unsigned long ptr);
+#define MAX_XEN_DISK_COUNT 100
+
+#define XEN_DISK_IDE 1
+#define XEN_DISK_SCSI 2
+
+typedef struct xen_disk /* physical disk */
+{
+ int type; /* disk type */
+ unsigned long capacity;
+ unsigned char heads; /* hdreg.h::hd_geometry */
+ unsigned char sectors; /* hdreg.h::hd_geometry */
+ unsigned int cylinders; /* hdreg.h::hd_big_geometry */
+ unsigned long start; /* hdreg.h::hd_geometry */
+ void * gendisk; /* struct gendisk ptr */
+} xen_disk_t;
+
+/*
+ * Disk table: a count followed by a fixed-size array of physical disk
+ * descriptors.  The array is sized by MAX_XEN_DISK_COUNT so the limit is
+ * defined in one place.
+ */
+typedef struct xen_disk_info
+{
+	int count; /* number of subsequent xen_disk_t structures to follow */
+	xen_disk_t disks[MAX_XEN_DISK_COUNT];
+} xen_disk_info_t;
#endif
diff --git a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h
index b97fad52f2..d9113805ba 100644
--- a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h
+++ b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h
@@ -85,10 +85,11 @@ typedef struct
#define __HYPERVISOR_exit 8
#define __HYPERVISOR_dom0_op 9
#define __HYPERVISOR_network_op 10
-#define __HYPERVISOR_set_debugreg 11
-#define __HYPERVISOR_get_debugreg 12
-#define __HYPERVISOR_update_descriptor 13
-#define __HYPERVISOR_set_fast_trap 14
+#define __HYPERVISOR_block_io_op 11
+#define __HYPERVISOR_set_debugreg 12
+#define __HYPERVISOR_get_debugreg 13
+#define __HYPERVISOR_update_descriptor 14
+#define __HYPERVISOR_set_fast_trap 15
#define TRAP_INSTR "int $0x82"
diff --git a/xen-2.4.16/include/xeno/blkdev.h b/xen-2.4.16/include/xeno/blkdev.h
index 2701eb84e3..f5c1d25d75 100644
--- a/xen-2.4.16/include/xeno/blkdev.h
+++ b/xen-2.4.16/include/xeno/blkdev.h
@@ -85,6 +85,7 @@ struct buffer_head {
struct inode * b_inode;
struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */
+ void *b_xen_request; /* xen request structure */
};
typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
diff --git a/xen-2.4.16/include/xeno/config.h b/xen-2.4.16/include/xeno/config.h
index 637281f64b..d9ffb1d95d 100644
--- a/xen-2.4.16/include/xeno/config.h
+++ b/xen-2.4.16/include/xeno/config.h
@@ -29,6 +29,7 @@
#define CONFIG_IDEDMA_PCI_AUTO 1
#define CONFIG_IDEDMA_AUTO 1
#define CONFIG_BLK_DEV_IDE_MODES 1
+#define CONFIG_BLK_DEV_PIIX 1
#define CONFIG_SCSI 1
#define CONFIG_BLK_DEV_SD 1
diff --git a/xen-2.4.16/include/xeno/sched.h b/xen-2.4.16/include/xeno/sched.h
index c4caa4c431..75b7959ee1 100644
--- a/xen-2.4.16/include/xeno/sched.h
+++ b/xen-2.4.16/include/xeno/sched.h
@@ -1,4 +1,3 @@
-
#ifndef _LINUX_SCHED_H
#define _LINUX_SCHED_H
@@ -49,6 +48,7 @@ extern struct mm_struct init_mm;
#define _HYP_EVENT_NEED_RESCHED 0
#define _HYP_EVENT_NET_RX 1
#define _HYP_EVENT_DIE 2
+#define _HYP_EVENT_BLK_RX 3
#define PF_DONEFPUINIT 0x1 /* Has the FPU been initialised for this task? */
#define PF_USEDFPU 0x2 /* Has this task used the FPU since last save? */
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile
index 9361a01ec7..74a0c6c565 100644
--- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile
@@ -1,3 +1,3 @@
O_TARGET := blk.o
-obj-y := block.o
+obj-y := xl_block.o xl_block_test.o
include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c
new file mode 100644
index 0000000000..0416b467a7
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c
@@ -0,0 +1,827 @@
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <linux/fs.h>
+#include <linux/hdreg.h> /* HDIO_GETGEO, et al */
+#include <linux/blkdev.h>
+#include <linux/major.h>
+
+/* NOTE: this is drive independent, so no inclusion of ide.h */
+
+#include <asm/hypervisor-ifs/block.h>
+#include <asm/hypervisor-ifs/hypervisor-if.h>
+#include <asm/io.h>
+#include <asm/uaccess.h> /* put_user() */
+
+#define MAJOR_NR XLBLK_MAJOR /* force defns in blk.h, must preceed include */
+static int xlblk_major = XLBLK_MAJOR;
+
+#include <linux/blk.h> /* must come after definition of MAJOR_NR!! */
+
+/* instead of including linux/ide.h to pick up the definition of byte
+ * (and consequently screwing up blk.h), we'll just copy the definition */
+typedef unsigned char byte;
+
+void xlblk_ide_register_disk(int, unsigned long);
+
+#define XLBLK_MAX 2 /* very arbitrary */
+#define XLBLK_MAJOR_NAME "blk"
+#define IDE_PARTN_BITS 6 /* from ide.h::PARTN_BITS */
+#define IDE_PARTN_MASK ((1<<IDE_PARTN_BITS)-1) /* from ide.h::PARTN_MASK */
+static int xlblk_blk_size[XLBLK_MAX];
+static int xlblk_blksize_size[XLBLK_MAX];
+static int xlblk_hardsect_size[XLBLK_MAX];
+static int xlblk_read_ahead[XLBLK_MAX];
+static int xlblk_max_sectors[XLBLK_MAX];
+
+#define XLBLK_RX_IRQ _EVENT_BLK_RX
+#define XLBLK_TX_IRQ _EVENT_BLK_TX
+
+/*
+ * Driver-wide state for the virtual block device.  The single instance,
+ * xlblk_device, is what xlblk_init() passes to request_irq() as dev_id.
+ * NOTE(review): neither field is read or written anywhere else in this
+ * file -- confirm whether they are still needed.
+ */
+typedef struct xlblk_device
+{
+ struct buffer_head *bh;
+ unsigned int tx_count; /* number of used slots in tx ring */
+} xlblk_device_t;
+
+xlblk_device_t xlblk_device;
+
+/* USE_REQUEST_QUEUE = 1 use (multiple) request queues
+ * = 0 don't use IO request queue
+ */
+#define USE_REQUEST_QUEUE 1
+
+#define XLBLK_DEBUG 0
+#define XLBLK_DEBUG_IOCTL 0
+
+/*
+ * disk management
+ */
+
+xen_disk_info_t xen_disk_info;
+
+/* some declarations */
+void hypervisor_request(void * id,
+ int operation,
+ char * buffer,
+ unsigned long block_number,
+ unsigned short block_size,
+ kdev_t device,
+ int mode);
+
+
+/* ------------------------------------------------------------------------
+ */
+
+static int xenolinux_block_open(struct inode *inode, struct file *filep)
+{
+ if (XLBLK_DEBUG) {
+ printk (KERN_ALERT "xenolinux_block_open\n"); }
+ return 0;
+}
+
+static int xenolinux_block_release(struct inode *inode, struct file *filep)
+{
+ if (XLBLK_DEBUG) {
+ printk (KERN_ALERT "xenolinux_block_release\n"); }
+ return 0;
+}
+
+/*
+ * xenolinux_block_ioctl
+ *
+ * ioctl handler for the virtual block device.
+ *
+ * inode:    device inode; its minor indexes the per-device tables
+ * filep:    unused
+ * command:  BLKGETSIZE, BLKRRPART, BLKSSZGET, HDIO_GETGEO, HDIO_GETGEO_BIG
+ * argument: user-space pointer receiving the result, where applicable
+ *
+ * returns 0 on success, -EPERM without CAP_SYS_ADMIN, -EINVAL for a NULL
+ * inode or a NULL geometry pointer, -ENODEV if the minor is >= XLBLK_MAX
+ * and -EFAULT if the user pointer cannot be written.  BLKRRPART and any
+ * unrecognised command are accepted and return 0.
+ */
+static int xenolinux_block_ioctl(struct inode *inode, struct file *filep,
+                                 unsigned command, unsigned long argument)
+{
+    int minor_dev;
+
+    if (XLBLK_DEBUG_IOCTL)
+    {
+        printk (KERN_ALERT "xenolinux_block_ioctl\n");
+    }
+
+    /* check permissions */
+    if (!capable(CAP_SYS_ADMIN)) return -EPERM;
+    if (!inode) return -EINVAL;
+    minor_dev = MINOR(inode->i_rdev);
+    if (minor_dev >= XLBLK_MAX) return -ENODEV;
+
+    if (XLBLK_DEBUG_IOCTL)
+    {
+        printk (KERN_ALERT
+                " command: 0x%x, argument: 0x%lx, minor: 0x%x\n",
+                command, (long) argument, minor_dev);
+    }
+
+    switch (command)
+    {
+    case BLKGETSIZE :
+    {
+        if (XLBLK_DEBUG_IOCTL)
+        {
+            printk (KERN_ALERT
+                    " BLKGETSIZE: %x %lx\n", BLKGETSIZE,
+                    (long) xen_disk_info.disks[0].capacity);
+        }
+        /* always reports disk 0, whatever the minor */
+        return put_user(xen_disk_info.disks[0].capacity,
+                        (unsigned long *) argument);
+    }
+    case BLKRRPART :
+    {
+        if (XLBLK_DEBUG_IOCTL) {
+            printk (KERN_ALERT " BLKRRPART: %x\n", BLKRRPART); }
+        break;
+    }
+    case BLKSSZGET :
+    {
+        if (XLBLK_DEBUG_IOCTL) {
+            printk (KERN_ALERT " BLKSSZGET: %x 0x%x\n", BLKSSZGET,
+                    xlblk_hardsect_size[minor_dev]); }
+        /* the sector size goes back through the user pointer; returning it
+         * as the ioctl result would leave the caller's variable unset */
+        return put_user(xlblk_hardsect_size[minor_dev], (int *) argument);
+    }
+    case HDIO_GETGEO :
+    {
+        struct hd_geometry *geo = (struct hd_geometry *)argument;
+
+        if (XLBLK_DEBUG_IOCTL) {
+            printk (KERN_ALERT " HDIO_GETGEO: %x\n", HDIO_GETGEO); }
+
+        if (!argument) return -EINVAL;
+
+        /* fixed geometry: start 0, 0xff heads, 0x3f sectors, 0x106 cyls */
+        if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
+        if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT;
+        if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT;
+        if (put_user(0x106, (unsigned short *) &geo->cylinders)) return -EFAULT;
+
+        return 0;
+    }
+    case HDIO_GETGEO_BIG :
+    {
+        struct hd_big_geometry *geo = (struct hd_big_geometry *) argument;
+
+        if (XLBLK_DEBUG_IOCTL) {
+            printk (KERN_ALERT " HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); }
+
+        if (!argument) return -EINVAL;
+
+        /* same fixed geometry as HDIO_GETGEO, with a wider cylinder field */
+        if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
+        if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT;
+        if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT;
+        if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
+
+        return 0;
+    }
+    default :
+    {
+        if (XLBLK_DEBUG_IOCTL) {
+            printk (KERN_ALERT " eh? unknown ioctl\n"); }
+        break;
+    }
+    }
+
+    return 0;
+}
+
+static int xenolinux_block_check(kdev_t dev)
+{
+ if (XLBLK_DEBUG) {
+ printk (KERN_ALERT "xenolinux_block_check\n"); }
+ return 0;
+}
+
+static int xenolinux_block_revalidate(kdev_t dev)
+{
+ if (XLBLK_DEBUG) {
+ printk (KERN_ALERT "xenolinux_block_revalidate\n"); }
+ return 0;
+}
+
+/*
+ * hypervisor_request
+ *
+ * queue one block io request on the tx ring and, for synchronous requests,
+ * trap into the hypervisor.
+ *
+ * id:           for guest use only; stored in the ring entry unchanged.
+ * operation:    XEN_BLOCK_READ, XEN_BLOCK_WRITE or XEN_BLOCK_PROBE
+ * buffer:       buffer to read/write into.  this should be a
+ *               virtual address in the guest os; it is translated to a
+ *               machine address before being placed in the ring.
+ * block_number: block to read/write.  it is also used, unscaled, as the
+ *               base of sector_number (the callers in this file pass
+ *               bh->b_rsector).
+ * block_size:   size of each block
+ * device:       guest device; its major must be XLBLK_MAJOR for
+ *               read/write, and it is mapped to MKDEV(IDE0_MAJOR, 0).
+ * mode:         XEN_BLOCK_SYNC or XEN_BLOCK_ASYNC.  async requests
+ *               will queue until a sync request is issued.
+ *
+ * if the tx ring is full the request is NOT queued: a warning is logged
+ * and the function returns.  writing the slot anyway would clobber an
+ * entry that has not been consumed yet and make tx_prod equal tx_cons,
+ * i.e. make a full ring look empty.
+ */
+
+void hypervisor_request(void * id,
+                        int operation,
+                        char * buffer,
+                        unsigned long block_number,
+                        unsigned short block_size,
+                        kdev_t device,
+                        int mode)
+{
+    blk_ring_t *blk_ring = start_info.blk_ring;
+    blk_ring_entry_t *slot;
+    void *buffer_pa, *buffer_ma;
+    kdev_t phys_device = (kdev_t) 0;
+    unsigned long sector_number = 0;
+
+    /* XXX SMH: now need to convert guest virtual address to machine address */
+    buffer_pa = (void *)virt_to_phys((unsigned long)buffer);
+    buffer_ma = (void *)phys_to_machine((unsigned long)buffer_pa);
+
+    /* XEN_BLOCK_PROBE keeps phys_device and sector_number at zero */
+    if (operation == XEN_BLOCK_READ || operation == XEN_BLOCK_WRITE)
+    {
+        struct gendisk *gd;
+
+        /*
+         * map logical major device to the physical device number
+         *
+         * XLBLK_MAJOR -> IDE0_MAJOR (123 -> 3)
+         */
+        if (MAJOR(device) != XLBLK_MAJOR)
+        {
+            printk (KERN_ALERT
+                    "error: xl_block::hypervisor_request: unknown device [0x%x]\n",
+                    device);
+            BUG();
+        }
+        phys_device = MKDEV(IDE0_MAJOR, 0);
+
+        /*
+         * compute real buffer location on disk
+         * (from ll_rw_block.c::submit_bh)
+         */
+        gd = (struct gendisk *) xen_disk_info.disks[0].gendisk;
+        sector_number = block_number /* * block_size >> 9 */;
+        if (gd != NULL) /* if we have a partition table... */
+        {
+            sector_number += gd->part[MINOR(device) & IDE_PARTN_MASK].start_sect;
+        }
+    }
+
+    /*
+     * CHECK TO SEE IF THERE IS SPACE IN THE RING
+     */
+    if (BLK_TX_RING_INC(blk_ring->tx_prod) == blk_ring->tx_cons)
+    {
+        printk (KERN_ALERT "hypervisor_request: tx_cons: %d, tx_prod:%d\n",
+                blk_ring->tx_cons, blk_ring->tx_prod);
+        return;
+    }
+
+    /* fill out a communications ring structure
+       and then trap into the hypervisor */
+    slot = &blk_ring->tx_ring[blk_ring->tx_prod];
+    slot->id            = id;
+    slot->priority      = mode;
+    slot->operation     = operation;
+    slot->buffer        = buffer_ma;
+    slot->block_number  = block_number;
+    slot->block_size    = block_size;
+    slot->device        = phys_device;
+    slot->sector_number = sector_number;
+
+    /* publish the entry only after it is completely filled in */
+    blk_ring->tx_prod = BLK_TX_RING_INC(blk_ring->tx_prod);
+
+    if (mode == XEN_BLOCK_SYNC)
+    {
+        /* trap into hypervisor */
+        HYPERVISOR_block_io_op();
+    }
+    else if (mode != XEN_BLOCK_ASYNC)
+    {
+        /* ummm, unknown mode. */
+        BUG();
+    }
+    /* XEN_BLOCK_ASYNC: do nothing.  the request sits in the ring and
+       the next sync request will trigger the hypervisor to act */
+
+    return;
+}
+
+
+/*
+ * do_xlblk_request
+ *
+ * request-queue handler: take each request off the queue and hand every
+ * buffer head in it to the hypervisor as a separate synchronous request,
+ * using the struct request pointer as the request id.
+ *
+ * rq: unused; QUEUE_EMPTY and CURRENT pick the queue.
+ *
+ * if the tx ring looks full, the current request is left on the queue and
+ * processing stops.
+ *
+ * TO DO: should probably release the io_request_lock and then re-acquire
+ *        (see LDD p. 338)
+ * NOTE(review): ring space is checked once per request, but one ring slot
+ *        is used per buffer head -- confirm a multi-buffer request cannot
+ *        overrun the ring.
+ */
+
+static void do_xlblk_request (request_queue_t *rq)
+{
+    struct request *req;
+
+    if (XLBLK_DEBUG)
+    {
+        printk (KERN_ALERT "xlblk.c::do_xlblk_request for '%s'\n", DEVICE_NAME);
+    }
+
+    while (!QUEUE_EMPTY)
+    {
+        blk_ring_t *blk_ring = start_info.blk_ring;
+        struct buffer_head *bh;
+        int rw;
+
+        req = CURRENT;
+
+        if (XLBLK_DEBUG)
+        {
+            printk (KERN_ALERT
+                    "do_xlblk_request %p: cmd %i, sec %lx, (%li) bh:%p\n",
+                    req, req->cmd, req->sector,
+                    req->current_nr_sectors, req->bh);
+        }
+
+        /* is there space in the tx ring for this request?
+         * if the ring is full, then leave the request in the queue
+         *
+         * the tx indices must be advanced with the TX ring macro, matching
+         * the check in hypervisor_request.
+         *
+         * THIS IS A BIT BOGUS SINCE XEN COULD BE UPDATING TX_CONS
+         * AT THE SAME TIME
+         */
+        if (BLK_TX_RING_INC(blk_ring->tx_prod) == blk_ring->tx_cons)
+        {
+            printk (KERN_ALERT "OOPS, TX LOOKS FULL cons: %d prod: %d\n",
+                    blk_ring->tx_cons, blk_ring->tx_prod);
+            break;
+        }
+
+        req->errors = 0;
+        blkdev_dequeue_request(req);
+
+        for (bh = req->bh; bh != NULL; bh = bh->b_reqnext)
+        {
+            rw = req->cmd;
+            if (rw == READA) rw = READ;       /* read-ahead is a plain read */
+            if ((rw != READ) && (rw != WRITE))
+            {
+                printk (KERN_ALERT
+                        "XenoLinux Virtual Block Device: bad command: %d\n", rw);
+                BUG();
+            }
+
+            hypervisor_request (req, rw == READ ? XEN_BLOCK_READ : XEN_BLOCK_WRITE,
+                                bh->b_data, bh->b_rsector, bh->b_size,
+                                bh->b_dev, XEN_BLOCK_SYNC);
+        }
+    }
+
+    return;
+}
+
+/*
+ * xenolinux_block_request
+ *
+ * read or write a block without using a request queue: the buffer head is
+ * submitted to the hypervisor directly as a synchronous request.
+ *
+ * rq: unused
+ * rw: READ, READA (treated as READ) or WRITE
+ * bh: buffer to transfer; also passed as the request id, which
+ *     xlblk_rx_int casts back to a buffer head when USE_REQUEST_QUEUE is 0
+ *
+ * always returns 0.  a buffer carrying any other command is completed with
+ * an error (b_end_io with uptodate == 0) instead of being left pending.
+ */
+
+static int xenolinux_block_request(request_queue_t *rq,
+                                   int rw,
+                                   struct buffer_head *bh)
+{
+    if (XLBLK_DEBUG) {
+        printk (KERN_ALERT "xlblk.c::xenolinux_block_request: %lx %d %lx\n",
+                (unsigned long) rq, rw, (unsigned long) bh); }
+
+    if (rw == READA) rw = READ;
+    if ((rw != READ) && (rw != WRITE))
+    {
+        printk (KERN_ALERT
+                "XenoLinux Virtual Block Device: bad command: %d\n", rw);
+        /* nobody else will ever complete this buffer, so fail it here */
+        bh->b_end_io(bh, 0);
+        return 0;
+    }
+
+    hypervisor_request (bh, rw == READ ? XEN_BLOCK_READ : XEN_BLOCK_WRITE,
+                        bh->b_data, bh->b_rsector, bh->b_size,
+                        bh->b_dev, XEN_BLOCK_SYNC);
+
+    return 0;
+}
+
+/* operations table given to register_blkdev() and stored in the gendisk */
+static struct block_device_operations xenolinux_block_fops =
+{
+    .open               = xenolinux_block_open,
+    .release            = xenolinux_block_release,
+    .ioctl              = xenolinux_block_ioctl,
+    .check_media_change = xenolinux_block_check,
+    .revalidate         = xenolinux_block_revalidate,
+};
+
+/*
+ * xlblk_rx_int
+ *
+ * receive interrupt: walk the rx ring from rx_cons up to rx_prod and
+ * complete whatever each entry's id refers to, then record the new rx_cons.
+ *
+ * irq, dev_id, ptregs: unused
+ *
+ * with USE_REQUEST_QUEUE the id is the struct request queued by
+ * do_xlblk_request; otherwise it is the buffer head submitted by
+ * xenolinux_block_request.  every entry is completed as successful.
+ */
+static void xlblk_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+    blk_ring_t *blk_ring = start_info.blk_ring;
+    struct buffer_head *bh;
+    struct request *req;
+    int loop;
+
+    for (loop = blk_ring->rx_cons;
+         loop != blk_ring->rx_prod;
+         loop = BLK_RX_RING_INC(loop))
+    {
+        blk_ring_entry_t *bret = &blk_ring->rx_ring[loop];
+
+        if (USE_REQUEST_QUEUE)
+        {
+            req = (struct request *)bret->id;
+            printk(KERN_ALERT "|%p|", req);
+
+            if (!end_that_request_first(req, 1, "NAME"))
+            {
+                /* do_xlblk_request already took the request off the queue;
+                 * unlinking it a second time would write through stale
+                 * list pointers.
+                 *
+                 * should be end_that_request_last(req)
+                 * to wake up waiting processes (with complete) */
+                blkdev_release_request(req);
+            }
+        }
+        else
+        {
+            bh = (struct buffer_head *)bret->id;
+            bh->b_end_io(bh,1);
+        }
+    }
+
+    blk_ring->rx_cons = loop;
+}
+
+static void xlblk_tx_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+ if (XLBLK_DEBUG) {
+ printk (KERN_ALERT "--- xlblock::xlblk_tx_int\n"); }
+}
+
+/*
+ * xlblk_init
+ *
+ * bring up the virtual block device: allocate the tx/rx rings, hook the
+ * rx/tx interrupts, probe the hypervisor for disks, register the block
+ * major and its per-device tables, install the request handler and
+ * register disk 0.
+ *
+ * returns 0 on success.  on failure returns -ENOMEM (no shared ring),
+ * -ENOBUFS (ring allocation) or the error from request_irq() /
+ * register_blkdev(), after releasing everything acquired so far.
+ */
+int __init xlblk_init(void)
+{
+    blk_ring_t *blk_ring = start_info.blk_ring;
+    int loop, error;
+
+    /*
+     * initialize memory rings to communicate with hypervisor
+     */
+
+    if ( blk_ring == NULL ) return -ENOMEM;
+
+    blk_ring->tx_prod = blk_ring->tx_cons = 0;
+    blk_ring->rx_prod = blk_ring->rx_cons = 0;
+
+    blk_ring->tx_ring = kmalloc(BLK_TX_RING_SIZE * sizeof(blk_ring_entry_t),
+                                GFP_KERNEL);
+    blk_ring->rx_ring = kmalloc(BLK_RX_RING_SIZE * sizeof(blk_ring_entry_t),
+                                GFP_KERNEL);
+
+    if ((blk_ring->tx_ring == NULL) ||
+        (blk_ring->rx_ring == NULL))
+    {
+        printk (KERN_ALERT
+                "error, could not allocate ring memory for block device\n");
+        error = -ENOBUFS;
+        goto fail_rings;
+    }
+
+    /*
+     * setup soft interrupts to communicate with hypervisor
+     */
+
+    error = request_irq(XLBLK_RX_IRQ, xlblk_rx_int, 0, "xlblk-rx",
+                        &xlblk_device);
+    if (error)
+    {
+        printk(KERN_ALERT "Could not allocate receive interrupt\n");
+        goto fail_rings;
+    }
+
+    error = request_irq(XLBLK_TX_IRQ, xlblk_tx_int, 0, "xlblk-tx",
+                        &xlblk_device);
+    if (error)
+    {
+        printk(KERN_ALERT "Could not allocate transmit interrupt\n");
+        goto fail_rx_irq;
+    }
+
+    /*
+     * get information about physical drives
+     *
+     * NOTE: this should only occur in domain 0
+     */
+    memset (&xen_disk_info, 0, sizeof(xen_disk_info));
+    xen_disk_info.count = 0;
+
+    hypervisor_request(NULL, XEN_BLOCK_PROBE, (char *) &xen_disk_info,
+                       0, 0, (kdev_t) 0, XEN_BLOCK_SYNC);
+
+    for (loop = 0; loop < xen_disk_info.count; loop++)
+    {
+        printk (KERN_ALERT " %2d: type: %d, capacity: %ld\n",
+                loop, xen_disk_info.disks[loop].type,
+                xen_disk_info.disks[loop].capacity);
+    }
+
+    /*
+     * initialize device driver
+     */
+
+    SET_MODULE_OWNER(&xenolinux_block_fops);
+
+    error = register_blkdev(xlblk_major, "block", &xenolinux_block_fops);
+    if (error < 0)
+    {
+        printk (KERN_ALERT "xenolinux block: can't get major %d\n", xlblk_major);
+        /* the interrupts and rings are already set up: undo them too */
+        goto fail_tx_irq;
+    }
+
+    /* initialize global arrays in drivers/block/ll_rw_block.c */
+    blk_size[xlblk_major] = xlblk_blk_size;
+    blksize_size[xlblk_major] = xlblk_blksize_size;
+    hardsect_size[xlblk_major] = xlblk_hardsect_size;
+    /* read_ahead[] holds one int per major (xlblk_cleanup resets it to 0),
+     * so it gets the value itself, not the address of a table */
+    read_ahead[xlblk_major] = 8;
+    max_sectors[xlblk_major] = xlblk_max_sectors;
+    for (loop = 0; loop < XLBLK_MAX; loop++)
+    {
+        xlblk_blk_size[loop] = xen_disk_info.disks[0].capacity;
+        xlblk_blksize_size[loop] = 512;
+        xlblk_hardsect_size[loop] = 512;
+        xlblk_read_ahead[loop] = 8;
+        xlblk_max_sectors[loop] = 128;
+    }
+
+    if (USE_REQUEST_QUEUE)
+    {
+        /* NEED TO MODIFY THIS TO HANDLE MULTIPLE QUEUES
+         * also, should replace do_xlblk_request with blk.h::DEVICE_REQUEST
+         */
+        blk_init_queue(BLK_DEFAULT_QUEUE(xlblk_major), do_xlblk_request);
+        blk_queue_headactive(BLK_DEFAULT_QUEUE(xlblk_major), 0);
+    }
+    else
+    {
+        /* we don't use __make_request in ll_rw_blk */
+        blk_queue_make_request(BLK_DEFAULT_QUEUE(xlblk_major),
+                               xenolinux_block_request);
+    }
+    xlblk_ide_register_disk(0, xen_disk_info.disks[0].capacity);
+
+    /*
+     * completion
+     */
+    printk(KERN_ALERT
+           "XenoLinux Virtual Block Device Driver installed [device: %d]\n",
+           xlblk_major);
+    return 0;
+
+    /* unwind in reverse order of acquisition */
+ fail_tx_irq:
+    free_irq(XLBLK_TX_IRQ, &xlblk_device);
+ fail_rx_irq:
+    free_irq(XLBLK_RX_IRQ, &xlblk_device);
+ fail_rings:
+    if (blk_ring->tx_ring) kfree(blk_ring->tx_ring);
+    if (blk_ring->rx_ring) kfree(blk_ring->rx_ring);
+    /* do not leave pointers to freed memory in the shared ring */
+    blk_ring->tx_ring = NULL;
+    blk_ring->rx_ring = NULL;
+    return error;
+}
+
+/*
+ * xlblk_ide_register_disk
+ *
+ * build a gendisk for the virtual block major, add it to the system,
+ * remember it in xen_disk_info.disks[idx] and register the disk so that
+ * its partitions are scanned.
+ *
+ * idx:      slot in xen_disk_info.disks[] that receives the gendisk
+ * capacity: disk size handed to register_disk()
+ *
+ * if the gendisk or its sizes/part tables cannot be allocated, an error is
+ * logged and nothing is registered; xen_disk_info.disks[idx].gendisk is
+ * then left untouched.
+ */
+void xlblk_ide_register_disk(int idx, unsigned long capacity)
+{
+    int units;
+    int minors;
+    struct gendisk *gd;
+
+    /* plagiarized from ide-probe.c::init_gendisk */
+
+    units = 2; /* from ide.h::MAX_DRIVES */
+    minors = units * (1<<IDE_PARTN_BITS);
+
+    gd = kmalloc (sizeof(struct gendisk), GFP_KERNEL);
+    if (gd == NULL) goto fail;
+
+    gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL);
+    gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL);
+    if (gd->sizes == NULL || gd->part == NULL) goto fail_free;
+    memset(gd->part, 0, minors * sizeof(struct hd_struct));
+
+    gd->major = xlblk_major; /* our major device number */
+    gd->major_name = XLBLK_MAJOR_NAME; /* treated special in genhd.c */
+    gd->minor_shift = IDE_PARTN_BITS; /* num bits for partitions */
+    gd->max_p = 1<<IDE_PARTN_BITS; /* 1 + max partitions / drive */
+    gd->nr_real = units; /* current num real drives */
+    gd->real_devices= NULL; /* ptr to internal data (was: hwif) */
+    gd->next = NULL; /* linked list of major devs */
+    gd->fops = &xenolinux_block_fops; /* file operations */
+    /* de_arr and flags may legitimately stay NULL if allocation fails */
+    gd->de_arr = kmalloc (sizeof *gd->de_arr * units, GFP_KERNEL);
+    gd->flags = kmalloc (sizeof *gd->flags * units, GFP_KERNEL);
+    if (gd->de_arr) memset (gd->de_arr, 0, sizeof *gd->de_arr * units);
+    if (gd->flags) memset (gd->flags, 0, sizeof *gd->flags * units);
+    add_gendisk(gd);
+
+    xen_disk_info.disks[idx].gendisk = gd;
+
+    /* default disk size is just a big number. in the future, we
+       need a message to probe the devices to determine the actual size */
+    register_disk(gd, MKDEV(xlblk_major, 0), 1<<IDE_PARTN_BITS,
+                  &xenolinux_block_fops, capacity);
+
+    return;
+
+ fail_free:
+    if (gd->part) kfree(gd->part);
+    if (gd->sizes) kfree(gd->sizes);
+    kfree(gd);
+ fail:
+    printk (KERN_ALERT
+            "xenolinux block: could not allocate gendisk for disk %d\n", idx);
+}
+
+/*
+ * xlblk_cleanup
+ *
+ * undo xlblk_init: tear down the request queue, detach the per-major
+ * tables, release both interrupts and unregister the block major.
+ *
+ * the per-major tables installed by xlblk_init are static arrays in this
+ * file, so they are only unhooked here -- they must never be kfree()d.
+ */
+static void __exit xlblk_cleanup(void)
+{
+    /* CHANGE FOR MULTIQUEUE */
+    blk_cleanup_queue(BLK_DEFAULT_QUEUE(xlblk_major));
+
+    /* clean up global arrays */
+    read_ahead[xlblk_major] = 0;
+    blk_size[xlblk_major] = NULL;
+    blksize_size[xlblk_major] = NULL;
+    hardsect_size[xlblk_major] = NULL;
+    max_sectors[xlblk_major] = NULL;
+
+    /* the handlers must not stay registered once this code is gone */
+    free_irq(XLBLK_TX_IRQ, &xlblk_device);
+    free_irq(XLBLK_RX_IRQ, &xlblk_device);
+
+    /*
+     *
+     * TODO: FOR EACH GENDISK, FREE
+     *
+     */
+
+    if (unregister_blkdev(xlblk_major, "block"))
+    {
+        printk(KERN_ALERT
+               "XenoLinux Virtual Block Device Driver uninstalled with errors\n");
+    }
+    else
+    {
+        printk(KERN_ALERT "XenoLinux Virtual Block Device Driver uninstalled\n");
+    }
+
+    return;
+}
+
+
+#ifdef MODULE
+module_init(xlblk_init);
+module_exit(xlblk_cleanup);
+#endif
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c
new file mode 100644
index 0000000000..cab6d9a330
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c
@@ -0,0 +1,233 @@
+/******************************************************************************
+ * xenolinux_block_test.c
+ *
+ */
+#define EXPORT_SYMTAB
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <asm/hypervisor-ifs/block.h>
+#include <asm/hypervisor-ifs/hypervisor-if.h>
+
+/******************************************************************/
+
+static struct proc_dir_entry *bdt;
+static blk_ring_entry_t meta;
+static char * data;
+
+/*
+ * /proc read handler: choose the dump routine that matches the operation
+ * recorded in meta by the last write.  Returns whatever the chosen routine
+ * returns, or -EINVAL if the recorded operation is not recognised.
+ */
+static int proc_read_bdt(char *page, char **start, off_t off,
+                         int count, int *eof, void *data)
+{
+    if (meta.operation == XEN_BLOCK_READ ||
+        meta.operation == XEN_BLOCK_WRITE)
+    {
+        return proc_dump_block(page, start, off, count, eof, data);
+    }
+
+    if (meta.operation == XEN_BLOCK_DEBUG)
+    {
+        return proc_dump_debug(page, start, off, count, eof, data);
+    }
+
+    printk(KERN_ALERT
+           "block device test error: unknown operation [%c]\n",
+           meta.operation);
+    return -EINVAL;
+}
+
+/*
+ * proc_dump_debug
+ *
+ * copy the text in meta.buffer, followed by a newline, into the /proc page.
+ *
+ * page:  output buffer
+ * count: size of the output buffer; at most count - 1 characters plus a
+ *        terminating NUL are written
+ * start, off, eof, data: unused
+ *
+ * returns the number of characters placed in page (0 if there is no buffer
+ * or no room).
+ *
+ * the text is formatted straight into page with a bound: the debug buffer
+ * is allocated with 10000 bytes by proc_write_bdt, far more than a stack
+ * array could hold, and it is not guaranteed to be NUL terminated.
+ */
+int proc_dump_debug(char *page, char **start, off_t off,
+                    int count, int *eof, void *data)
+{
+    int len;
+
+    if (count <= 0)
+    {
+        return 0;
+    }
+    if (meta.buffer == NULL)
+    {
+        page[0] = '\0';
+        return 0;
+    }
+
+    /* never read past the block_size bytes that were allocated */
+    len = snprintf (page, count, "%.*s\n",
+                    (int) meta.block_size, meta.buffer);
+    if (len >= count)
+    {
+        len = count - 1;   /* output was truncated to fit */
+    }
+
+    return len;
+}
+
+/* clamp a running output length so it always leaves room for the NUL */
+static int bdt_clamp(int len, int count)
+{
+    return (len >= count) ? count - 1 : len;
+}
+
+/*
+ * proc_dump_block
+ *
+ * write a one-line summary of the last read/write request, followed by a
+ * hex dump of the start of its buffer, into the /proc page.
+ *
+ * page:  output buffer
+ * count: size of the output buffer; output is truncated to fit and is
+ *        always NUL terminated
+ * start, off, eof, data: unused
+ *
+ * returns the number of characters placed in page.
+ *
+ * at most 100 bytes are dumped, and never more than meta.block_size, which
+ * is how many bytes proc_write_bdt allocated for the buffer.
+ */
+int proc_dump_block(char *page, char **start, off_t off,
+                    int count, int *eof, void *data)
+{
+    int len = 0;
+    int limit;
+    int loop;
+
+    if (count <= 0)
+    {
+        return 0;
+    }
+
+    len += snprintf (page + len, count - len,
+                     "Block Device Test\n\n%s blk num: %ld 0x%lx; size: %d 0x%x; device: 0x%x\n",
+                     meta.operation == XEN_BLOCK_WRITE ? "write" : "read",
+                     meta.block_number, meta.block_number,
+                     meta.block_size, meta.block_size,
+                     meta.device);
+    len = bdt_clamp(len, count);
+
+    if (meta.buffer)
+    {
+        limit = (meta.block_size < 100) ? (int) meta.block_size : 100;
+
+        for (loop = 0; loop < limit; loop++)
+        {
+            int i = meta.buffer[loop];
+
+            /* row label every 8 bytes, extra gap between byte pairs */
+            if (loop % 8 == 0)
+            {
+                len += snprintf (page + len, count - len, "[%2d] ", loop);
+            }
+            else if (loop % 2 == 0)
+            {
+                len += snprintf (page + len, count - len, " ");
+            }
+            len = bdt_clamp(len, count);
+
+            len += snprintf (page + len, count - len, " 0x%02x%s", i & 255,
+                             ((loop + 1) % 8 == 0) ? "\n" : "");
+            len = bdt_clamp(len, count);
+        }
+        len += snprintf (page + len, count - len, "\n\n");
+        len = bdt_clamp(len, count);
+    }
+
+    return len;
+}
+
+/*
+ * proc_write_bdt
+ *
+ * /proc write handler: parse "<opcode> <block_number> <block_size> <device>"
+ * and submit the matching request to the hypervisor.
+ *
+ * opcode: r/w = synchronous read/write, R/W = asynchronous read/write,
+ *         d/D = synchronous debug request with a 10000 byte buffer.
+ *
+ * file, data: unused
+ * buffer:     user-space text
+ * count:      length of the user-space text
+ *
+ * returns count on success, -ENOMEM if an allocation fails, -EFAULT if the
+ * user text cannot be copied, -EINVAL for an unknown opcode or a block
+ * size that does not fit hypervisor_request's unsigned short parameter.
+ *
+ * the buffer of the previous request is owned through meta.buffer and is
+ * released once its replacement exists.
+ * NOTE(review): an earlier request may still be in flight when its buffer
+ * is released -- confirm completion ordering with the hypervisor side.
+ */
+int proc_write_bdt(struct file *file, const char *buffer,
+                   unsigned long count, void *data)
+{
+    char *local;
+    char *new_buf;
+    char opcode = '\0';   /* stays '\0' if the input is empty */
+    int block_number = 0;
+    int block_size = 0;
+    int device = 0;
+    int mode;
+    int rc;
+
+    local = kmalloc((count + 1) * sizeof(char), GFP_KERNEL);
+    if (local == NULL)
+    {
+        return -ENOMEM;
+    }
+
+    if (copy_from_user(local, buffer, count))
+    {
+        rc = -EFAULT;
+        goto out;
+    }
+    local[count] = '\0';
+
+    sscanf(local, "%c %i %i %i",
+           &opcode, &block_number, &block_size, &device);
+
+    if (opcode == 'r' || opcode == 'R')
+    {
+        meta.operation = XEN_BLOCK_READ;
+    }
+    else if (opcode == 'w' || opcode == 'W')
+    {
+        meta.operation = XEN_BLOCK_WRITE;
+    }
+    else if (opcode == 'd' || opcode == 'D')
+    {
+        meta.operation = XEN_BLOCK_DEBUG;
+        block_size = 10000;
+    }
+    else
+    {
+        printk(KERN_ALERT
+               "block device test error: unknown opcode [%c]\n", opcode);
+        rc = -EINVAL;
+        goto out;
+    }
+
+    /* upper-case read/write opcodes are queued asynchronously */
+    if (opcode == 'R' || opcode == 'W')
+    {
+        mode = XEN_BLOCK_ASYNC;
+    }
+    else
+    {
+        mode = XEN_BLOCK_SYNC;
+    }
+
+    if (block_size < 0 || block_size > 0xffff)
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    new_buf = kmalloc(block_size * sizeof(char), GFP_KERNEL);
+    if (new_buf == NULL)
+    {
+        rc = -ENOMEM;
+        goto out;
+    }
+
+    /* drop the previous request's buffer now that a new one exists */
+    if (meta.buffer)
+    {
+        kfree(meta.buffer);
+    }
+
+    meta.block_number = block_number;
+    meta.block_size = block_size;
+    meta.device = device;
+    meta.buffer = new_buf;
+
+    /* submit request */
+    hypervisor_request(0, meta.operation, meta.buffer,
+                       meta.block_number, meta.block_size,
+                       meta.device, mode);
+
+    rc = count;
+
+ out:
+    kfree(local);
+    return rc;
+}
+
+
+/*
+ * Module entry point: create /proc/bdt, attach the read and write
+ * handlers and clear the request descriptor.  Returns 0 on success or
+ * -ENOMEM if the proc entry cannot be created.
+ */
+static int __init init_module(void)
+{
+    /* create proc entry */
+    bdt = create_proc_entry("bdt", 0644, NULL);
+    if (bdt == NULL)
+    {
+        return -ENOMEM;
+    }
+
+    bdt->data       = NULL;
+    bdt->read_proc  = proc_read_bdt;
+    bdt->write_proc = proc_write_bdt;
+    bdt->owner      = THIS_MODULE;
+
+    memset(&meta, 0, sizeof(meta));
+
+    /* success */
+    printk(KERN_ALERT "XenoLinux Block Device Test installed\n");
+    return 0;
+}
+
+/*
+ * Module exit: remove /proc/bdt so its handlers can no longer be called,
+ * then release the request buffer.
+ *
+ * The buffer handed to the hypervisor is recorded in meta.buffer by
+ * proc_write_bdt; the file-scope `data` pointer is freed as well, but only
+ * if it refers to different memory, so nothing is freed twice.
+ */
+static void __exit cleanup_module(void)
+{
+    remove_proc_entry("bdt", NULL);
+
+    if (meta.buffer)
+    {
+        kfree(meta.buffer);
+    }
+    if (data && (void *) data != (void *) meta.buffer)
+    {
+        kfree(data);
+    }
+    printk(KERN_ALERT "XenoLinux Block Device Test uninstalled\n");
+}
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c
new file mode 100644
index 0000000000..97d4a65b78
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c
@@ -0,0 +1,27 @@
+/*
+ * domain 0 block driver interface
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+/*
+ * Module entry point: ask for the "xl_block" module to be loaded and log
+ * a message.  The result of request_module() is not examined; this always
+ * returns 0.
+ */
+static int __init init_module(void)
+{
+    (void) request_module("xl_block");
+
+    printk("Successfully installed domain 0 block interface\n");
+    return 0;
+}
+
+/* Module exit: only logs a message.  Declared void, so it returns no value. */
+static void __exit cleanup_module(void)
+{
+    printk("Successfully de-installed domain-0 block interface\n");
+}
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xenolinux-2.4.16-sparse/drivers/block/Config.in b/xenolinux-2.4.16-sparse/drivers/block/Config.in
new file mode 100644
index 0000000000..716774fe74
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/drivers/block/Config.in
@@ -0,0 +1,51 @@
+#
+# Block device driver configuration
+#
+mainmenu_option next_comment
+comment 'Block devices'
+
+tristate 'Normal PC floppy disk support' CONFIG_BLK_DEV_FD
+if [ "$CONFIG_AMIGA" = "y" ]; then
+ tristate 'Amiga floppy support' CONFIG_AMIGA_FLOPPY
+fi
+if [ "$CONFIG_ATARI" = "y" ]; then
+ tristate 'Atari floppy support' CONFIG_ATARI_FLOPPY
+fi
+if [ "$CONFIG_MAC" = "y" ]; then
+ dep_bool 'Macintosh IIfx/Quadra 900/Quadra 950 floppy support (EXPERIMENTAL)' CONFIG_BLK_DEV_SWIM_IOP $CONFIG_EXPERIMENTAL
+fi
+if [ "$CONFIG_MCA" = "y" ]; then
+ tristate 'PS/2 ESDI hard disk support' CONFIG_BLK_DEV_PS2
+fi
+if [ "$CONFIG_ZORRO" = "y" ]; then
+ tristate 'Amiga Zorro II ramdisk support' CONFIG_AMIGA_Z2RAM
+fi
+if [ "$CONFIG_ATARI" = "y" ]; then
+ tristate 'Atari ACSI support' CONFIG_ATARI_ACSI
+ if [ "$CONFIG_ATARI_ACSI" != "n" ]; then
+ comment 'Some devices (e.g. CD jukebox) support multiple LUNs'
+ bool ' Probe all LUNs on each ACSI device' CONFIG_ACSI_MULTI_LUN
+ tristate ' Atari SLM laser printer support' CONFIG_ATARI_SLM
+ fi
+fi
+dep_tristate 'XT hard disk support' CONFIG_BLK_DEV_XD $CONFIG_ISA
+dep_tristate 'Parallel port IDE device support' CONFIG_PARIDE $CONFIG_PARPORT
+if [ "$CONFIG_PARIDE" = "y" -o "$CONFIG_PARIDE" = "m" ]; then
+ source drivers/block/paride/Config.in
+fi
+dep_tristate 'Compaq SMART2 support' CONFIG_BLK_CPQ_DA $CONFIG_PCI
+dep_tristate 'Compaq Smart Array 5xxx support' CONFIG_BLK_CPQ_CISS_DA $CONFIG_PCI
+dep_tristate 'Mylex DAC960/DAC1100 PCI RAID Controller support' CONFIG_BLK_DEV_DAC960 $CONFIG_PCI
+
+tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
+dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET
+
+tristate 'RAM disk support' CONFIG_BLK_DEV_RAM
+if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then
+ int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096
+fi
+dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM
+
+bool 'XenoLinux virtual block device support' CONFIG_XENOLINUX_BLOCK
+
+endmenu
diff --git a/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c b/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c
index a9e973d05f..c6d5c9625b 100644
--- a/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c
+++ b/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c
@@ -1227,6 +1227,11 @@ int __init blk_dev_init(void)
#ifdef CONFIG_SUN_JSFLASH
jsfd_init();
#endif
+
+#ifdef CONFIG_XENOLINUX_BLOCK
+ xlblk_init();
+#endif
+
return 0;
};
diff --git a/xenolinux-2.4.16-sparse/fs/partitions/check.c b/xenolinux-2.4.16-sparse/fs/partitions/check.c
new file mode 100644
index 0000000000..e564544ec6
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/fs/partitions/check.c
@@ -0,0 +1,443 @@
+/*
+ * Code extracted from drivers/block/genhd.c
+ * Copyright (C) 1991-1998 Linus Torvalds
+ * Re-organised Feb 1998 Russell King
+ *
+ * We now have independent partition support from the
+ * block drivers, which allows all the partition code to
+ * be grouped in one location, and it to be mostly self
+ * contained.
+ *
+ * Added needed MAJORS for new pairs, {hdi,hdj}, {hdk,hdl}
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/blk.h>
+#include <linux/init.h>
+#include <linux/raid/md.h>
+
+#include "check.h"
+
+#include "acorn.h"
+#include "amiga.h"
+#include "atari.h"
+#include "ldm.h"
+#include "mac.h"
+#include "msdos.h"
+#include "osf.h"
+#include "sgi.h"
+#include "sun.h"
+#include "ibm.h"
+#include "ultrix.h"
+
+extern int *blk_size[];
+
+#define CHECK_DEBUG 0
+
+int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/
+
+static int (*check_part[])(struct gendisk *hd, struct block_device *bdev, unsigned long first_sect, int first_minor) = { /*
+ * Partition-table probes, one per supported on-disk format.  An entry is
+ * only compiled in when its CONFIG_*_PARTITION option is defined.
+ * check_partition() calls the probes in array order and stops at the
+ * first one that returns a non-zero value.
+ */
+#ifdef CONFIG_ACORN_PARTITION
+ acorn_partition,
+#endif
+#ifdef CONFIG_LDM_PARTITION
+ ldm_partition, /* this must come before msdos */
+#endif
+#ifdef CONFIG_MSDOS_PARTITION
+ msdos_partition,
+#endif
+#ifdef CONFIG_OSF_PARTITION
+ osf_partition,
+#endif
+#ifdef CONFIG_SUN_PARTITION
+ sun_partition,
+#endif
+#ifdef CONFIG_AMIGA_PARTITION
+ amiga_partition,
+#endif
+#ifdef CONFIG_ATARI_PARTITION
+ atari_partition,
+#endif
+#ifdef CONFIG_MAC_PARTITION
+ mac_partition,
+#endif
+#ifdef CONFIG_SGI_PARTITION
+ sgi_partition,
+#endif
+#ifdef CONFIG_ULTRIX_PARTITION
+ ultrix_partition,
+#endif
+#ifdef CONFIG_IBM_PARTITION
+ ibm_partition,
+#endif
+ NULL /* terminator: the probe loop in check_partition() ends here */
+};
+
+/*
+ * This is ucking fugly but it's probably the best thing for 2.4.x
+ * Take it as a clear reminder that we should put the device name
+ * generation in the object kdev_t points to in 2.5.
+ */
+
+#ifdef CONFIG_ARCH_S390
+int (*genhd_dasd_name)(char*,int,int,struct gendisk*) = NULL;
+EXPORT_SYMBOL(genhd_dasd_name);
+#endif
+
+/*
+ * Format a "<maj>/c<ctlr>d<unit>" name, with a "p<part>" suffix when
+ * part is non-zero.  Shared by the Compaq SMART2, Compaq CISS and
+ * DAC960 branches of disk_name().
+ */
+static char *ctlr_disk_name (char *buf, const char *maj, int ctlr,
+			     unsigned int unit, unsigned int part)
+{
+	if (part)
+		sprintf(buf, "%s/c%dd%dp%d", maj, ctlr, unit, part);
+	else
+		sprintf(buf, "%s/c%dd%d", maj, ctlr, unit);
+	return buf;
+}
+
+/*
+ * disk_name() is used by the partition check code and the md driver.
+ * It formats the device name of (hd, minor) into the supplied buffer
+ * and returns a pointer to the start of the name.  The returned pointer
+ * always lies inside buf, but it is buf + offset rather than buf itself
+ * when the devfs path is used.
+ *
+ * NOTE(review): callers were told the buffer must be "at least 32"
+ * bytes, yet 64 is handed to devfs_generate_path() as the buffer length
+ * below.  Confirm that every caller really supplies 64 bytes.
+ */
+
+char *disk_name (struct gendisk *hd, int minor, char *buf)
+{
+	const char *maj = hd->major_name;
+	unsigned int unit = (minor >> hd->minor_shift);
+	unsigned int part = (minor & ((1 << hd->minor_shift) - 1));
+
+	/* Prefer the devfs path when this minor has a devfs entry. */
+	if ((unit < hd->nr_real) && hd->part[minor].de) {
+		int start = devfs_generate_path (hd->part[minor].de, buf, 64);
+
+		if (start >= 0)
+			return buf + start;
+	}
+
+#ifdef CONFIG_ARCH_S390
+	if (genhd_dasd_name
+	    && genhd_dasd_name (buf, unit, part, hd) == 0)
+		return buf;
+#endif
+
+	/*
+	 * IDE devices use multiple major numbers, but the drives
+	 * are named as: {hda,hdb}, {hdc,hdd}, {hde,hdf}, {hdg,hdh}..
+	 * Every case below deliberately falls through to the next one,
+	 * so IDEn_MAJOR ends up adding 2 * n to the unit number.
+	 */
+	switch (hd->major) {
+	case IDE9_MAJOR:
+		unit += 2;
+		/* fall through */
+	case IDE8_MAJOR:
+		unit += 2;
+		/* fall through */
+	case IDE7_MAJOR:
+		unit += 2;
+		/* fall through */
+	case IDE6_MAJOR:
+		unit += 2;
+		/* fall through */
+	case IDE5_MAJOR:
+		unit += 2;
+		/* fall through */
+	case IDE4_MAJOR:
+		unit += 2;
+		/* fall through */
+	case IDE3_MAJOR:
+		unit += 2;
+		/* fall through */
+	case IDE2_MAJOR:
+		unit += 2;
+		/* fall through */
+	case IDE1_MAJOR:
+		unit += 2;
+		/* fall through */
+	case IDE0_MAJOR:
+		maj = "hd";
+		break;
+	case MD_MAJOR:
+		sprintf(buf, "%s%d", maj, unit);
+		return buf;
+	}
+
+	if (hd->major >= SCSI_DISK1_MAJOR && hd->major <= SCSI_DISK7_MAJOR) {
+		unit = unit + (hd->major - SCSI_DISK1_MAJOR + 1) * 16;
+		if (unit+'a' > 'z') {
+			/* past "sdz": switch to two-letter names */
+			unit -= 26;
+			sprintf(buf, "sd%c%c", 'a' + unit / 26, 'a' + unit % 26);
+			if (part)
+				sprintf(buf + 4, "%d", part);
+			return buf;
+		}
+	}
+
+	if (hd->major >= COMPAQ_SMART2_MAJOR && hd->major <= COMPAQ_SMART2_MAJOR+7)
+		return ctlr_disk_name(buf, maj, hd->major - COMPAQ_SMART2_MAJOR,
+				      unit, part);
+
+	if (hd->major >= COMPAQ_CISS_MAJOR && hd->major <= COMPAQ_CISS_MAJOR+7)
+		return ctlr_disk_name(buf, maj, hd->major - COMPAQ_CISS_MAJOR,
+				      unit, part);
+
+	if (hd->major >= DAC960_MAJOR && hd->major <= DAC960_MAJOR+7)
+		return ctlr_disk_name(buf, maj, hd->major - DAC960_MAJOR,
+				      unit, part);
+
+	if (hd->major == ATARAID_MAJOR) {
+		int disk = minor >> hd->minor_shift;
+		int sub = minor & (( 1 << hd->minor_shift) - 1);
+
+		if (sub)
+			sprintf(buf, "%s/d%dp%d", maj, disk, sub);
+		else
+			sprintf(buf, "%s/d%d", maj, disk);
+		return buf;
+	}
+
+	/* Default scheme: <maj><unit letter>[<part>] */
+	if (!part)
+		sprintf(buf, "%s%c", maj, unit+'a');
+	else
+		sprintf(buf, "%s%c%d", maj, unit+'a', part);
+	return buf;
+}
+
+/*
+ * Record one partition's start sector and length in the disk's
+ * partition array and print a short tag for it.
+ *
+ * With devfs, and for the Compaq SMART2/CISS majors, the tag is
+ * " p<N>"; otherwise it is the name produced by disk_name().
+ */
+void add_gd_partition(struct gendisk *hd, int minor, int start, int size)
+{
+	struct hd_struct *entry = &hd->part[minor];
+#ifndef CONFIG_DEVFS_FS
+	char buf[40];
+	int on_compaq;
+#endif
+
+	entry->start_sect = start;
+	entry->nr_sects = size;
+
+#ifndef CONFIG_DEVFS_FS
+	on_compaq =
+	    (hd->major >= COMPAQ_SMART2_MAJOR+0 && hd->major <= COMPAQ_SMART2_MAJOR+7) ||
+	    (hd->major >= COMPAQ_CISS_MAJOR+0 && hd->major <= COMPAQ_CISS_MAJOR+7);
+	if (!on_compaq) {
+		printk(" %s", disk_name(hd, minor, buf));
+		return;
+	}
+#endif
+	printk(" p%d", (minor & ((1 << hd->minor_shift) - 1)));
+}
+
+/*
+ * check_partition() - probe one disk for a partition table.
+ *
+ * @hd:               gendisk the disk belongs to
+ * @dev:              device number of the whole disk
+ * @first_part_minor: minor number of the disk's first partition
+ *
+ * Prints the disk name, then runs the probes in check_part[] until one
+ * returns non-zero.  A start_sect of -1 on the whole-disk entry is a
+ * request to skip probing; it is reset to 0 and nothing else is done.
+ * Afterwards the devfs entries are registered, or unregistered when
+ * hd->sizes is NULL.
+ */
+static void check_partition(struct gendisk *hd, kdev_t dev, int first_part_minor)
+{
+	devfs_handle_t de = NULL;
+	static int first_time = 1;	/* banner is printed once only */
+	unsigned long first_sector;
+	struct block_device *bdev;
+	char buf[64];
+	int i;
+
+	if (CHECK_DEBUG) printk (KERN_ALERT "check.c::check_partition\n");
+
+	if (first_time)
+		printk(KERN_INFO "Partition check:\n");
+	first_time = 0;
+	first_sector = hd->part[MINOR(dev)].start_sect;
+
+	/*
+	 * This is a kludge to allow the partition check to be
+	 * skipped for specific drives (e.g. IDE CD-ROM drives)
+	 */
+	if ((int)first_sector == -1) {
+		hd->part[MINOR(dev)].start_sect = 0;
+		return;
+	}
+
+	if (hd->de_arr)
+		de = hd->de_arr[MINOR(dev) >> hd->minor_shift];
+	i = devfs_generate_path (de, buf, sizeof buf);
+	if (i >= 0)
+		printk(KERN_INFO " /dev/%s:", buf + i);
+	else
+		printk(KERN_INFO " %s:", disk_name(hd, MINOR(dev), buf));
+
+	bdev = bdget(kdev_t_to_nr(dev));
+	if (!bdev) {
+		/*
+		 * bdget() may return NULL when it cannot allocate; do
+		 * not dereference it.  Terminate the line started above
+		 * and skip straight to the devfs bookkeeping.
+		 */
+		printk(" unable to get block device\n");
+		goto register_devfs;
+	}
+	bdev->bd_inode->i_size = (loff_t)hd->part[MINOR(dev)].nr_sects << 9;
+	bdev->bd_inode->i_blkbits = blksize_bits(block_size(dev));
+
+	for (i = 0; check_part[i]; i++) {
+		int res;
+
+		res = check_part[i](hd, bdev, first_sector, first_part_minor);
+		if (res) {
+			/* non-zero ends probing; negative is a read error */
+			if (res < 0 && warn_no_part)
+				printk(" unable to read partition table\n");
+			goto release_bdev;
+		}
+	}
+
+	printk(" unknown partition table\n");
+release_bdev:
+	/* Drop everything cached while probing before releasing bdev. */
+	invalidate_bdev(bdev, 1);
+	truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
+	bdput(bdev);
+register_devfs:
+	devfs_register_partitions (hd, first_part_minor - 1, hd->sizes ? 0 : 1);
+}
+
+#ifdef CONFIG_DEVFS_FS
+/*
+ * Create the devfs node "part<part>" for partition number part of the
+ * disk whose whole-disk minor is minor.  The node goes into the parent
+ * directory of the disk's own devfs entry.  Nothing is done when the
+ * partition already has an entry or that parent cannot be found.
+ */
+static void devfs_register_partition (struct gendisk *dev, int minor, int part)
+{
+	struct hd_struct *slot = &dev->part[minor + part];
+	unsigned int node_flags = DEVFS_FL_DEFAULT;
+	int disk_idx = minor >> dev->minor_shift;
+	devfs_handle_t parent;
+	char node_name[16];
+
+	if (slot->de)
+		return;
+
+	parent = devfs_get_parent (dev->part[minor].de);
+	if (!parent)
+		return;
+
+	if (dev->flags && (dev->flags[disk_idx] & GENHD_FL_REMOVABLE))
+		node_flags |= DEVFS_FL_REMOVABLE;
+
+	sprintf (node_name, "part%d", part);
+	slot->de = devfs_register (parent, node_name, node_flags,
+				   dev->major, minor + part,
+				   S_IFBLK | S_IRUSR | S_IWUSR,
+				   dev->fops, NULL);
+}
+
+static struct unique_numspace disc_numspace = UNIQUE_NUMBERSPACE_INITIALISER;
+
+/*
+ * Create the devfs "disc" node for a whole disk, plus a "discs/disc<N>"
+ * symlink pointing at the disk's directory.  <N> comes from
+ * disc_numspace and is stored in the disk's part[] entry.  Nothing is
+ * done when the disk already has a devfs entry, or when a devfs-aware
+ * driver left its de_arr[] slot empty.
+ */
+static void devfs_register_disc (struct gendisk *dev, int minor)
+{
+	static devfs_handle_t discs_dir;	/* "discs", created on first use */
+	struct hd_struct *whole = &dev->part[minor];
+	unsigned int node_flags = DEVFS_FL_DEFAULT;
+	int disk_idx = minor >> dev->minor_shift;
+	int pos = 0;
+	devfs_handle_t dir, slave;
+	char dirname[64], symlink[16];
+
+	if (whole->de)
+		return;
+
+	if (dev->flags && (dev->flags[disk_idx] & GENHD_FL_REMOVABLE))
+		node_flags |= DEVFS_FL_REMOVABLE;
+
+	if (!dev->de_arr) {
+		/* Unaware driver: construct "real" directory */
+		sprintf (dirname, "../%s/disc%d", dev->major_name, disk_idx);
+		dir = devfs_mk_dir (NULL, dirname + 3, NULL);
+	} else {
+		dir = dev->de_arr[disk_idx];
+		if (!dir)	/* Aware driver wants to block disc management */
+			return;
+		/*
+		 * The path is generated 3 bytes into dirname so that
+		 * "../" can be written directly in front of it.
+		 */
+		pos = devfs_generate_path (dir, dirname + 3, sizeof dirname-3);
+		if (pos < 0)
+			return;
+		/* exactly 3 bytes, no terminator: the path follows at once */
+		strncpy (dirname + pos, "../", 3);
+	}
+
+	if (!discs_dir)
+		discs_dir = devfs_mk_dir (NULL, "discs", NULL);
+
+	whole->number = devfs_alloc_unique_number (&disc_numspace);
+	sprintf (symlink, "disc%d", whole->number);
+	devfs_mk_symlink (discs_dir, symlink, DEVFS_FL_DEFAULT,
+			  dirname + pos, &slave, NULL);
+
+	whole->de = devfs_register (dir, "disc", node_flags, dev->major, minor,
+				    S_IFBLK | S_IRUSR | S_IWUSR, dev->fops, NULL);
+	devfs_auto_unregister (whole->de, slave);
+	if (!dev->de_arr)
+		devfs_auto_unregister (slave, dir);
+}
+#endif /* CONFIG_DEVFS_FS */
+
+/*
+ * Bring the devfs entries of one disk in line with its partition array.
+ *
+ * With unregister == 0 the disc node is created, every partition with a
+ * non-zero size gets a node, and empty partitions lose theirs.  With
+ * unregister != 0 all partition nodes and the disc node are removed and
+ * the disc number is returned to disc_numspace.
+ *
+ * Compiles to an empty function without CONFIG_DEVFS_FS.
+ */
+void devfs_register_partitions (struct gendisk *dev, int minor, int unregister)
+{
+#ifdef CONFIG_DEVFS_FS
+	int part;
+
+	if (!unregister)
+		devfs_register_disc (dev, minor);
+
+	for (part = 1; part < dev->max_p; part++) {
+		struct hd_struct *slot = &dev->part[part + minor];
+
+		if (!unregister && slot->nr_sects >= 1) {
+			devfs_register_partition (dev, minor, part);
+		} else {
+			devfs_unregister (slot->de);
+			slot->de = NULL;
+		}
+	}
+
+	if (!unregister)
+		return;
+
+	devfs_unregister (dev->part[minor].de);
+	dev->part[minor].de = NULL;
+	devfs_dealloc_unique_number (&disc_numspace,
+				     dev->part[minor].number);
+#endif /* CONFIG_DEVFS_FS */
+}
+
+/*
+ * register_disk() hands a disk to grok_partitions(), which (re-)reads
+ * its partition table and sets things back up again.  A NULL gdev is
+ * silently ignored.  ops is only used in the debug message.
+ *
+ * There are some important caveats.  You must ensure that no one is
+ * using the device, and no one can start using the device while this
+ * function is being executed.
+ *
+ * Much of the cleanup from the old partition tables should have already
+ * been done.
+ */
+
+void register_disk(struct gendisk *gdev, kdev_t dev, unsigned minors,
+	struct block_device_operations *ops, long size)
+{
+	if (!gdev)
+		return;
+
+	if (CHECK_DEBUG)
+		printk (KERN_ALERT
+			"check.c::register_disk gdev:%p dev:%d min:%u ops:%p sz:%ld\n",
+			gdev, dev, minors, ops, size);
+
+	grok_partitions(gdev, MINOR(dev)>>gdev->minor_shift, minors, size);
+}
+
+/*
+ * grok_partitions() - size a drive and scan it for partitions.
+ *
+ * @dev:    gendisk holding the drive
+ * @drive:  index of the drive within dev
+ * @minors: number of minors available to the drive
+ * @size:   drive size in sectors
+ *
+ * The size is always stored in the whole-disk entry.  The partition
+ * scan only runs when size is non-zero and the drive has more than one
+ * minor.  dev->sizes[], when present, is filled in before the scan for
+ * the whole disk and again afterwards for every minor of the drive.
+ */
+void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size)
+{
+	int base = drive << dev->minor_shift;
+	int limit = base + dev->max_p;
+	int m;
+
+	if (CHECK_DEBUG) printk (KERN_ALERT "check.c::grok_partitions\n");
+
+	if (dev->sizes == NULL)
+		blk_size[dev->major] = NULL;
+
+	dev->part[base].nr_sects = size;
+
+	/* No such device or no minors to use for partitions */
+	if (size == 0 || minors == 1)
+		return;
+
+	if (dev->sizes != NULL) {
+		dev->sizes[base] = size >> (BLOCK_SIZE_BITS - 9);
+		m = base + 1;
+		while (m < limit)
+			dev->sizes[m++] = 0;
+	}
+	blk_size[dev->major] = dev->sizes;
+	check_partition(dev, MKDEV(dev->major, base), 1 + base);
+
+	/*
+	 * We need to set the sizes array before we will be able to access
+	 * any of the partitions on this device.
+	 */
+	if (dev->sizes == NULL)	/* optional safeguard in ll_rw_blk.c */
+		return;
+	for (m = base; m < limit; m++)
+		dev->sizes[m] = dev->part[m].nr_sects >> (BLOCK_SIZE_BITS - 9);
+}
+
+/*
+ * read_dev_sector() - read 512-byte sector n of bdev via the page cache.
+ *
+ * On success the page is stored in p->v and a pointer to the sector's
+ * data inside that page is returned.  On any failure p->v is set to
+ * NULL and NULL is returned; a page that was obtained but is not up to
+ * date, or is flagged with an error, is released first.
+ */
+unsigned char *read_dev_sector(struct block_device *bdev, unsigned long n, Sector *p)
+{
+	struct address_space *mapping = bdev->bd_inode->i_mapping;
+	int per_page = PAGE_CACHE_SIZE / 512;	/* sectors in one page */
+	struct page *page;
+
+	page = read_cache_page(mapping, n/per_page,
+			(filler_t *)mapping->a_ops->readpage, NULL);
+	if (IS_ERR(page))
+		goto none;
+
+	wait_on_page(page);
+	if (Page_Uptodate(page) && !PageError(page)) {
+		p->v = page;
+		return (unsigned char *)page_address(page) + 512 * (n % per_page);
+	}
+	page_cache_release(page);
+none:
+	p->v = NULL;
+	return NULL;
+}
diff --git a/xenolinux-2.4.16-sparse/fs/partitions/msdos.c b/xenolinux-2.4.16-sparse/fs/partitions/msdos.c
new file mode 100644
index 0000000000..34a086024e
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/fs/partitions/msdos.c
@@ -0,0 +1,642 @@
+/*
+ * fs/partitions/msdos.c
+ *
+ * Code extracted from drivers/block/genhd.c
+ * Copyright (C) 1991-1998 Linus Torvalds
+ *
+ * Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
+ * in the early extended-partition checks and added DM partitions
+ *
+ * Support for DiskManager v6.0x added by Mark Lord,
+ * with information provided by OnTrack. This now works for linux fdisk
+ * and LILO, as well as loadlin and bootln. Note that disks other than
+ * /dev/hda *must* have a "DOS" type 0x51 partition in the first slot (hda1).
+ *
+ * More flexible handling of extended partitions - aeb, 950831
+ *
+ * Check partition table on IDE disks for common CHS translations
+ *
+ * Re-organised Feb 1998 Russell King
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/string.h>
+#include <linux/blk.h>
+
+#ifdef CONFIG_BLK_DEV_IDE
+#include <linux/ide.h> /* IDE xlate */
+#endif /* CONFIG_BLK_DEV_IDE */
+
+#define MSDOS_DEBUG 0
+
+#include <asm/system.h>
+
+#include "check.h"
+#include "msdos.h"
+
+#if CONFIG_BLK_DEV_MD
+extern void md_autodetect_dev(kdev_t dev);
+#endif
+
+/*
+ * Many architectures don't like unaligned accesses, which is
+ * frequently the case with the nr_sects and start_sect partition
+ * table entries.
+ */
+#include <asm/unaligned.h>
+
+/*
+ * Accessors for partition-table fields.  p is a pointer to a structure
+ * with sys_ind / nr_sects / start_sect members.  The fields are read
+ * with get_unaligned(); NR_SECTS and START_SECT also pass the value
+ * through le32_to_cpu().
+ *
+ * The argument is parenthesised so that expressions such as
+ * NR_SECTS(p + 1) or SYS_IND(&tbl[i]) expand as intended.
+ */
+#define SYS_IND(p)	(get_unaligned(&(p)->sys_ind))
+#define NR_SECTS(p)	({ __typeof__((p)->nr_sects) __a =	\
+				get_unaligned(&(p)->nr_sects);	\
+				le32_to_cpu(__a);		\
+			})
+
+#define START_SECT(p)	({ __typeof__((p)->start_sect) __a =	\
+				get_unaligned(&(p)->start_sect);	\
+				le32_to_cpu(__a);		\
+			})
+
+/*
+ * Return 1 when the entry's system indicator is one of the DOS, Win98
+ * or Linux extended-partition types, 0 otherwise.
+ */
+static inline int is_extended_partition(struct partition *p)
+{
+	switch (SYS_IND(p)) {
+	case DOS_EXTENDED_PARTITION:
+	case WIN98_EXTENDED_PARTITION:
+	case LINUX_EXTENDED_PARTITION:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * partition_name() writes the short name of a partition into buf and
+ * returns a pointer to the name: "p<N>" with devfs, otherwise whatever
+ * disk_name() produces.
+ * Several partition types use it, which makes conditional inclusion
+ * messy, hence __attribute__ ((unused)).
+ */
+static char __attribute__ ((unused))
+	*partition_name (struct gendisk *hd, int minor, char *buf)
+{
+#ifndef CONFIG_DEVFS_FS
+	return disk_name(hd, minor, buf);
+#else
+	const int part = (minor & ((1 << hd->minor_shift) - 1));
+
+	sprintf(buf, "p%d", part);
+	return buf;
+#endif
+}
+
+#define MSDOS_LABEL_MAGIC1 0x55
+#define MSDOS_LABEL_MAGIC2 0xAA
+
+/*
+ * Return non-zero when the two bytes at p are the MS-DOS label magic
+ * (MSDOS_LABEL_MAGIC1 followed by MSDOS_LABEL_MAGIC2).
+ */
+static inline int
+msdos_magic_present(unsigned char *p)
+{
+	if (p[0] != MSDOS_LABEL_MAGIC1)
+		return 0;
+	return p[1] == MSDOS_LABEL_MAGIC2;
+}
+
+/*
+ * Create devices for each logical partition in an extended partition.
+ * The logical partitions form a linked list, with each entry being
+ * a partition table with two entries. The first entry
+ * is the real data partition (with a start relative to the partition
+ * table start). The second is a pointer to the next logical partition
+ * (with a start relative to the entire extended partition).
+ * We do not create a Linux partition for the partition tables, but
+ * only for the actual data partitions.
+ *
+ * hd, minor:     the extended partition; its start sector is taken from
+ *                hd->part[minor].start_sect
+ * bdev:          block device the partition tables are read from
+ * first_size:    size of the extended partition, used to bounds-check
+ *                the 3rd and 4th entries of each table
+ * current_minor: in/out; minor to give to the next data partition,
+ *                incremented once per partition added
+ *
+ * The walk ends when a table cannot be read or lacks the 0x55AA magic,
+ * when no further link is found, when *current_minor has no partition
+ * bits set, or after more than 100 links without a data partition.
+ */
+
+static void extended_partition(struct gendisk *hd, struct block_device *bdev,
+ int minor, unsigned long first_size, int *current_minor)
+{
+ struct partition *p;
+ Sector sect;
+ unsigned char *data;
+ unsigned long first_sector, this_sector, this_size;
+ int mask = (1 << hd->minor_shift) - 1;
+ int sector_size = get_hardsect_size(to_kdev_t(bdev->bd_dev)) / 512;
+ int loopct = 0; /* number of links followed
+ without finding a data partition */
+ int i;
+
+ this_sector = first_sector = hd->part[minor].start_sect;
+ this_size = first_size;
+
+ while (1) {
+ if (++loopct > 100) /* guard against chains that never yield a data partition */
+ return;
+ if ((*current_minor & mask) == 0) /* partition bits of the minor are all zero: stop */
+ return;
+ data = read_dev_sector(bdev, this_sector, &sect);
+ if (!data)
+ return;
+
+ if (!msdos_magic_present(data + 510))
+ goto done;
+
+ p = (struct partition *) (data + 0x1be);
+
+ /*
+ * Usually, the first entry is the real data partition,
+ * the 2nd entry is the next extended partition, or empty,
+ * and the 3rd and 4th entries are unused.
+ * However, DRDOS sometimes has the extended partition as
+ * the first entry (when the data partition is empty),
+ * and OS/2 seems to use all four entries.
+ */
+
+ /*
+ * First process the data partition(s)
+ */
+ for (i=0; i<4; i++, p++) {
+ unsigned long offs, size, next;
+ if (!NR_SECTS(p) || is_extended_partition(p))
+ continue;
+
+ /* Check the 3rd and 4th entries -
+ these sometimes contain random garbage */
+ offs = START_SECT(p)*sector_size;
+ size = NR_SECTS(p)*sector_size;
+ next = this_sector + offs; /* start of the data partition, in 512-byte units */
+ if (i >= 2) {
+ if (offs + size > this_size)
+ continue;
+ if (next < first_sector)
+ continue;
+ if (next + size > first_sector + first_size)
+ continue;
+ }
+
+ add_gd_partition(hd, *current_minor, next, size);
+#if CONFIG_BLK_DEV_MD
+ if (SYS_IND(p) == LINUX_RAID_PARTITION) {
+ md_autodetect_dev(MKDEV(hd->major,*current_minor));
+ }
+#endif
+
+ (*current_minor)++;
+ loopct = 0; /* found a data partition: restart the link counter */
+ if ((*current_minor & mask) == 0)
+ goto done;
+ }
+ /*
+ * Next, process the (first) extended partition, if present.
+ * (So far, there seems to be no reason to make
+ * extended_partition() recursive and allow a tree
+ * of extended partitions.)
+ * It should be a link to the next logical partition.
+ * Create a minor for this just long enough to get the next
+ * partition table. The minor will be reused for the next
+ * data partition.
+ */
+ p -= 4; /* rewind to the first entry of this table */
+ for (i=0; i<4; i++, p++)
+ if (NR_SECTS(p) && is_extended_partition(p))
+ break;
+ if (i == 4)
+ goto done; /* nothing left to do */
+
+ this_sector = first_sector + START_SECT(p) * sector_size;
+ this_size = NR_SECTS(p) * sector_size;
+ minor = *current_minor;
+ put_dev_sector(sect); /* release this table before reading the next one */
+ }
+done:
+ put_dev_sector(sect);
+}
+
+/*
+ * Create devices for the slices listed in a Solaris x86 VTOC, which is
+ * read from the second sector of the enclosing MS-DOS partition.
+ *
+ * james@bpgc.com: Solaris has a nasty indicator: 0x82 which also
+ * indicates linux swap.  Be careful before believing this is Solaris.
+ * Hence nothing is printed or added unless the VTOC sanity word matches.
+ *
+ * Compiles to an empty function without CONFIG_SOLARIS_X86_PARTITION.
+ */
+
+static void
+solaris_x86_partition(struct gendisk *hd, struct block_device *bdev,
+		int minor, int *current_minor)
+{
+
+#ifdef CONFIG_SOLARIS_X86_PARTITION
+	long offset = hd->part[minor].start_sect;
+	int mask = (1 << hd->minor_shift) - 1;
+	struct solaris_x86_vtoc *v;
+	struct solaris_x86_slice *s;
+	Sector sect;
+	char buf[40];
+	int i;
+
+	v = (struct solaris_x86_vtoc *)read_dev_sector(bdev, offset+1, &sect);
+	if (!v)
+		return;
+	if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE)
+		goto release;
+
+	printk(" %s: <solaris:", partition_name(hd, minor, buf));
+	if (le32_to_cpu(v->v_version) != 1) {
+		printk(" cannot handle version %d vtoc>\n",
+		       le32_to_cpu(v->v_version));
+		goto release;
+	}
+
+	/* Stop early once *current_minor has no partition bits set. */
+	for (i = 0; i < SOLARIS_X86_NUMSLICE && (*current_minor & mask) != 0; i++) {
+		s = &v->v_slice[i];
+		if (s->s_size != 0) {
+			printk(" [s%d]", i);
+			/* solaris partitions are relative to current MS-DOS
+			 * one but add_gd_partition starts relative to sector
+			 * zero of the disk.  Therefore, must add the offset
+			 * of the current partition */
+			add_gd_partition(hd, *current_minor,
+					 le32_to_cpu(s->s_start)+offset,
+					 le32_to_cpu(s->s_size));
+			(*current_minor)++;
+		}
+	}
+	put_dev_sector(sect);
+	printk(" >\n");
+	return;
+
+release:
+	put_dev_sector(sect);
+#endif
+}
+
+#ifdef CONFIG_BSD_DISKLABEL
+static void
+check_and_add_bsd_partition(struct gendisk *hd, struct bsd_partition *bsd_p,
+ int minor, int *current_minor) /*
+ * Add one BSD disklabel partition unless Linux already knows about it.
+ *
+ * bsd_p is compared with existing hd->part[] entries, starting at the
+ * one after minor:
+ *   - no overlap with an entry: keep scanning;
+ *   - identical start and size: already known, return without adding;
+ *   - BSD partition lies inside the entry: stop scanning and add it;
+ *   - any other overlap: print "???" and return without adding.
+ * When it is added, it gets minor *current_minor, which is then
+ * incremented.
+ */
+{
+ struct hd_struct *lin_p;
+ /* check relative position of partitions. */
+ for (lin_p = hd->part + 1 + minor;
+ lin_p - hd->part - minor < *current_minor; lin_p++) {
+ /* no relationship -> try again */
+ if (lin_p->start_sect + lin_p->nr_sects <= le32_to_cpu(bsd_p->p_offset) ||
+ lin_p->start_sect >= le32_to_cpu(bsd_p->p_offset) + le32_to_cpu(bsd_p->p_size))
+ continue;
+ /* equal -> no need to add */
+ if (lin_p->start_sect == le32_to_cpu(bsd_p->p_offset) &&
+ lin_p->nr_sects == le32_to_cpu(bsd_p->p_size))
+ return;
+ /* bsd living within dos partition */
+ if (lin_p->start_sect <= le32_to_cpu(bsd_p->p_offset) && lin_p->start_sect
+ + lin_p->nr_sects >= le32_to_cpu(bsd_p->p_offset) + le32_to_cpu(bsd_p->p_size)) {
+#ifdef DEBUG_BSD_DISKLABEL
+ printk("w: %d %ld+%ld,%d+%d",
+ lin_p - hd->part,
+ lin_p->start_sect, lin_p->nr_sects,
+ le32_to_cpu(bsd_p->p_offset),
+ le32_to_cpu(bsd_p->p_size));
+#endif
+ break; /* leave the loop; the partition is added below */
+ }
+ /* ouch: bsd and linux overlap. Don't even try for that partition */
+#ifdef DEBUG_BSD_DISKLABEL
+ printk("???: %d %ld+%ld,%d+%d",
+ lin_p - hd->part, lin_p->start_sect, lin_p->nr_sects,
+ le32_to_cpu(bsd_p->p_offset), le32_to_cpu(bsd_p->p_size));
+#endif
+ printk("???");
+ return;
+ } /* if the bsd partition is not currently known to linux, we end
+ * up here
+ */
+ add_gd_partition(hd, *current_minor, le32_to_cpu(bsd_p->p_offset),
+ le32_to_cpu(bsd_p->p_size));
+ (*current_minor)++;
+}
+
+/*
+ * Create devices for BSD partitions listed in a disklabel, under a
+ * dos-like partition. See extended_partition() for more information.
+ *
+ * The disklabel is read from the second sector of the partition given
+ * by (hd, minor).  name is the tag printed for it.  At most
+ * max_partitions entries are examined, fewer when the label itself
+ * declares fewer.
+ */
+static void do_bsd_partition(struct gendisk *hd, struct block_device *bdev,
+	int minor, int *current_minor, char *name, int max_partitions)
+{
+	long offset = hd->part[minor].start_sect;
+	int mask = (1 << hd->minor_shift) - 1;
+	struct bsd_disklabel *l;
+	struct bsd_partition *p;
+	Sector sect;
+	char buf[40];
+	int idx;
+
+	l = (struct bsd_disklabel *)read_dev_sector(bdev, offset+1, &sect);
+	if (!l)
+		return;
+	if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) {
+		put_dev_sector(sect);
+		return;
+	}
+	printk(" %s: <%s", partition_name(hd, minor, buf), name);
+
+	/* Never look past the number of entries the label declares. */
+	if (le16_to_cpu(l->d_npartitions) < max_partitions)
+		max_partitions = le16_to_cpu(l->d_npartitions);
+
+	for (idx = 0; idx < max_partitions; idx++) {
+		p = &l->d_partitions[idx];
+		if ((*current_minor & mask) == 0)
+			break;
+		if (p->p_fstype != BSD_FS_UNUSED)
+			check_and_add_bsd_partition(hd, p, minor, current_minor);
+	}
+	put_dev_sector(sect);
+	printk(" >\n");
+}
+#endif
+
+/*
+ * Subtype parser for BSD partitions: scans the disklabel with the
+ * "bsd" tag and the BSD_MAXPARTITIONS limit.  Does nothing without
+ * CONFIG_BSD_DISKLABEL.
+ */
+static void bsd_partition(struct gendisk *hd, struct block_device *bdev,
+			  int minor, int *current_minor)
+{
+#ifdef CONFIG_BSD_DISKLABEL
+	do_bsd_partition(hd, bdev, minor, current_minor,
+			 "bsd", BSD_MAXPARTITIONS);
+#endif
+}
+
+/*
+ * Subtype parser for NetBSD partitions: scans the disklabel with the
+ * "netbsd" tag and the BSD_MAXPARTITIONS limit.  Does nothing without
+ * CONFIG_BSD_DISKLABEL.
+ */
+static void netbsd_partition(struct gendisk *hd, struct block_device *bdev,
+			     int minor, int *current_minor)
+{
+#ifdef CONFIG_BSD_DISKLABEL
+	do_bsd_partition(hd, bdev, minor, current_minor,
+			 "netbsd", BSD_MAXPARTITIONS);
+#endif
+}
+
+/*
+ * Subtype parser for OpenBSD partitions: scans the disklabel with the
+ * "openbsd" tag and the OPENBSD_MAXPARTITIONS limit.  Does nothing
+ * without CONFIG_BSD_DISKLABEL.
+ */
+static void openbsd_partition(struct gendisk *hd, struct block_device *bdev,
+			      int minor, int *current_minor)
+{
+#ifdef CONFIG_BSD_DISKLABEL
+	do_bsd_partition(hd, bdev, minor, current_minor,
+			 "openbsd", OPENBSD_MAXPARTITIONS);
+#endif
+}
+
+/*
+ * Create devices for Unixware partitions listed in a disklabel, under a
+ * dos-like partition. See extended_partition() for more information.
+ *
+ * The disklabel is read 29 sectors past the start of the partition and
+ * must carry both Unixware magic numbers.  Compiles to an empty
+ * function without CONFIG_UNIXWARE_DISKLABEL.
+ */
+static void unixware_partition(struct gendisk *hd, struct block_device *bdev,
+			       int minor, int *current_minor)
+{
+#ifdef CONFIG_UNIXWARE_DISKLABEL
+	long offset = hd->part[minor].start_sect;
+	int mask = (1 << hd->minor_shift) - 1;
+	struct unixware_disklabel *l;
+	struct unixware_slice *p;
+	Sector sect;
+	char buf[40];
+
+	l = (struct unixware_disklabel *)read_dev_sector(bdev, offset+29, &sect);
+	if (!l)
+		return;
+	if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC ||
+	    le32_to_cpu(l->vtoc.v_magic) != UNIXWARE_DISKMAGIC2) {
+		put_dev_sector(sect);
+		return;
+	}
+	printk(" %s: <unixware:", partition_name(hd, minor, buf));
+
+	/* I omit the 0th slice as it is the same as whole disk. */
+	for (p = &l->vtoc.v_slice[1];
+	     p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE; p++) {
+		if ((*current_minor & mask) == 0)
+			break;
+		if (p->s_label == UNIXWARE_FS_UNUSED)
+			continue;
+		add_gd_partition(hd, *current_minor, START_SECT(p),
+				 NR_SECTS(p));
+		(*current_minor)++;
+	}
+	put_dev_sector(sect);
+	printk(" >\n");
+#endif
+}
+
+/*
+ * Minix 2.0.0/2.0.2 subpartition support.
+ * Anand Krishnamurthy <anandk@wiproge.med.ge.com>
+ * Rajeev V. Pillai <rajeevvp@yahoo.com>
+ *
+ * Reads the first sector of the Minix partition and, when it holds a
+ * subpartition table, adds every entry of type MINIX_PARTITION.
+ * Compiles to an empty function without CONFIG_MINIX_SUBPARTITION.
+ */
+static void minix_partition(struct gendisk *hd, struct block_device *bdev,
+			    int minor, int *current_minor)
+{
+#ifdef CONFIG_MINIX_SUBPARTITION
+	long offset = hd->part[minor].start_sect;
+	int mask = (1 << hd->minor_shift) - 1;
+	unsigned char *data;
+	struct partition *p;
+	Sector sect;
+	char buf[40];
+	int i;
+
+	data = read_dev_sector(bdev, offset, &sect);
+	if (!data)
+		return;
+
+	p = (struct partition *)(data + 0x1be);
+
+	/* The first sector of a Minix partition can have either
+	 * a secondary MBR describing its subpartitions, or
+	 * the normal boot sector. */
+	if (!msdos_magic_present (data + 510) ||
+	    SYS_IND(p) != MINIX_PARTITION)
+		goto out;	/* no subpartition table present */
+
+	printk(" %s: <minix:", partition_name(hd, minor, buf));
+	for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) {
+		if ((*current_minor & mask) == 0)
+			break;
+		/* add each partition in use */
+		if (SYS_IND(p) != MINIX_PARTITION)
+			continue;
+		add_gd_partition(hd, *current_minor,
+				 START_SECT(p), NR_SECTS(p));
+		(*current_minor)++;
+	}
+	printk(" >\n");
+out:
+	put_dev_sector(sect);
+#endif /* CONFIG_MINIX_SUBPARTITION */
+}
+
+static struct { /*
+ * Maps a partition's system indicator byte to the parser for the
+ * sub-partitions nested inside it.  msdos_partition() searches this
+ * table during its second pass over the four primary entries.
+ */
+ unsigned char id;
+ void (*parse)(struct gendisk *, struct block_device *, int, int *);
+} subtypes[] = {
+ {BSD_PARTITION, bsd_partition},
+ {NETBSD_PARTITION, netbsd_partition},
+ {OPENBSD_PARTITION, openbsd_partition},
+ {MINIX_PARTITION, minix_partition},
+ {UNIXWARE_PARTITION, unixware_partition},
+ {SOLARIS_X86_PARTITION, solaris_x86_partition},
+ {0, NULL}, /* terminator: the lookup loop stops at a NULL parse pointer */
+};
+/*
+ * Look for various forms of IDE disk geometry translation
+ *
+ * Reads sector 0 of bdev, guesses the number of heads from the first
+ * non-empty partition entry and calls ide_xlate_1024() with a mode
+ * chosen from the first entry's type or an MBR signature.
+ *
+ * Returns -1 when sector 0 cannot be read, 0 when it lacks the 0x55AA
+ * magic, and 1 otherwise.  Without CONFIG_BLK_DEV_IDE the whole body is
+ * compiled out and the function simply returns 1.
+ */
+static int handle_ide_mess(struct block_device *bdev)
+{
+#ifdef CONFIG_BLK_DEV_IDE
+ Sector sect;
+ unsigned char *data;
+ kdev_t dev = to_kdev_t(bdev->bd_dev);
+ unsigned int sig;
+ int heads = 0;
+ struct partition *p;
+ int i;
+
+ if (MSDOS_DEBUG)
+ printk (KERN_ALERT "handle_ide_mess ------------\n");
+
+ /*
+ * The i386 partition handling programs very often
+ * make partitions end on cylinder boundaries.
+ * There is no need to do so, and Linux fdisk doesn't always
+ * do this, and Windows NT on Alpha doesn't do this either,
+ * but still, this helps to guess #heads.
+ */
+ data = read_dev_sector(bdev, 0, &sect);
+ if (!data)
+ return -1;
+ if (!msdos_magic_present(data + 510)) {
+ put_dev_sector(sect);
+ return 0;
+ }
+ sig = le16_to_cpu(*(unsigned short *)(data + 2)); /* 16-bit value at byte 2, used below as an offset into the sector */
+ p = (struct partition *) (data + 0x1be);
+ for (i = 0; i < 4; i++) {
+ struct partition *q = &p[i];
+ if (NR_SECTS(q)) { /* only the first non-empty entry is examined */
+ if ((q->sector & 63) == 1 &&
+ (q->end_sector & 63) == 63)
+ heads = q->end_head + 1;
+ break;
+ }
+ }
+ if (SYS_IND(p) == EZD_PARTITION) {
+ /*
+ * Accesses to sector 0 must go to sector 1 instead.
+ */
+ if (ide_xlate_1024(dev, -1, heads, " [EZD]"))
+ goto reread;
+ } else if (SYS_IND(p) == DM6_PARTITION) {
+
+ /*
+ * Everything on the disk is offset by 63 sectors,
+ * including a "new" MBR with its own partition table.
+ */
+ if (ide_xlate_1024(dev, 1, heads, " [DM6:DDO]"))
+ goto reread;
+ } else if (sig <= 0x1ae &&
+ data[sig] == 0xAA && data[sig+1] == 0x55 &&
+ (data[sig+2] & 1)) {
+ /* DM6 signature in MBR, courtesy of OnTrack */
+ (void) ide_xlate_1024 (dev, 0, heads, " [DM6:MBR]");
+ } else if (SYS_IND(p) == DM6_AUX1PARTITION ||
+ SYS_IND(p) == DM6_AUX3PARTITION) {
+ /*
+ * DM6 on other than the first (boot) drive
+ */
+ (void) ide_xlate_1024(dev, 0, heads, " [DM6:AUX]");
+ } else {
+ (void) ide_xlate_1024(dev, 2, heads, " [PTBL]");
+ }
+ put_dev_sector(sect);
+
+ if (MSDOS_DEBUG)
+ printk (KERN_ALERT "handle_ide_mess -------- %d\n", heads);
+ return 1;
+
+reread: /* NOTE(review): presumably the sector mapping changed, so cached data is dropped - confirm against ide_xlate_1024() */
+ put_dev_sector(sect);
+ /* Flush the cache */
+ invalidate_bdev(bdev, 1);
+ truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
+#endif /* CONFIG_BLK_DEV_IDE */
+ return 1;
+}
+
+int msdos_partition(struct gendisk *hd, struct block_device *bdev,
+ unsigned long first_sector, int first_part_minor) /*
+ * Probe bdev for an MS-DOS style partition table and register what is
+ * found.
+ *
+ * hd:               gendisk the disk belongs to
+ * bdev:             block device to read from
+ * first_sector:     added to the start of each primary partition
+ * first_part_minor: minor of the first primary partition; the four
+ *                   primaries use first_part_minor .. first_part_minor+3
+ *                   and all other partitions use the minors after that
+ *
+ * Returns the result of handle_ide_mess() when that is <= 0, -1 when
+ * sector 0 cannot be read, 0 when sector 0 lacks the 0x55AA magic, and
+ * 1 once the table has been processed.
+ */
+{
+ int i, minor = first_part_minor;
+ Sector sect;
+ struct partition *p;
+ unsigned char *data;
+ int mask = (1 << hd->minor_shift) - 1;
+ int sector_size = get_hardsect_size(to_kdev_t(bdev->bd_dev)) / 512;
+ int current_minor = first_part_minor;
+ int err;
+
+ if (MSDOS_DEBUG) printk (KERN_ALERT "msdos.c::msdos_partition\n");
+ err = handle_ide_mess(bdev);
+ if (err <= 0)
+ return err;
+ data = read_dev_sector(bdev, 0, &sect);
+ if (!data)
+ return -1;
+ if (!msdos_magic_present(data + 510)) {
+ put_dev_sector(sect);
+ return 0;
+ }
+ p = (struct partition *) (data + 0x1be);
+
+ /*
+ * Look for partitions in two passes:
+ * First find the primary and DOS-type extended partitions.
+ * On the second pass look inside *BSD, Unixware and Solaris partitions.
+ */
+
+ current_minor += 4; /* skip the four minors used by the primary entries */
+ for (i=1 ; i<=4 ; minor++,i++,p++) {
+ if (!NR_SECTS(p))
+ continue;
+ add_gd_partition(hd, minor,
+ first_sector+START_SECT(p)*sector_size,
+ NR_SECTS(p)*sector_size);
+#if CONFIG_BLK_DEV_MD
+ if (SYS_IND(p) == LINUX_RAID_PARTITION) {
+ md_autodetect_dev(MKDEV(hd->major,minor));
+ }
+#endif
+ if (is_extended_partition(p)) {
+ unsigned long size = hd->part[minor].nr_sects; /* real size, saved before the clamp below */
+ printk(" <");
+ /* prevent someone doing mkfs or mkswap on an
+ extended partition, but leave room for LILO */
+ if (size > 2)
+ hd->part[minor].nr_sects = 2;
+ extended_partition(hd, bdev, minor, size, &current_minor);
+ printk(" >");
+ }
+ }
+
+ /*
+ * Check for old-style Disk Manager partition table
+ */
+ if (msdos_magic_present(data + 0xfc)) {
+ p = (struct partition *) (0x1be + data);
+ for (i = 4 ; i < 16 ; i++, current_minor++) {
+ p--; /* these entries are walked backwards from offset 0x1be */
+ if ((current_minor & mask) == 0)
+ break;
+ if (!(START_SECT(p) && NR_SECTS(p)))
+ continue;
+ add_gd_partition(hd, current_minor, START_SECT(p), NR_SECTS(p));
+ }
+ }
+ printk("\n");
+
+ /* second pass - output for each on a separate line */
+ minor -= 4; /* back to the first primary partition's minor */
+ p = (struct partition *) (0x1be + data);
+ for (i=1 ; i<=4 ; minor++,i++,p++) {
+ unsigned char id = SYS_IND(p);
+ int n;
+
+ if (!NR_SECTS(p))
+ continue;
+
+ for (n = 0; subtypes[n].parse && id != subtypes[n].id; n++) /* stops on a matching id or on the NULL terminator */
+ ;
+
+ if (subtypes[n].parse)
+ subtypes[n].parse(hd, bdev, minor, &current_minor);
+ }
+ put_dev_sector(sect);
+ return 1;
+}
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h
index 8269718ec8..df25598730 100644
--- a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h
+++ b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h
@@ -258,6 +258,16 @@ static inline int HYPERVISOR_network_op(void *network_op)
return ret;
}
+static inline int HYPERVISOR_block_io_op(void) /*
+ * Issue the block I/O hypercall.  Executes TRAP_INSTR with
+ * __HYPERVISOR_block_io_op as the only input and returns whatever the
+ * trap leaves in the output operand.  Takes no arguments.
+ */
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_block_io_op) ); /* "0" ties the input to output operand 0, so the call number goes in and the result comes out through the same "a" register */
+
+ return ret;
+}
+
static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value)
{
int ret;
diff --git a/xenolinux-2.4.16-sparse/include/linux/blk.h b/xenolinux-2.4.16-sparse/include/linux/blk.h
new file mode 100644
index 0000000000..12eb99ff16
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/include/linux/blk.h
@@ -0,0 +1,416 @@
+#ifndef _BLK_H
+#define _BLK_H
+
+#include <linux/blkdev.h>
+#include <linux/locks.h>
+#include <linux/config.h>
+#include <linux/spinlock.h>
+
+/*
+ * Spinlock for protecting the request queue which
+ * is mucked around with in interrupts on potentially
+ * multiple CPU's..
+ */
+extern spinlock_t io_request_lock;
+
+/*
+ * Initialization functions.
+ */
+extern int isp16_init(void);
+extern int cdu31a_init(void);
+extern int acsi_init(void);
+extern int mcd_init(void);
+extern int mcdx_init(void);
+extern int sbpcd_init(void);
+extern int aztcd_init(void);
+extern int sony535_init(void);
+extern int gscd_init(void);
+extern int cm206_init(void);
+extern int optcd_init(void);
+extern int sjcd_init(void);
+extern int cdi_init(void);
+extern int hd_init(void);
+extern int ide_init(void);
+extern int xd_init(void);
+extern int mfm_init(void);
+extern int loop_init(void);
+extern int md_init(void);
+extern int ap_init(void);
+extern int ddv_init(void);
+extern int z2_init(void);
+extern int swim3_init(void);
+extern int swimiop_init(void);
+extern int amiga_floppy_init(void);
+extern int atari_floppy_init(void);
+extern int ez_init(void);
+extern int bpcd_init(void);
+extern int ps2esdi_init(void);
+extern int jsfd_init(void);
+extern int viodasd_init(void);
+extern int viocd_init(void);
+extern int xlblk_init(void);
+
+#if defined(CONFIG_ARCH_S390)
+extern int dasd_init(void);
+extern int xpram_init(void);
+extern int tapeblock_init(void);
+#endif /* CONFIG_ARCH_S390 */
+
+extern void set_device_ro(kdev_t dev,int flag);
+void add_blkdev_randomness(int major);
+
+extern int floppy_init(void);
+extern void rd_load(void);
+extern int rd_init(void);
+extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */
+extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */
+extern int rd_image_start; /* starting block # of image */
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */
+
+extern unsigned long initrd_start,initrd_end;
+extern int mount_initrd; /* zero if initrd should not be mounted */
+extern int initrd_below_start_ok; /* 1 if it is not an error if initrd_start < memory_start */
+void initrd_init(void);
+
+#endif
+
+
+/*
+ * end_request() and friends. Must be called with the request queue spinlock
+ * acquired. All functions called within end_request() _must_be_ atomic.
+ *
+ * Several drivers define their own end_request and call
+ * end_that_request_first() and end_that_request_last()
+ * for parts of the original function. This prevents
+ * code duplication in drivers.
+ */
+
+/*
+ * Unlink @req from the list it is currently on, using its 'queue' list
+ * node.  No other field of the request is touched.
+ */
+static inline void blkdev_dequeue_request(struct request * req)
+{
+ list_del(&req->queue);
+}
+
+int end_that_request_first(struct request *req, int uptodate, char *name);
+void end_that_request_last(struct request *req);
+
+#if defined(MAJOR_NR) || defined(IDE_DRIVER)
+
+#undef DEVICE_ON
+#undef DEVICE_OFF
+
+/*
+ * Add entries as needed.
+ */
+
+#ifdef IDE_DRIVER
+
+#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS)
+#define DEVICE_NAME "ide"
+
+#elif (MAJOR_NR == RAMDISK_MAJOR)
+
+/* ram disk */
+#define DEVICE_NAME "ramdisk"
+#define DEVICE_NR(device) (MINOR(device))
+#define DEVICE_NO_RANDOM
+
+#elif (MAJOR_NR == Z2RAM_MAJOR)
+
+/* Zorro II Ram */
+#define DEVICE_NAME "Z2RAM"
+#define DEVICE_REQUEST do_z2_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == FLOPPY_MAJOR)
+
+static void floppy_off(unsigned int nr);
+
+#define DEVICE_NAME "floppy"
+#define DEVICE_INTR do_floppy
+#define DEVICE_REQUEST do_fd_request
+#define DEVICE_NR(device) ( (MINOR(device) & 3) | ((MINOR(device) & 0x80 ) >> 5 ))
+#define DEVICE_OFF(device) floppy_off(DEVICE_NR(device))
+
+#elif (MAJOR_NR == HD_MAJOR)
+
+/* Hard disk: timeout is 6 seconds. */
+#define DEVICE_NAME "hard disk"
+#define DEVICE_INTR do_hd
+#define TIMEOUT_VALUE (6*HZ)
+#define DEVICE_REQUEST do_hd_request
+#define DEVICE_NR(device) (MINOR(device)>>6)
+
+#elif (SCSI_DISK_MAJOR(MAJOR_NR))
+
+#define DEVICE_NAME "scsidisk"
+#define TIMEOUT_VALUE (2*HZ)
+#define DEVICE_NR(device) (((MAJOR(device) & SD_MAJOR_MASK) << (8 - 4)) + (MINOR(device) >> 4))
+
+/* Kludge to use the same number for both char and block major numbers */
+#elif (MAJOR_NR == MD_MAJOR) && defined(MD_DRIVER)
+
+#define DEVICE_NAME "Multiple devices driver"
+#define DEVICE_REQUEST do_md_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == SCSI_TAPE_MAJOR)
+
+#define DEVICE_NAME "scsitape"
+#define DEVICE_INTR do_st
+#define DEVICE_NR(device) (MINOR(device) & 0x7f)
+
+#elif (MAJOR_NR == OSST_MAJOR)
+
+#define DEVICE_NAME "onstream"
+#define DEVICE_INTR do_osst
+#define DEVICE_NR(device) (MINOR(device) & 0x7f)
+#define DEVICE_ON(device)
+#define DEVICE_OFF(device)
+
+#elif (MAJOR_NR == SCSI_CDROM_MAJOR)
+
+#define DEVICE_NAME "CD-ROM"
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == XT_DISK_MAJOR)
+
+#define DEVICE_NAME "xt disk"
+#define DEVICE_REQUEST do_xd_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == PS2ESDI_MAJOR)
+
+#define DEVICE_NAME "PS/2 ESDI"
+#define DEVICE_REQUEST do_ps2esdi_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == CDU31A_CDROM_MAJOR)
+
+#define DEVICE_NAME "CDU31A"
+#define DEVICE_REQUEST do_cdu31a_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == ACSI_MAJOR) && (defined(CONFIG_ATARI_ACSI) || defined(CONFIG_ATARI_ACSI_MODULE))
+
+#define DEVICE_NAME "ACSI"
+#define DEVICE_INTR do_acsi
+#define DEVICE_REQUEST do_acsi_request
+#define DEVICE_NR(device) (MINOR(device) >> 4)
+
+#elif (MAJOR_NR == MITSUMI_CDROM_MAJOR)
+
+#define DEVICE_NAME "Mitsumi CD-ROM"
+/* #define DEVICE_INTR do_mcd */
+#define DEVICE_REQUEST do_mcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MITSUMI_X_CDROM_MAJOR)
+
+#define DEVICE_NAME "Mitsumi CD-ROM"
+/* #define DEVICE_INTR do_mcdx */
+#define DEVICE_REQUEST do_mcdx_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #1"
+#define DEVICE_REQUEST do_sbpcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM2_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #2"
+#define DEVICE_REQUEST do_sbpcd2_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM3_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #3"
+#define DEVICE_REQUEST do_sbpcd3_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM4_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #4"
+#define DEVICE_REQUEST do_sbpcd4_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == AZTECH_CDROM_MAJOR)
+
+#define DEVICE_NAME "Aztech CD-ROM"
+#define DEVICE_REQUEST do_aztcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == CDU535_CDROM_MAJOR)
+
+#define DEVICE_NAME "SONY-CDU535"
+#define DEVICE_INTR do_cdu535
+#define DEVICE_REQUEST do_cdu535_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == GOLDSTAR_CDROM_MAJOR)
+
+#define DEVICE_NAME "Goldstar R420"
+#define DEVICE_REQUEST do_gscd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == CM206_CDROM_MAJOR)
+#define DEVICE_NAME "Philips/LMS CD-ROM cm206"
+#define DEVICE_REQUEST do_cm206_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == OPTICS_CDROM_MAJOR)
+
+#define DEVICE_NAME "DOLPHIN 8000AT CD-ROM"
+#define DEVICE_REQUEST do_optcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == SANYO_CDROM_MAJOR)
+
+#define DEVICE_NAME "Sanyo H94A CD-ROM"
+#define DEVICE_REQUEST do_sjcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == APBLOCK_MAJOR)
+
+#define DEVICE_NAME "apblock"
+#define DEVICE_REQUEST ap_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == DDV_MAJOR)
+
+#define DEVICE_NAME "ddv"
+#define DEVICE_REQUEST ddv_request
+#define DEVICE_NR(device) (MINOR(device)>>PARTN_BITS)
+
+#elif (MAJOR_NR == MFM_ACORN_MAJOR)
+
+#define DEVICE_NAME "mfm disk"
+#define DEVICE_INTR do_mfm
+#define DEVICE_REQUEST do_mfm_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == NBD_MAJOR)
+
+#define DEVICE_NAME "nbd"
+#define DEVICE_REQUEST do_nbd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MDISK_MAJOR)
+
+#define DEVICE_NAME "mdisk"
+#define DEVICE_REQUEST mdisk_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == DASD_MAJOR)
+
+#define DEVICE_NAME "dasd"
+#define DEVICE_REQUEST do_dasd_request
+#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS)
+
+#elif (MAJOR_NR == I2O_MAJOR)
+
+#define DEVICE_NAME "I2O block"
+#define DEVICE_REQUEST i2ob_request
+#define DEVICE_NR(device) (MINOR(device)>>4)
+
+#elif (MAJOR_NR == COMPAQ_SMART2_MAJOR)
+
+#define DEVICE_NAME "ida"
+#define TIMEOUT_VALUE (25*HZ)
+#define DEVICE_REQUEST do_ida_request
+#define DEVICE_NR(device) (MINOR(device) >> 4)
+
+#elif (MAJOR_NR == XLBLK_MAJOR)
+
+/*
+ * Driver parameters for major XLBLK_MAJOR: request function is
+ * do_xlblk_request and the device number is the raw minor.
+ * DEVICE_INTR is left undefined (commented out below), so CLEAR_INTR
+ * expands to nothing for this driver.
+ */
+#define DEVICE_NAME "blk"
+#define DEVICE_REQUEST do_xlblk_request
+/* #define DEVICE_INTR */
+#define DEVICE_NR(device) (MINOR(device))
+
+#endif /* MAJOR_NR == whatever */
+
+/* provide DEVICE_xxx defaults, if not explicitly defined
+ * above in the MAJOR_NR==xxx if-elif tree */
+#ifndef DEVICE_ON
+#define DEVICE_ON(device) do {} while (0)
+#endif
+#ifndef DEVICE_OFF
+#define DEVICE_OFF(device) do {} while (0)
+#endif
+
+#if (MAJOR_NR != SCSI_TAPE_MAJOR) && (MAJOR_NR != OSST_MAJOR)
+#if !defined(IDE_DRIVER)
+
+#ifndef CURRENT
+#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head)
+#endif
+#ifndef QUEUE_EMPTY
+#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head)
+#endif
+
+#ifndef DEVICE_NAME
+#define DEVICE_NAME "unknown"
+#endif
+
+#define CURRENT_DEV DEVICE_NR(CURRENT->rq_dev)
+
+#ifdef DEVICE_INTR
+static void (*DEVICE_INTR)(void) = NULL;
+#endif
+
+#define SET_INTR(x) (DEVICE_INTR = (x))
+
+#ifdef DEVICE_REQUEST
+static void (DEVICE_REQUEST)(request_queue_t *);
+#endif
+
+#ifdef DEVICE_INTR
+#define CLEAR_INTR SET_INTR(NULL)
+#else
+#define CLEAR_INTR
+#endif
+
+/*
+ * INIT_REQUEST: sanity-check prologue for a driver's request function.
+ *  - if the queue is empty: run CLEAR_INTR and return from the
+ *    enclosing function;
+ *  - panic if the current request's major number is not MAJOR_NR;
+ *  - panic if the current request has a buffer head that is not locked.
+ * Expands to bare statements containing a plain 'return;', so it is meant
+ * to be placed at the top of a function returning void.
+ */
+#define INIT_REQUEST \
+ if (QUEUE_EMPTY) {\
+ CLEAR_INTR; \
+ return; \
+ } \
+ if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \
+ panic(DEVICE_NAME ": request list destroyed"); \
+ if (CURRENT->bh) { \
+ if (!buffer_locked(CURRENT->bh)) \
+ panic(DEVICE_NAME ": block not locked"); \
+ }
+
+#endif /* !defined(IDE_DRIVER) */
+
+
+#ifndef LOCAL_END_REQUEST /* If we have our own end_request, we do not want to include this mess */
+
+#if ! SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR)
+
+/*
+ * Default completion handler for the request at the head of this
+ * driver's queue (CURRENT).
+ *
+ * end_that_request_first() is called first; if it returns non-zero
+ * nothing more is done.  Otherwise the request's major number is fed to
+ * add_blkdev_randomness() (unless DEVICE_NO_RANDOM is defined),
+ * DEVICE_OFF() is applied to its device, and the request is dequeued and
+ * passed to end_that_request_last().
+ */
+static inline void end_request(int uptodate)
+{
+	struct request *cur = CURRENT;
+	int unfinished = end_that_request_first(cur, uptodate, DEVICE_NAME);
+
+	if (!unfinished) {
+#ifndef DEVICE_NO_RANDOM
+		add_blkdev_randomness(MAJOR(cur->rq_dev));
+#endif
+		DEVICE_OFF(cur->rq_dev);
+		blkdev_dequeue_request(cur);
+		end_that_request_last(cur);
+	}
+}
+
+#endif /* ! SCSI_BLK_MAJOR(MAJOR_NR) */
+#endif /* LOCAL_END_REQUEST */
+
+#endif /* (MAJOR_NR != SCSI_TAPE_MAJOR) */
+#endif /* defined(MAJOR_NR) || defined(IDE_DRIVER) */
+
+#endif /* _BLK_H */
diff --git a/xenolinux-2.4.16-sparse/include/linux/major.h b/xenolinux-2.4.16-sparse/include/linux/major.h
new file mode 100644
index 0000000000..820bf68dc7
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/include/linux/major.h
@@ -0,0 +1,199 @@
+#ifndef _LINUX_MAJOR_H
+#define _LINUX_MAJOR_H
+
+/*
+ * This file has definitions for major device numbers.
+ * For the device number assignments, see Documentation/devices.txt.
+ */
+
+/* limits */
+
+/*
+ * Important: Don't change this to 256. Major number 255 is and must be
+ * reserved for future expansion into a larger dev_t space.
+ */
+#define MAX_CHRDEV 255
+#define MAX_BLKDEV 255
+
+#define UNNAMED_MAJOR 0
+#define MEM_MAJOR 1
+#define RAMDISK_MAJOR 1
+#define FLOPPY_MAJOR 2
+#define PTY_MASTER_MAJOR 2
+#define IDE0_MAJOR 3
+#define PTY_SLAVE_MAJOR 3
+#define HD_MAJOR IDE0_MAJOR
+#define TTY_MAJOR 4
+#define TTYAUX_MAJOR 5
+#define LP_MAJOR 6
+#define VCS_MAJOR 7
+#define LOOP_MAJOR 7
+#define SCSI_DISK0_MAJOR 8
+#define SCSI_TAPE_MAJOR 9
+#define MD_MAJOR 9
+#define MISC_MAJOR 10
+#define SCSI_CDROM_MAJOR 11
+#define QIC02_TAPE_MAJOR 12
+#define XT_DISK_MAJOR 13
+#define SOUND_MAJOR 14
+#define CDU31A_CDROM_MAJOR 15
+#define JOYSTICK_MAJOR 15
+#define GOLDSTAR_CDROM_MAJOR 16
+#define OPTICS_CDROM_MAJOR 17
+#define SANYO_CDROM_MAJOR 18
+#define CYCLADES_MAJOR 19
+#define CYCLADESAUX_MAJOR 20
+#define MITSUMI_X_CDROM_MAJOR 20
+#define MFM_ACORN_MAJOR 21 /* ARM Linux /dev/mfm */
+#define SCSI_GENERIC_MAJOR 21
+#define Z8530_MAJOR 34
+#define DIGI_MAJOR 23
+#define IDE1_MAJOR 22
+#define DIGICU_MAJOR 22
+#define MITSUMI_CDROM_MAJOR 23
+#define CDU535_CDROM_MAJOR 24
+#define STL_SERIALMAJOR 24
+#define MATSUSHITA_CDROM_MAJOR 25
+#define STL_CALLOUTMAJOR 25
+#define MATSUSHITA_CDROM2_MAJOR 26
+#define QIC117_TAPE_MAJOR 27
+#define MATSUSHITA_CDROM3_MAJOR 27
+#define MATSUSHITA_CDROM4_MAJOR 28
+#define STL_SIOMEMMAJOR 28
+#define ACSI_MAJOR 28
+#define AZTECH_CDROM_MAJOR 29
+#define GRAPHDEV_MAJOR 29 /* SparcLinux & Linux/68k /dev/fb */
+#define SHMIQ_MAJOR 85 /* Linux/mips, SGI /dev/shmiq */
+#define CM206_CDROM_MAJOR 32
+#define IDE2_MAJOR 33
+#define IDE3_MAJOR 34
+#define XPRAM_MAJOR 35 /* expanded storage on S/390 = "slow ram" */
+ /* proposed by Peter */
+#define NETLINK_MAJOR 36
+#define PS2ESDI_MAJOR 36
+#define IDETAPE_MAJOR 37
+#define Z2RAM_MAJOR 37
+#define APBLOCK_MAJOR 38 /* AP1000 Block device */
+#define DDV_MAJOR 39 /* AP1000 DDV block device */
+#define NBD_MAJOR 43 /* Network block device */
+#define RISCOM8_NORMAL_MAJOR 48
+#define DAC960_MAJOR 48 /* 48..55 */
+#define RISCOM8_CALLOUT_MAJOR 49
+#define MKISS_MAJOR 55
+#define DSP56K_MAJOR 55 /* DSP56001 processor device */
+
+#define IDE4_MAJOR 56
+#define IDE5_MAJOR 57
+
+#define LVM_BLK_MAJOR 58 /* Logical Volume Manager */
+
+#define SCSI_DISK1_MAJOR 65
+#define SCSI_DISK2_MAJOR 66
+#define SCSI_DISK3_MAJOR 67
+#define SCSI_DISK4_MAJOR 68
+#define SCSI_DISK5_MAJOR 69
+#define SCSI_DISK6_MAJOR 70
+#define SCSI_DISK7_MAJOR 71
+
+
+#define COMPAQ_SMART2_MAJOR 72
+#define COMPAQ_SMART2_MAJOR1 73
+#define COMPAQ_SMART2_MAJOR2 74
+#define COMPAQ_SMART2_MAJOR3 75
+#define COMPAQ_SMART2_MAJOR4 76
+#define COMPAQ_SMART2_MAJOR5 77
+#define COMPAQ_SMART2_MAJOR6 78
+#define COMPAQ_SMART2_MAJOR7 79
+
+#define SPECIALIX_NORMAL_MAJOR 75
+#define SPECIALIX_CALLOUT_MAJOR 76
+
+#define COMPAQ_CISS_MAJOR 104
+#define COMPAQ_CISS_MAJOR1 105
+#define COMPAQ_CISS_MAJOR2 106
+#define COMPAQ_CISS_MAJOR3 107
+#define COMPAQ_CISS_MAJOR4 108
+#define COMPAQ_CISS_MAJOR5 109
+#define COMPAQ_CISS_MAJOR6 110
+#define COMPAQ_CISS_MAJOR7 111
+
+#define ATARAID_MAJOR 114
+
+#define DASD_MAJOR 94 /* Official assignations from Peter */
+
+#define MDISK_MAJOR 95 /* Official assignations from Peter */
+
+#define I2O_MAJOR 80 /* 80->87 */
+
+#define IDE6_MAJOR 88
+#define IDE7_MAJOR 89
+#define IDE8_MAJOR 90
+#define IDE9_MAJOR 91
+
+#define UBD_MAJOR 98
+
+#define AURORA_MAJOR 79
+
+#define JSFD_MAJOR 99
+
+#define PHONE_MAJOR 100
+
+#define LVM_CHAR_MAJOR 109 /* Logical Volume Manager */
+
+#define UMEM_MAJOR 116 /* http://www.umem.com/ Battery Backed RAM */
+
+#define XLBLK_MAJOR 123 /* XenoLinux Block Device */
+
+#define RTF_MAJOR 150
+#define RAW_MAJOR 162
+
+#define USB_ACM_MAJOR 166
+#define USB_ACM_AUX_MAJOR 167
+#define USB_CHAR_MAJOR 180
+
+#define UNIX98_PTY_MASTER_MAJOR 128
+#define UNIX98_PTY_MAJOR_COUNT 8
+#define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT)
+
+#define VXVM_MAJOR 199 /* VERITAS volume i/o driver */
+#define VXSPEC_MAJOR 200 /* VERITAS volume config driver */
+#define VXDMP_MAJOR 201 /* VERITAS volume multipath driver */
+
+#define MSR_MAJOR 202
+#define CPUID_MAJOR 203
+
+#define OSST_MAJOR 206 /* OnStream-SCx0 SCSI tape */
+
+#define IBM_TTY3270_MAJOR 227 /* Official allocations now */
+#define IBM_FS3270_MAJOR 228
+
+/*
+ * Tests for SCSI devices.
+ */
+
+#define SCSI_DISK_MAJOR(M) ((M) == SCSI_DISK0_MAJOR || \
+ ((M) >= SCSI_DISK1_MAJOR && (M) <= SCSI_DISK7_MAJOR))
+
+#define SCSI_BLK_MAJOR(M) \
+ (SCSI_DISK_MAJOR(M) \
+ || (M) == SCSI_CDROM_MAJOR)
+
+/*
+ * Function form of SCSI_BLK_MAJOR(): non-zero when @m is a SCSI disk
+ * major (SCSI_DISK0_MAJOR, or SCSI_DISK1_MAJOR..SCSI_DISK7_MAJOR) or
+ * SCSI_CDROM_MAJOR, zero otherwise.
+ */
+static __inline__ int scsi_blk_major(int m) {
+ return SCSI_BLK_MAJOR(m);
+}
+
+/*
+ * Tests for IDE devices
+ */
+#define IDE_DISK_MAJOR(M) ((M) == IDE0_MAJOR || (M) == IDE1_MAJOR || \
+ (M) == IDE2_MAJOR || (M) == IDE3_MAJOR || \
+ (M) == IDE4_MAJOR || (M) == IDE5_MAJOR || \
+ (M) == IDE6_MAJOR || (M) == IDE7_MAJOR || \
+ (M) == IDE8_MAJOR || (M) == IDE9_MAJOR)
+
+/*
+ * Function form of IDE_DISK_MAJOR(): non-zero when @m equals one of
+ * IDE0_MAJOR..IDE9_MAJOR, zero otherwise.
+ */
+static __inline__ int ide_blk_major(int m)
+{
+ return IDE_DISK_MAJOR(m);
+}
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/init/main.c b/xenolinux-2.4.16-sparse/init/main.c
new file mode 100644
index 0000000000..a48a7773b8
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/init/main.c
@@ -0,0 +1,871 @@
+/*
+ * linux/init/main.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * GK 2/5/95 - Changed to support mounting root fs via NFS
+ * Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96
+ * Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96
+ * Simplified starting of init: Michael A. Griffith <grif@acm.org>
+ */
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/config.h>
+#include <linux/proc_fs.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/unistd.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/utsname.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/smp_lock.h>
+#include <linux/blk.h>
+#include <linux/hdreg.h>
+#include <linux/iobuf.h>
+#include <linux/bootmem.h>
+#include <linux/tty.h>
+
+#include <asm/io.h>
+#include <asm/bugs.h>
+
+#if defined(CONFIG_ARCH_S390)
+#include <asm/s390mach.h>
+#include <asm/ccwcache.h>
+#endif
+
+#ifdef CONFIG_PCI
+#include <linux/pci.h>
+#endif
+
+#ifdef CONFIG_DIO
+#include <linux/dio.h>
+#endif
+
+#ifdef CONFIG_ZORRO
+#include <linux/zorro.h>
+#endif
+
+#ifdef CONFIG_MTRR
+# include <asm/mtrr.h>
+#endif
+
+#ifdef CONFIG_NUBUS
+#include <linux/nubus.h>
+#endif
+
+#ifdef CONFIG_ISAPNP
+#include <linux/isapnp.h>
+#endif
+
+#ifdef CONFIG_IRDA
+extern int irda_proto_init(void);
+extern int irda_device_init(void);
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/smp.h>
+#endif
+
+#if defined(CONFIG_KDB)
+#include <linux/kdb.h>
+#endif
+
+/*
+ * Versions of gcc older than that listed below may actually compile
+ * and link okay, but the end product can have subtle run time bugs.
+ * To avoid associated bogus bug reports, we flatly refuse to compile
+ * with a gcc that is known to be too old from the very beginning.
+ */
+#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 91)
+#error Sorry, your GCC is too old. It builds incorrect kernels.
+#endif
+
+extern char _stext, _etext;
+extern char *linux_banner;
+
+static int init(void *);
+
+extern void init_IRQ(void);
+extern void init_modules(void);
+extern void sock_init(void);
+extern void fork_init(unsigned long);
+extern void mca_init(void);
+extern void sbus_init(void);
+extern void ppc_init(void);
+extern void sysctl_init(void);
+extern void signals_init(void);
+extern int init_pcmcia_ds(void);
+
+extern void free_initmem(void);
+
+#ifdef CONFIG_TC
+extern void tc_init(void);
+#endif
+
+extern void ecard_init(void);
+
+#if defined(CONFIG_SYSVIPC)
+extern void ipc_init(void);
+#endif
+
+/*
+ * Boot command-line arguments
+ */
+#define MAX_INIT_ARGS 8
+#define MAX_INIT_ENVS 8
+
+extern void time_init(void);
+extern void softirq_init(void);
+
+int rows, cols;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */
+#endif
+
+int root_mountflags = MS_RDONLY;
+char *execute_command;
+char root_device_name[64];
+
+
+static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
+static char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
+
+/*
+ * Handler for the "profile=" boot option.
+ *
+ * If get_option() reports that it parsed an integer from @str, that
+ * value becomes prof_shift; otherwise prof_shift is left alone.
+ * Always returns 1, i.e. the option is always treated as consumed.
+ */
+static int __init profile_setup(char *str)
+{
+	int shift;
+
+	if (get_option(&str, &shift) != 0)
+		prof_shift = shift;
+
+	return 1;
+}
+
+__setup("profile=", profile_setup);
+
+
+static struct dev_name_struct {
+ const char *name;
+ const int num;
+} root_dev_names[] __initdata = {
+ { "nfs", 0x00ff },
+ { "blk", 0x7b00 },
+ { "hda", 0x0300 },
+ { "hdb", 0x0340 },
+ { "loop", 0x0700 },
+ { "hdc", 0x1600 },
+ { "hdd", 0x1640 },
+ { "hde", 0x2100 },
+ { "hdf", 0x2140 },
+ { "hdg", 0x2200 },
+ { "hdh", 0x2240 },
+ { "hdi", 0x3800 },
+ { "hdj", 0x3840 },
+ { "hdk", 0x3900 },
+ { "hdl", 0x3940 },
+ { "hdm", 0x5800 },
+ { "hdn", 0x5840 },
+ { "hdo", 0x5900 },
+ { "hdp", 0x5940 },
+ { "hdq", 0x5A00 },
+ { "hdr", 0x5A40 },
+ { "hds", 0x5B00 },
+ { "hdt", 0x5B40 },
+ { "sda", 0x0800 },
+ { "sdb", 0x0810 },
+ { "sdc", 0x0820 },
+ { "sdd", 0x0830 },
+ { "sde", 0x0840 },
+ { "sdf", 0x0850 },
+ { "sdg", 0x0860 },
+ { "sdh", 0x0870 },
+ { "sdi", 0x0880 },
+ { "sdj", 0x0890 },
+ { "sdk", 0x08a0 },
+ { "sdl", 0x08b0 },
+ { "sdm", 0x08c0 },
+ { "sdn", 0x08d0 },
+ { "sdo", 0x08e0 },
+ { "sdp", 0x08f0 },
+ { "ada", 0x1c00 },
+ { "adb", 0x1c10 },
+ { "adc", 0x1c20 },
+ { "add", 0x1c30 },
+ { "ade", 0x1c40 },
+ { "fd", 0x0200 },
+ { "md", 0x0900 },
+ { "xda", 0x0d00 },
+ { "xdb", 0x0d40 },
+ { "ram", 0x0100 },
+ { "scd", 0x0b00 },
+ { "mcd", 0x1700 },
+ { "cdu535", 0x1800 },
+ { "sonycd", 0x1800 },
+ { "aztcd", 0x1d00 },
+ { "cm206cd", 0x2000 },
+ { "gscd", 0x1000 },
+ { "sbpcd", 0x1900 },
+ { "eda", 0x2400 },
+ { "edb", 0x2440 },
+ { "pda", 0x2d00 },
+ { "pdb", 0x2d10 },
+ { "pdc", 0x2d20 },
+ { "pdd", 0x2d30 },
+ { "pcd", 0x2e00 },
+ { "pf", 0x2f00 },
+ { "apblock", APBLOCK_MAJOR << 8},
+ { "ddv", DDV_MAJOR << 8},
+ { "jsfd", JSFD_MAJOR << 8},
+#if defined(CONFIG_ARCH_S390)
+ { "dasda", (DASD_MAJOR << MINORBITS) },
+ { "dasdb", (DASD_MAJOR << MINORBITS) + (1 << 2) },
+ { "dasdc", (DASD_MAJOR << MINORBITS) + (2 << 2) },
+ { "dasdd", (DASD_MAJOR << MINORBITS) + (3 << 2) },
+ { "dasde", (DASD_MAJOR << MINORBITS) + (4 << 2) },
+ { "dasdf", (DASD_MAJOR << MINORBITS) + (5 << 2) },
+ { "dasdg", (DASD_MAJOR << MINORBITS) + (6 << 2) },
+ { "dasdh", (DASD_MAJOR << MINORBITS) + (7 << 2) },
+#endif
+#if defined(CONFIG_BLK_CPQ_DA) || defined(CONFIG_BLK_CPQ_DA_MODULE)
+ { "ida/c0d0p",0x4800 },
+ { "ida/c0d1p",0x4810 },
+ { "ida/c0d2p",0x4820 },
+ { "ida/c0d3p",0x4830 },
+ { "ida/c0d4p",0x4840 },
+ { "ida/c0d5p",0x4850 },
+ { "ida/c0d6p",0x4860 },
+ { "ida/c0d7p",0x4870 },
+ { "ida/c0d8p",0x4880 },
+ { "ida/c0d9p",0x4890 },
+ { "ida/c0d10p",0x48A0 },
+ { "ida/c0d11p",0x48B0 },
+ { "ida/c0d12p",0x48C0 },
+ { "ida/c0d13p",0x48D0 },
+ { "ida/c0d14p",0x48E0 },
+ { "ida/c0d15p",0x48F0 },
+#endif
+#if defined(CONFIG_BLK_CPQ_CISS_DA) || defined(CONFIG_BLK_CPQ_CISS_DA_MODULE)
+ { "cciss/c0d0p",0x6800 },
+ { "cciss/c0d1p",0x6810 },
+ { "cciss/c0d2p",0x6820 },
+ { "cciss/c0d3p",0x6830 },
+ { "cciss/c0d4p",0x6840 },
+ { "cciss/c0d5p",0x6850 },
+ { "cciss/c0d6p",0x6860 },
+ { "cciss/c0d7p",0x6870 },
+ { "cciss/c0d8p",0x6880 },
+ { "cciss/c0d9p",0x6890 },
+ { "cciss/c0d10p",0x68A0 },
+ { "cciss/c0d11p",0x68B0 },
+ { "cciss/c0d12p",0x68C0 },
+ { "cciss/c0d13p",0x68D0 },
+ { "cciss/c0d14p",0x68E0 },
+ { "cciss/c0d15p",0x68F0 },
+#endif
+ { "nftla", 0x5d00 },
+ { "nftlb", 0x5d10 },
+ { "nftlc", 0x5d20 },
+ { "nftld", 0x5d30 },
+ { "ftla", 0x2c00 },
+ { "ftlb", 0x2c08 },
+ { "ftlc", 0x2c10 },
+ { "ftld", 0x2c18 },
+ { "mtdblock", 0x1f00 },
+ { NULL, 0 }
+};
+
+/*
+ * Translate a root device specification into a kdev_t.
+ *
+ * If @line starts with "/dev/", the text after it is matched by prefix
+ * against root_dev_names (first match wins).  On a match the entry's
+ * base number is used and the remaining text is parsed as a decimal
+ * offset added to it, e.g. "/dev/hda1" -> 0x0300 + 1.
+ *
+ * With no "/dev/" prefix, or when no table entry matches, the remaining
+ * text is parsed as a hexadecimal device number.
+ */
+kdev_t __init name_to_kdev_t(char *line)
+{
+	struct dev_name_struct *entry;
+	int devnum = 0;
+	int radix;
+
+	if (strncmp(line, "/dev/", 5) == 0) {
+		line += 5;
+		/* table is non-empty and terminated by a NULL name */
+		for (entry = root_dev_names; entry->name != NULL; entry++) {
+			int namelen = strlen(entry->name);
+
+			if (strncmp(line, entry->name, namelen) == 0) {
+				line += namelen;
+				devnum = entry->num;
+				break;
+			}
+		}
+	}
+
+	/* known name: decimal unit/partition suffix; otherwise raw hex */
+	radix = devnum ? 10 : 16;
+	return to_kdev_t(devnum + simple_strtoul(line, NULL, radix));
+}
+
+/*
+ * Handler for the "root=" boot option.
+ *
+ * Sets ROOT_DEV from name_to_kdev_t(@line), then records the device name
+ * in root_device_name: the buffer is zeroed, an optional leading "/dev/"
+ * is skipped, and characters are copied until whitespace, ',' or the end
+ * of the string is reached, or the buffer (minus its terminating NUL) is
+ * full.  Always returns 1.
+ */
+static int __init root_dev_setup(char *line)
+{
+	const char *src;
+	int pos = 0;
+
+	ROOT_DEV = name_to_kdev_t(line);
+	memset(root_device_name, 0, sizeof root_device_name);
+
+	src = line;
+	if (strncmp(src, "/dev/", 5) == 0)
+		src += 5;
+
+	/* last byte stays 0 from the memset, keeping the name terminated */
+	while (pos < sizeof root_device_name - 1) {
+		char c = src[pos];
+
+		if (c == '\0' || c == ',' || isspace(c))
+			break;
+		root_device_name[pos] = c;
+		pos++;
+	}
+	return 1;
+}
+
+__setup("root=", root_dev_setup);
+
+/*
+ * Offer one command-line token to the registered __setup() handlers.
+ *
+ * Walks the kernel_param entries from __setup_start up to __setup_end.
+ * For every entry whose 'str' is a prefix of @line, its setup_func is
+ * called with the text following that prefix.  Returns 1 as soon as a
+ * handler returns non-zero, or 0 if none does.
+ *
+ * The first entry is examined unconditionally, so the table is assumed
+ * to hold at least one entry.
+ */
+static int __init checksetup(char *line)
+{
+	struct kernel_param *param = &__setup_start;
+
+	do {
+		int prefix_len = strlen(param->str);
+
+		if (strncmp(line, param->str, prefix_len) == 0 &&
+		    param->setup_func(line + prefix_len))
+			return 1;
+		param++;
+	} while (param < &__setup_end);
+
+	return 0;
+}
+
+/* this should be approx 2 Bo*oMips to start (note initial shift), and will
+ still work even if initially too large, it will just take slightly longer */
+unsigned long loops_per_jiffy = (1<<12);
+
+/* This is the number of bits of precision for the loops_per_jiffy. Each
+ bit takes on average 1.5/HZ seconds. This (like the original) is a little
+ better than 1% */
+#define LPS_PREC 8
+
+/*
+ * Measure how many __delay() loop iterations fit into one timer tick,
+ * store the result in loops_per_jiffy and print it as BogoMIPS.
+ *
+ * Phase 1 doubles loops_per_jiffy (the first value tried is 1<<13)
+ * until a __delay() of that length is seen to span at least one change
+ * of 'jiffies'.  Phase 2 steps back one doubling and refines the value
+ * one bit at a time, for at most LPS_PREC bits.
+ *
+ * Busy-waits on 'jiffies', so the timer tick must already be advancing
+ * when this is called.
+ */
+void __init calibrate_delay(void)
+{
+ unsigned long ticks, loopbit;
+ int lps_precision = LPS_PREC;
+
+ loops_per_jiffy = (1<<12);
+
+ printk("Calibrating delay loop... ");
+ /* coarse search; also ends if the shift overflows to zero */
+ while (loops_per_jiffy <<= 1) {
+ /* wait for "start of" clock tick */
+ ticks = jiffies;
+ while (ticks == jiffies)
+ /* nothing */;
+ /* Go .. */
+ ticks = jiffies;
+ __delay(loops_per_jiffy);
+ ticks = jiffies - ticks;
+ if (ticks)
+ break;
+ }
+
+/* Do a binary approximation to get loops_per_jiffy set to equal one clock
+ (up to lps_precision bits) */
+ loops_per_jiffy >>= 1;
+ loopbit = loops_per_jiffy;
+ while ( lps_precision-- && (loopbit >>= 1) ) {
+ /* tentatively set the next lower bit and time the delay again */
+ loops_per_jiffy |= loopbit;
+ ticks = jiffies;
+ while (ticks == jiffies);
+ ticks = jiffies;
+ __delay(loops_per_jiffy);
+ if (jiffies != ticks) /* longer than 1 tick */
+ loops_per_jiffy &= ~loopbit;
+ }
+
+/* Round the value and print it */
+ printk("%lu.%02lu BogoMIPS\n",
+ loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ)) % 100);
+}
+
+/*
+ * Handler for the "ro" boot flag: set MS_RDONLY in root_mountflags.
+ *
+ * @str is whatever follows "ro" in the token.  Handlers are matched by
+ * prefix, so a non-empty remainder means this was some other option
+ * (e.g. "root=..."); it is declined by returning 0.  Returns 1 when the
+ * flag was applied.
+ */
+static int __init readonly(char *str)
+{
+	int exact = (*str == '\0');
+
+	if (exact)
+		root_mountflags |= MS_RDONLY;
+	return exact;
+}
+
+/*
+ * Handler for the "rw" boot flag: clear MS_RDONLY in root_mountflags.
+ *
+ * @str is whatever follows "rw" in the token; a non-empty remainder
+ * means the token was not exactly "rw" and it is declined by returning
+ * 0.  Returns 1 when the flag was applied.
+ */
+static int __init readwrite(char *str)
+{
+	int exact = (*str == '\0');
+
+	if (exact)
+		root_mountflags &= ~MS_RDONLY;
+	return exact;
+}
+
+/*
+ * Handler for the "debug" boot flag: set console_loglevel to 10.
+ *
+ * Declines (returns 0) when anything follows "debug" in the token;
+ * returns 1 when the level was set.
+ */
+static int __init debug_kernel(char *str)
+{
+	int exact = (*str == '\0');
+
+	if (exact)
+		console_loglevel = 10;
+	return exact;
+}
+
+/*
+ * Handler for the "quiet" boot flag: set console_loglevel to 4.
+ *
+ * Declines (returns 0) when anything follows "quiet" in the token;
+ * returns 1 when the level was set.
+ */
+static int __init quiet_kernel(char *str)
+{
+	int exact = (*str == '\0');
+
+	if (exact)
+		console_loglevel = 4;
+	return exact;
+}
+
+__setup("ro", readonly);
+__setup("rw", readwrite);
+__setup("debug", debug_kernel);
+__setup("quiet", quiet_kernel);
+
+/*
+ * This is a simple kernel command line parsing function: it parses
+ * the command line, and fills in the arguments/environment to init
+ * as appropriate. Any cmd-line option is taken to be an environment
+ * variable if it contains the character '='.
+ *
+ * This routine also checks for options meant for the kernel.
+ * These options are not given to init - they are for internal kernel use only.
+ */
+static void __init parse_options(char *line)
+{
+ /*
+ * @line is tokenised in place: blanks that separate tokens are
+ * overwritten with NUL, and argv_init[] / envp_init[] /
+ * execute_command end up pointing into this same buffer.
+ */
+ char *next,*quote;
+ int args, envs;
+
+ if (!*line)
+ return;
+ args = 0;
+ envs = 1; /* TERM is set to 'linux' by default */
+ next = line;
+ while ((line = next) != NULL) {
+ quote = strchr(line,'"');
+ next = strchr(line, ' ');
+ while (next != NULL && quote != NULL && quote < next) {
+ /* we found a left quote before the next blank
+ * now we have to find the matching right quote
+ */
+ next = strchr(quote+1, '"');
+ if (next != NULL) {
+ quote = strchr(next+1, '"');
+ next = strchr(next+1, ' ');
+ }
+ }
+ /* terminate the current token and step past the separator */
+ if (next != NULL)
+ *next++ = 0;
+#if defined(CONFIG_KDB)
+ /* kdb, kdb=on, kdb=off, kdb=early */
+ if (strncmp(line, "kdb", 3) == 0) {
+ if (line[3] == '\0') {
+ /* Backward compatibility, kdb with no option means early activation */
+ printk("Boot flag kdb with no options is obsolete, use kdb=early\n");
+ kdb_on = 1;
+ kdb_flags |= KDB_FLAG_EARLYKDB;
+ continue;
+ }
+ if (line[3] == '=') {
+ if (strcmp(line+4, "on") == 0) {
+ kdb_on = 1;
+ continue;
+ }
+ if (strcmp(line+4, "off") == 0) {
+ kdb_on = 0;
+ continue;
+ }
+ if (strcmp(line+4, "early") == 0) {
+ kdb_on = 1;
+ kdb_flags |= KDB_FLAG_EARLYKDB;
+ continue;
+ }
+ printk("Boot flag %s not recognised, assumed to be environment variable\n", line);
+ }
+ }
+#endif
+ if (!strncmp(line,"init=",5)) {
+ line += 5;
+ execute_command = line;
+ /* In case LILO is going to boot us with default command line,
+ * it prepends "auto" before the whole cmdline which makes
+ * the shell think it should execute a script with such name.
+ * So we ignore all arguments entered _before_ init=... [MJ]
+ */
+ args = 0;
+ continue;
+ }
+ /* consumed by a registered __setup() handler: not passed to init */
+ if (checksetup(line))
+ continue;
+
+ /*
+ * Then check if it's an environment variable or
+ * an option.
+ */
+ if (strchr(line,'=')) {
+ /* table full: stop parsing the rest of the command line */
+ if (envs >= MAX_INIT_ENVS)
+ break;
+ envp_init[++envs] = line;
+ } else {
+ /* table full: stop parsing the rest of the command line */
+ if (args >= MAX_INIT_ARGS)
+ break;
+ /* empty tokens are not added to argv */
+ if (*line)
+ argv_init[++args] = line;
+ }
+ }
+ /* NULL-terminate both vectors after the last slot that was filled */
+ argv_init[args+1] = NULL;
+ envp_init[envs+1] = NULL;
+}
+
+
+extern void setup_arch(char **);
+extern void cpu_idle(void);
+
+unsigned long wait_init_idle;
+
+#ifndef CONFIG_SMP
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/*
+ * Non-SMP build with CONFIG_X86_LOCAL_APIC: the only "SMP" work is the
+ * call to APIC_init_uniprocessor().
+ */
+static void __init smp_init(void)
+{
+ APIC_init_uniprocessor();
+}
+#else
+#define smp_init() do { } while (0)
+#endif
+
+#else
+
+
+/* Called by boot processor to activate the rest. */
+static void __init smp_init(void)
+{
+ /* Get other processors into their bootup holding patterns. */
+ smp_boot_cpus();
+ /* one bit per online CPU; this CPU's own bit is removed right after */
+ wait_init_idle = cpu_online_map;
+ clear_bit(current->processor, &wait_init_idle); /* Don't wait on me! */
+
+ smp_threads_ready=1;
+ smp_commence();
+
+ /* Wait for the other cpus to set up their idle processes */
+ printk("Waiting on wait_init_idle (map = 0x%lx)\n", wait_init_idle);
+ /* spin until the mask reaches zero; the bits are cleared elsewhere
+ * (presumably by each CPU as it finishes init_idle -- not visible
+ * in this file) */
+ while (wait_init_idle) {
+ cpu_relax();
+ barrier();
+ }
+ printk("All processors have done init_idle\n");
+}
+
+#endif
+
+/*
+ * We need to finalize in a non-__init function or else race conditions
+ * between the root thread and the init thread may cause start_kernel to
+ * be reaped by free_initmem before the root thread has proceeded to
+ * cpu_idle.
+ */
+
+static void rest_init(void)
+{
+ /* start init() in a new kernel thread */
+ kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
+ /* pairs with the lock_kernel() at the top of start_kernel() */
+ unlock_kernel();
+ /* ask for a reschedule, then enter the idle loop on this thread */
+ current->need_resched = 1;
+ cpu_idle();
+}
+
+/*
+ * Activate the first processor.
+ */
+
+/*
+ * Kernel entry point for the first processor: takes the kernel lock,
+ * runs architecture setup and command-line parsing, brings up the core
+ * subsystems in a fixed order, then calls smp_init() and rest_init().
+ * The order of the calls below is significant; do not rearrange.
+ */
+asmlinkage void __init start_kernel(void)
+{
+ char * command_line;
+ unsigned long mempages;
+ extern char saved_command_line[];
+/*
+ * Interrupts are still disabled. Do necessary setups, then
+ * enable them
+ */
+ lock_kernel();
+ /* NOTE(review): linux_banner is used as the format string itself, so
+ * any '%' in it would be interpreted; printk("%s", linux_banner)
+ * would be the defensive form. */
+ printk(linux_banner);
+ /* setup_arch() hands back the command line through &command_line */
+ setup_arch(&command_line);
+ printk("Kernel command line: %s\n", saved_command_line);
+ parse_options(command_line);
+ trap_init();
+ init_IRQ();
+ sched_init();
+ softirq_init();
+ time_init();
+
+ /*
+ * HACK ALERT! This is early. We're enabling the console before
+ * we've done PCI setups etc, and console_init() must be aware of
+ * this. But we do want output early, in case something goes wrong.
+ */
+ console_init();
+#ifdef CONFIG_MODULES
+ init_modules();
+#endif
+ /* profiling buffer is only allocated when "profile=" set prof_shift */
+ if (prof_shift) {
+ unsigned int size;
+ /* only text is profiled */
+ prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
+ prof_len >>= prof_shift;
+
+ size = prof_len * sizeof(unsigned int) + PAGE_SIZE-1;
+ prof_buffer = (unsigned int *) alloc_bootmem(size);
+ }
+
+ kmem_cache_init();
+ /* calibrate_delay() busy-waits on jiffies, hence sti() first */
+ sti();
+ calibrate_delay();
+#ifdef CONFIG_BLK_DEV_INITRD
+ /* drop the initrd if it starts below min_low_pfn << PAGE_SHIFT and
+ * that has not been declared acceptable */
+ if (initrd_start && !initrd_below_start_ok &&
+ initrd_start < min_low_pfn << PAGE_SHIFT) {
+ printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
+ "disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT);
+ initrd_start = 0;
+ }
+#endif
+ mem_init();
+ kmem_cache_sizes_init();
+#if defined(CONFIG_KDB)
+ kdb_init();
+ if (KDB_FLAG(EARLYKDB)) {
+ KDB_ENTER();
+ }
+#endif
+ /* num_physpages is passed on to size the caches set up below */
+ mempages = num_physpages;
+
+ fork_init(mempages);
+ proc_caches_init();
+ vfs_caches_init(mempages);
+ buffer_init(mempages);
+ page_cache_init(mempages);
+#if defined(CONFIG_ARCH_S390)
+ ccwcache_init();
+#endif
+ signals_init();
+#ifdef CONFIG_PROC_FS
+ proc_root_init();
+#endif
+#if defined(CONFIG_SYSVIPC)
+ ipc_init();
+#endif
+ check_bugs();
+ printk("POSIX conformance testing by UNIFIX\n");
+
+ /*
+ * We count on the initial thread going ok
+ * Like idlers init is an unlocked kernel thread, which will
+ * make syscalls (and thus be locked).
+ */
+ smp_init();
+ rest_init();
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+/*
+ * Thread body that executes the program named by @shell with argv
+ * { "linuxrc", NULL } and the envp_init environment.
+ * prepare_namespace() starts it with "/linuxrc".
+ * Returns whatever execve() returns.
+ */
+static int do_linuxrc(void * shell)
+{
+ static char *argv[] = { "linuxrc", NULL, };
+
+ /* Drop descriptors 0, 1 and 2, then start a new session. */
+ close(0);close(1);close(2);
+ setsid();
+ /*
+  * Open the console and duplicate it twice.
+  * NOTE(review): presumably this repopulates descriptors 0-2; the
+  * results are deliberately ignored, so a failed open is not detected.
+  */
+ (void) open("/dev/console",O_RDWR,0);
+ (void) dup(0);
+ (void) dup(0);
+ return execve(shell, argv, envp_init);
+}
+
+#endif
+
+/* Task that reaps orphaned children; starts as init_task and is reassigned to `current` in do_basic_setup(). */
+struct task_struct *child_reaper = &init_task;
+
+/*
+ * Call, in order, every function pointer stored between
+ * __initcall_start and __initcall_end, then flush the scheduled tasks
+ * those calls may have queued.
+ */
+static void __init do_initcalls(void)
+{
+ initcall_t *call;
+
+ /*
+  * Check the bound before the first call so that an empty initcall
+  * table is skipped rather than invoking whatever sits at its end.
+  */
+ for (call = &__initcall_start; call < &__initcall_end; call++) {
+  (*call)();
+ }
+
+ /* Make sure there is no pending stuff from the initcall sequence */
+ flush_scheduled_tasks();
+}
+
+/*
+ * Ok, the machine is now initialized. None of the devices
+ * have been touched yet, but the CPU subsystem is up and
+ * running, and memory and process management works.
+ *
+ * Now we can finally start doing some real work..
+ */
+static void __init do_basic_setup(void)
+{
+ /*
+  * Overview: record the current task as child_reaper, run the optional
+  * MTRR and sysctl setup, call the init hook of every configured
+  * subsystem (PCI, SBUS, ...), bring up sockets, start the context
+  * thread, run the initcalls, and finally the IrDA and PCMCIA hooks
+  * when those are configured.
+  */
+
+ /*
+ * Tell the world that we're going to be the grim
+ * reaper of innocent orphaned children.
+ *
+ * We don't want people to have to make incorrect
+ * assumptions about where in the task array this
+ * can be found.
+ */
+ child_reaper = current;
+
+#if defined(CONFIG_MTRR) /* Do this after SMP initialization */
+/*
+ * We should probably create some architecture-dependent "fixup after
+ * everything is up" style function where this would belong better
+ * than in init/main.c..
+ */
+ mtrr_init();
+#endif
+
+#ifdef CONFIG_SYSCTL
+ sysctl_init();
+#endif
+
+ /*
+ * Ok, at this point all CPU's should be initialized, so
+ * we can start looking into devices..
+ */
+#if defined(CONFIG_ARCH_S390)
+ s390_init_machine_check();
+#endif
+
+ /* Each hook below is compiled in only when its CONFIG_ option is set. */
+#ifdef CONFIG_PCI
+ pci_init();
+#endif
+#ifdef CONFIG_SBUS
+ sbus_init();
+#endif
+#if defined(CONFIG_PPC)
+ ppc_init();
+#endif
+#ifdef CONFIG_MCA
+ mca_init();
+#endif
+#ifdef CONFIG_ARCH_ACORN
+ ecard_init();
+#endif
+#ifdef CONFIG_ZORRO
+ zorro_init();
+#endif
+#ifdef CONFIG_DIO
+ dio_init();
+#endif
+#ifdef CONFIG_NUBUS
+ nubus_init();
+#endif
+#ifdef CONFIG_ISAPNP
+ isapnp_init();
+#endif
+#ifdef CONFIG_TC
+ tc_init();
+#endif
+
+ /* Networking initialization needs a process context */
+ sock_init();
+
+ /*
+  * do_initcalls() finishes with flush_scheduled_tasks().
+  * NOTE(review): the context thread is presumably what services those
+  * tasks, hence it is started first - confirm.
+  */
+ start_context_thread();
+ do_initcalls();
+
+#ifdef CONFIG_IRDA
+ irda_proto_init();
+ irda_device_init(); /* Must be done after protocol initialization */
+#endif
+#ifdef CONFIG_PCMCIA
+ init_pcmcia_ds(); /* Do this last */
+#endif
+}
+
+/* Ramdisk loaders called from prepare_namespace(); defined outside this file. */
+extern void rd_load(void);
+extern void initrd_load(void);
+
+/*
+ * Prepare the namespace - decide what/where to mount, load ramdisks, etc.
+ */
+static void prepare_namespace(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ /* Save the requested root mount flags; they are restored after mounting. */
+ int real_root_mountflags = root_mountflags;
+ /* Without an initrd image there is nothing to mount as initrd. */
+ if (!initrd_start)
+ mount_initrd = 0;
+ /* Clear MS_RDONLY for the mount when an initrd will be used. */
+ if (mount_initrd)
+ root_mountflags &= ~MS_RDONLY;
+ /* Remember the root device configured before any ramdisk is loaded. */
+ real_root_dev = ROOT_DEV;
+#endif
+
+ /*
+  * With ramdisk support: call initrd_load() when an initrd is to be
+  * mounted (and initrd support is built in), otherwise rd_load().
+  */
+#ifdef CONFIG_BLK_DEV_RAM
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (mount_initrd)
+ initrd_load();
+ else
+#endif
+ rd_load();
+#endif
+
+ /* Mount the root filesystem.. */
+ mount_root();
+
+ mount_devfs_fs ();
+
+#ifdef CONFIG_BLK_DEV_INITRD
+ root_mountflags = real_root_mountflags;
+ /*
+  * Run /linuxrc only when an initrd was requested, ROOT_DEV no longer
+  * equals the saved real_root_dev, and ROOT_DEV is ramdisk minor 0.
+  * NOTE(review): ROOT_DEV is presumably changed by initrd_load() or
+  * mount_root() - confirm.
+  */
+ if (mount_initrd && ROOT_DEV != real_root_dev
+ && MAJOR(ROOT_DEV) == RAMDISK_MAJOR && MINOR(ROOT_DEV) == 0) {
+ int error;
+ int i, pid;
+
+ pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD);
+ if (pid > 0) {
+ /* Yield the CPU until wait() reports that the linuxrc thread exited. */
+ while (pid != wait(&i)) {
+ current->policy |= SCHED_YIELD;
+ schedule();
+ }
+ }
+ /* Switch to the real root unless it is itself ramdisk minor 0. */
+ if (MAJOR(real_root_dev) != RAMDISK_MAJOR
+ || MINOR(real_root_dev) != 0) {
+ error = change_root(real_root_dev,"/initrd");
+ if (error)
+ printk(KERN_ERR "Change root to /initrd: "
+ "error %d\n",error);
+ }
+ }
+#endif
+}
+
+/*
+ * Body of the thread started by rest_init(). It finishes kernel setup
+ * under the kernel lock, frees the init memory, opens the console and
+ * then tries a list of programs with execve(). The argument is ignored.
+ * It panics when every execve() attempt returns.
+ */
+static int init(void * unused)
+{
+ lock_kernel();
+ do_basic_setup();
+
+ prepare_namespace();
+
+ /*
+ * Ok, we have completed the initial bootup, and
+ * we're essentially up and running. Get rid of the
+ * initmem segments and start the user-mode stuff..
+ */
+ free_initmem();
+ unlock_kernel();
+
+ /* A failed console open is only reported; startup continues regardless. */
+ if (open("/dev/console", O_RDWR, 0) < 0)
+ printk("Warning: unable to open an initial console.\n");
+
+ /*
+  * Duplicate descriptor 0 twice.
+  * NOTE(review): presumably this yields descriptors 1 and 2 - confirm
+  * that nothing else is open at this point.
+  */
+ (void) dup(0);
+ (void) dup(0);
+
+ /*
+ * We try each of these until one succeeds.
+ *
+ * The Bourne shell can be used instead of init if we are
+ * trying to recover a really broken machine.
+ */
+
+ /* execute_command, when set, is tried first; on failure the defaults follow. */
+ if (execute_command)
+ execve(execute_command,argv_init,envp_init);
+ execve("/sbin/init",argv_init,envp_init);
+ execve("/etc/init",argv_init,envp_init);
+ execve("/bin/init",argv_init,envp_init);
+ execve("/bin/sh",argv_init,envp_init);
+ /* Reached only when every execve() above returned. */
+ panic("No init found. Try passing init= option to kernel.");
+}