diff options
author | rn@wyvis.camb.intel-research.net <rn@wyvis.camb.intel-research.net> | 2003-02-13 15:58:34 +0000 |
---|---|---|
committer | rn@wyvis.camb.intel-research.net <rn@wyvis.camb.intel-research.net> | 2003-02-13 15:58:34 +0000 |
commit | 87b3f71e1f82bad43200499787a5e2c1e6fbae17 (patch) | |
tree | 68628511517767c333d7715942daa16d9cca05b3 | |
parent | febde6d3526433e019a8bee635cff9dabeaf053b (diff) | |
parent | 4b929a32df3556bce2860e54c460502c751d18a4 (diff) | |
download | xen-87b3f71e1f82bad43200499787a5e2c1e6fbae17.tar.gz xen-87b3f71e1f82bad43200499787a5e2c1e6fbae17.tar.bz2 xen-87b3f71e1f82bad43200499787a5e2c1e6fbae17.zip |
bitkeeper revision 1.27 (3e4bc0aaYLPRPEot-3f6sspi3HC6Xg)
Merge with recent checkins
52 files changed, 8154 insertions, 571 deletions
@@ -59,16 +59,23 @@ 3ddb79beME_0abStePF6fU8XLuQnWw xen-2.4.16/drivers/block/elevator.c 3ddb79beNQVrdGyoI4njXhgAjD6a4A xen-2.4.16/drivers/block/genhd.c 3ddb79beyWwLRP_BiM2t1JKgr_plEw xen-2.4.16/drivers/block/ll_rw_blk.c +3e4a8cb7RhubVgsPwO7cK0pgAN8WCQ xen-2.4.16/drivers/block/xen_block.c +3e4a8cb7alzQCDKS7MlioPoHBKYkdQ xen-2.4.16/drivers/char/Makefile +3e4a8cb7WmiYdC-ASGiCSG_CL8vsqg xen-2.4.16/drivers/char/xen_kbd.c +3e4a8cb7nMChlro4wvOBo76n__iCFA xen-2.4.16/drivers/char/xen_serial.c 3ddb79bdhcqD9ebrslr0O0oHqTiiXg xen-2.4.16/drivers/ide/Makefile 3ddb79bdErDn_WC3G-fWxKNR3viLnA xen-2.4.16/drivers/ide/ide-disk.c +3e4a8cb7DcFFHW_fG_OHbY_6f3lPWw xen-2.4.16/drivers/ide/ide-disk.c.orig 3ddb79bdIPNW36FrlId94jTXaW8HoA xen-2.4.16/drivers/ide/ide-dma.c 3ddb79be5Ysvhn4se_Z-LQY_hI6UPw xen-2.4.16/drivers/ide/ide-features.c 3ddb79bdh1ohsWYRH_KdaXr7cqs12w xen-2.4.16/drivers/ide/ide-geometry.c 3ddb79bdYcxXT-2UEaDcG0Ic4MIK1g xen-2.4.16/drivers/ide/ide-pci.c 3ddb79bdOXTbcImJo8DwmlNX88k78Q xen-2.4.16/drivers/ide/ide-probe.c 3ddb79bdDWFwINnKn29RlFDwGJhjYg xen-2.4.16/drivers/ide/ide-taskfile.c +3e4a8d40XMqvT05EwZwJg1HMsFDUBA xen-2.4.16/drivers/ide/ide-xeno.c 3ddb79bdkDY1bSOYkToP1Cc49VdBxg xen-2.4.16/drivers/ide/ide.c 3ddb79bdPyAvT_WZTAFhaX0jp-yXSw xen-2.4.16/drivers/ide/ide_modes.h +3e4a8d401aSwOzCScQXR3lsmNlAwUQ xen-2.4.16/drivers/ide/piix.c 3ddb79bfogeJNHTIepPjd8fy1TyoTw xen-2.4.16/drivers/net/3c509.c 3ddb79bfMlOcWUwjtg6oMYhGySHDDw xen-2.4.16/drivers/net/3c59x.c 3ddb79bfl_DWxZQFKiJ2BXrSedV4lg xen-2.4.16/drivers/net/8139cp.c @@ -271,9 +278,12 @@ 3ddb79b7v_Be34as7_mlzFlw65hOjQ xenolinux-2.4.16-sparse/arch/xeno/defconfig 3ddb79b7KUvtx0knQJoRaBDZQeNidg xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile 3ddb79b6Rc0uAOGFthIFxq1KGWZ_Iw xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c +3e4a8cb7JECr--r1ipnrkd7NKdbUqQ xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c +3e4a8cb7SLWsLTXQjv7ng6-3hL4pCA xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c 
3ddb79b7LLVJBGynxHSOh9A9l97sug xenolinux-2.4.16-sparse/arch/xeno/drivers/console/Makefile 3ddb79b7UG2QiRAU-Wvc1Y_BLigu1Q xenolinux-2.4.16-sparse/arch/xeno/drivers/console/console.c 3ddb79b75eo4PRXkT6Th9popt_SJhg xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/Makefile +3e4a8cb79dT0F4q5T4GEqMj4CtAquQ xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c 3ddb79b7Xyaoep6U0kLvx6Kx7OauDw xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_core.c 3df9ce13K7qSLBtHV-01QHPW62649Q xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_memory.c 3ddb79b7PulSkF9m3c7K5MkxHRf4hA xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_ops.h @@ -317,10 +327,13 @@ 3ddb79b83Zj7Xn2QVhU4HeMuAC9FjA xenolinux-2.4.16-sparse/arch/xeno/mm/init.c 3df9ce13TRWIv0Mawm15zESP7jcT7A xenolinux-2.4.16-sparse/arch/xeno/mm/mmu_context.c 3ddb79b7aKdTkbr3u6aze8tVwGh_TQ xenolinux-2.4.16-sparse/arch/xeno/vmlinux.lds +3e4a8cb7lpFFwT1Iu9zXWc8Ew4klFA xenolinux-2.4.16-sparse/drivers/block/Config.in 3ddb79bbx682YH6vR2zbVOXwg73ULg xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c 3ddb79bcJfHdwrPsjqgI33_OsGdVCg xenolinux-2.4.16-sparse/drivers/block/rd.c 3ddb79bcpVu-IbnqwQqpRqsEbLpsuw xenolinux-2.4.16-sparse/drivers/char/tty_io.c 3e15d5273gfR2fbcYe05kqBSAvCX_w xenolinux-2.4.16-sparse/fs/exec.c +3e4a8cb7kqfJTMeOpPcYxqxv7N18DA xenolinux-2.4.16-sparse/fs/partitions/check.c +3e4a8cb7p079Xxly4uNcouacMSjJLw xenolinux-2.4.16-sparse/fs/partitions/msdos.c 3ddb79b8VFtfWSCrXKPN2K21zd_vtw xenolinux-2.4.16-sparse/include/asm-xeno/a.out.h 3ddb79b8Zzi13p3OAPV25QgiC3THAQ xenolinux-2.4.16-sparse/include/asm-xeno/apic.h 3ddb79baZDlsdV_m6C5CXnWMl15p1g xenolinux-2.4.16-sparse/include/asm-xeno/apicdef.h @@ -426,7 +439,10 @@ 3ddb79ba2qYtIQAT_-vCFkkZUXu_UQ xenolinux-2.4.16-sparse/include/asm-xeno/user.h 3ddb79bbqhb9X9qWOz5Bv4wOzrkITg xenolinux-2.4.16-sparse/include/asm-xeno/vga.h 3ddb79bbA52x94o6uwDYsbzrH2hjzA xenolinux-2.4.16-sparse/include/asm-xeno/xor.h +3e4a8cb7ON8EclY3NN3YPXyMT941hA xenolinux-2.4.16-sparse/include/linux/blk.h 
+3e4a8cb7GJrKD0z7EF0VZOhdEa01Mw xenolinux-2.4.16-sparse/include/linux/major.h 3ddb79bb_7YG4U75ZmEic9YXWTW7Vw xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h +3e4a8cb7j05wwb1uPZgY16s68o7qAw xenolinux-2.4.16-sparse/init/main.c 3ddb79bcxkVPfWlZ1PQKvDrfArzOVw xenolinux-2.4.16-sparse/kernel/panic.c 3ddb79bbP31im-mx2NbfthSeqty1Dg xenolinux-2.4.16-sparse/mk 3e15d52e0_j129JPvo7xfYGndVFpwQ xenolinux-2.4.16-sparse/mm/memory.c diff --git a/BitKeeper/etc/ignore b/BitKeeper/etc/ignore index 4fe10ce2d4..e5be6b0ec8 100644 --- a/BitKeeper/etc/ignore +++ b/BitKeeper/etc/ignore @@ -3,3 +3,19 @@ PENDING/* xen-2.4.16/common/kernel.c.old xen-2.4.16/common/kernel.c.ok-ish xen-2.4.16/size.image +xen-2.4.16/drivers/block/ll_rw_blk.c.orig +xen-2.4.16/drivers/ide/ide-disk.c.orig +xen-2.4.16/drivers/ide/ide-probe.c.orig +xen-2.4.16/drivers/ide/ide-taskfile.c.orig +xen-2.4.16/drivers/ide/ide.c.orig +xen-2.4.16/drivers/net/e1000/e1000.o +xen-2.4.16/drivers/net/e1000/e1000_ethtool.o +xen-2.4.16/drivers/net/e1000/e1000_hw.o +xen-2.4.16/drivers/net/e1000/e1000_main.o +xen-2.4.16/drivers/net/e1000/e1000_param.o +xen-2.4.16/include/hypervisor-ifs/block.h.orig +xen-2.4.16/include/xeno/blkdev.h.orig +xen-2.4.16/include/xeno/sched.h.orig +xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile.orig +xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c.orig +xenolinux-2.4.16-sparse/scripts/kconfig.tk diff --git a/xen-2.4.16/Rules.mk b/xen-2.4.16/Rules.mk index 8bea789439..33fb3d314b 100644 --- a/xen-2.4.16/Rules.mk +++ b/xen-2.4.16/Rules.mk @@ -15,6 +15,7 @@ OBJS += $(patsubst %.c,%.o,$(C_SRCS)) # Note that link order matters! 
ALL_OBJS := $(BASEDIR)/common/common.o ALL_OBJS += $(BASEDIR)/net/network.o +ALL_OBJS += $(BASEDIR)/drivers/char/driver.o ALL_OBJS += $(BASEDIR)/drivers/pci/driver.o ALL_OBJS += $(BASEDIR)/drivers/net/driver.o ALL_OBJS += $(BASEDIR)/drivers/block/driver.o diff --git a/xen-2.4.16/arch/i386/entry.S b/xen-2.4.16/arch/i386/entry.S index 34c8027eb0..928a96ed4e 100644 --- a/xen-2.4.16/arch/i386/entry.S +++ b/xen-2.4.16/arch/i386/entry.S @@ -524,6 +524,7 @@ ENTRY(hypervisor_call_table) .long SYMBOL_NAME(kill_domain) .long SYMBOL_NAME(do_dom0_op) .long SYMBOL_NAME(do_network_op) + .long SYMBOL_NAME(do_block_io_op) .long SYMBOL_NAME(do_set_debugreg) .long SYMBOL_NAME(do_get_debugreg) .long SYMBOL_NAME(do_update_descriptor) diff --git a/xen-2.4.16/arch/i386/io_apic.c b/xen-2.4.16/arch/i386/io_apic.c index c5ae3a3107..8ba37e3903 100644 --- a/xen-2.4.16/arch/i386/io_apic.c +++ b/xen-2.4.16/arch/i386/io_apic.c @@ -28,11 +28,23 @@ #include <xeno/config.h> #include <asm/mc146818rtc.h> #include <asm/io.h> -#include <asm/desc.h> #include <asm/smp.h> +#include <asm/desc.h> +#include <asm/smpboot.h> + + +static unsigned int nmi_watchdog; /* XXXX XEN */ + +#undef APIC_LOCKUP_DEBUG + +#define APIC_LOCKUP_DEBUG static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED; +unsigned int int_dest_addr_mode = APIC_DEST_LOGICAL; +unsigned char int_delivery_mode = dest_LowestPrio; + + /* * # of IRQ routing registers */ @@ -47,6 +59,7 @@ int nr_ioapic_registers[MAX_IO_APICS]; /* * This is performance-critical, we want to do it O(1) + * * the indexing order of this array favors 1:1 mappings * between pins and IRQs. */ @@ -60,7 +73,7 @@ static struct irq_pin_list { * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. 
*/ -static void add_pin_to_irq(unsigned int irq, int apic, int pin) +static void __init add_pin_to_irq(unsigned int irq, int apic, int pin) { static int first_free_entry = NR_IRQS; struct irq_pin_list *entry = irq_2_pin + irq; @@ -78,6 +91,26 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) entry->pin = pin; } +/* + * Reroute an IRQ to a different pin. + */ +static void __init replace_pin_at_irq(unsigned int irq, + int oldapic, int oldpin, + int newapic, int newpin) +{ + struct irq_pin_list *entry = irq_2_pin + irq; + + while (1) { + if (entry->apic == oldapic && entry->pin == oldpin) { + entry->apic = newapic; + entry->pin = newpin; + } + if (!entry->next) + break; + entry = irq_2_pin + entry->next; + } +} + #define __DO_ACTION(R, ACTION, FINAL) \ \ { \ @@ -157,6 +190,66 @@ static void clear_IO_APIC (void) } /* + * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to + * specific CPU-side IRQs. + */ + +#define MAX_PIRQS 8 +int pirq_entries [MAX_PIRQS]; +int pirqs_enabled; + +int skip_ioapic_setup; +#if 0 + +static int __init noioapic_setup(char *str) +{ + skip_ioapic_setup = 1; + return 1; +} + +__setup("noapic", noioapic_setup); + +static int __init ioapic_setup(char *str) +{ + skip_ioapic_setup = 0; + return 1; +} + +__setup("apic", ioapic_setup); + + + +static int __init ioapic_pirq_setup(char *str) +{ + int i, max; + int ints[MAX_PIRQS+1]; + + get_options(str, ARRAY_SIZE(ints), ints); + + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; + + pirqs_enabled = 1; + printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n"); + max = MAX_PIRQS; + if (ints[0] < MAX_PIRQS) + max = ints[0]; + + for (i = 0; i < max; i++) { + printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); + /* + * PIRQs are mapped upside down, usually. + */ + pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; + } + return 1; +} + +__setup("pirq=", ioapic_pirq_setup); + +#endif + +/* * Find the IRQ entry number of a certain pin. 
*/ static int __init find_irq_entry(int apic, int pin, int type) @@ -206,7 +299,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", bus, slot, pin); - if (mp_bus_id_to_pci_bus[bus] == -1) { + if ((mp_bus_id_to_pci_bus==NULL) || (mp_bus_id_to_pci_bus[bus] == -1)) { printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); return -1; } @@ -466,6 +559,20 @@ static int pin_2_irq(int idx, int apic, int pin) } } + /* + * PCI IRQ command line redirection. Yes, limits are hardcoded. + */ + if ((pin >= 16) && (pin <= 23)) { + if (pirq_entries[pin-16] != -1) { + if (!pirq_entries[pin-16]) { + printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16); + } else { + irq = pirq_entries[pin-16]; + printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n", + pin-16, irq); + } + } + } return irq; } @@ -495,11 +602,17 @@ static int __init assign_irq_vector(int irq) return IO_APIC_VECTOR(irq); next: current_vector += 8; + /* XXX Skip the guestOS -> Xen syscall vector! XXX */ if (current_vector == HYPERVISOR_CALL_VECTOR) goto next; /* XXX Skip the Linux/BSD fast-trap vector! 
XXX */ if (current_vector == 0x80) goto next; +#if 0 + if (current_vector == SYSCALL_VECTOR) + goto next; +#endif + if (current_vector > FIRST_SYSTEM_VECTOR) { offset++; current_vector = FIRST_DEVICE_VECTOR + offset; @@ -532,10 +645,10 @@ void __init setup_IO_APIC_irqs(void) */ memset(&entry,0,sizeof(entry)); - entry.delivery_mode = dest_LowestPrio; - entry.dest_mode = INT_DELIVERY_MODE; + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = (INT_DEST_ADDR_MODE != 0); entry.mask = 0; /* enable IRQ */ - entry.dest.logical.logical_dest = TARGET_CPUS; + entry.dest.logical.logical_dest = target_cpus(); idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { @@ -553,11 +666,18 @@ void __init setup_IO_APIC_irqs(void) if (irq_trigger(idx)) { entry.trigger = 1; entry.mask = 1; - entry.dest.logical.logical_dest = TARGET_CPUS; } irq = pin_2_irq(idx, apic, pin); - add_pin_to_irq(irq, apic, pin); + /* + * skip adding the timer int on secondary nodes, which causes + * a small but painful rift in the time-space continuum + */ + if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) + && (apic != 0) && (irq == 0)) + continue; + else + add_pin_to_irq(irq, apic, pin); if (!apic && !IO_APIC_IRQ(irq)) continue; @@ -607,16 +727,16 @@ void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) * We use logical delivery to get the timer IRQ * to the first CPU. */ - entry.dest_mode = INT_DELIVERY_MODE; + entry.dest_mode = (INT_DEST_ADDR_MODE != 0); entry.mask = 0; /* unmask IRQ now */ - entry.dest.logical.logical_dest = TARGET_CPUS; - entry.delivery_mode = dest_LowestPrio; + entry.dest.logical.logical_dest = target_cpus(); + entry.delivery_mode = INT_DELIVERY_MODE; entry.polarity = 0; entry.trigger = 0; entry.vector = vector; /* - * The timer IRQ doesnt have to know that behind the + * The timer IRQ doesn't have to know that behind the * scene we have a 8259A-master in AEOI mode ... 
*/ irq_desc[0].handler = &ioapic_edge_irq_type; @@ -634,8 +754,9 @@ void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) void __init UNEXPECTED_IO_APIC(void) { - printk(KERN_WARNING " WARNING: unexpected IO-APIC, please mail\n"); - printk(KERN_WARNING " to linux-smp@vger.kernel.org\n"); + printk(KERN_WARNING + "An unexpected IO-APIC was found. If this kernel release is less than\n" + "three months old please report this to linux-smp@vger.kernel.org\n"); } void __init print_IO_APIC(void) @@ -667,7 +788,7 @@ void __init print_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); + printk(KERN_DEBUG "IO APIC #%d..XXXX....\n", mp_ioapics[apic].mpc_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)&reg_00); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.ID); if (reg_00.__reserved_1 || reg_00.__reserved_2) @@ -688,6 +809,7 @@ void __init print_IO_APIC(void) printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.PRQ); printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.version); if ( (reg_01.version != 0x01) && /* 82489DX IO-APICs */ + (reg_01.version != 0x02) && /* VIA */ (reg_01.version != 0x10) && /* oldest IO-APICs */ (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */ (reg_01.version != 0x13) && /* Xeon IO-APICs */ @@ -898,6 +1020,9 @@ static void __init enable_IO_APIC(void) irq_2_pin[i].pin = -1; irq_2_pin[i].next = 0; } + if (!pirqs_enabled) + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; /* * The number of IO-APIC IRQ registers (== #pins): @@ -944,6 +1069,9 @@ static void __init setup_ioapic_ids_from_mpc (void) unsigned char old_id; unsigned long flags; + if (clustered_apic_mode) + /* We don't have a good way to do this yet - hack */ + phys_id_present_map = (u_long) 0xf; /* * Set the IOAPIC ID to the value stored in the MPC table. 
*/ @@ -956,7 +1084,7 @@ static void __init setup_ioapic_ids_from_mpc (void) old_id = mp_ioapics[apic].mpc_apicid; - if (mp_ioapics[apic].mpc_apicid >= 0xf) { + if (mp_ioapics[apic].mpc_apicid >= apic_broadcast_id) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", apic, mp_ioapics[apic].mpc_apicid); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", @@ -968,14 +1096,16 @@ static void __init setup_ioapic_ids_from_mpc (void) * Sanity check, is the ID really free? Every APIC in a * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. + * I/O APIC IDs no longer have any meaning for xAPICs and SAPICs. */ - if (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid)) { + if ((clustered_apic_mode != CLUSTERED_APIC_XAPIC) && + (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid))) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", apic, mp_ioapics[apic].mpc_apicid); for (i = 0; i < 0xf; i++) if (!(phys_id_present_map & (1 << i))) break; - if (i >= 0xf) + if (i >= apic_broadcast_id) panic("Max APIC ID exceeded!\n"); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); @@ -1170,6 +1300,10 @@ static void end_level_ioapic_irq (unsigned int irq) #ifdef APIC_LOCKUP_DEBUG struct irq_pin_list *entry; #endif + +#ifdef APIC_MISMATCH_DEBUG + atomic_inc(&irq_mis_count); +#endif spin_lock(&ioapic_lock); __mask_and_edge_IO_APIC_irq(irq); #ifdef APIC_LOCKUP_DEBUG @@ -1302,6 +1436,36 @@ static struct hw_interrupt_type lapic_irq_type = { end_lapic_irq }; +static void enable_NMI_through_LVT0 (void * dummy) +{ + unsigned int v, ver; + + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); + v = APIC_DM_NMI; /* unmask and set to NMI */ + if (!APIC_INTEGRATED(ver)) /* 82489DX */ + v |= APIC_LVT_LEVEL_TRIGGER; + apic_write_around(APIC_LVT0, v); +} + +static void setup_nmi (void) +{ + /* + * Dirty trick to enable the NMI watchdog ... 
+ * We put the 8259A master into AEOI mode and + * unmask on all local APICs LVT0 as NMI. + * + * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') + * is from Maciej W. Rozycki - so we do not have to EOI from + * the NMI handler or the timer interrupt. + */ + printk(KERN_INFO "activating NMI Watchdog ..."); + + smp_call_function(enable_NMI_through_LVT0, NULL, 1, 1); + enable_NMI_through_LVT0(NULL); + + printk(" done.\n"); +} /* * This looks a bit hackish but it's about the only one way of sending @@ -1407,6 +1571,12 @@ static inline void check_timer(void) */ unmask_IO_APIC_irq(0); if (timer_irq_works()) { + if (nmi_watchdog == NMI_IO_APIC) { + disable_8259A_irq(0); + setup_nmi(); + enable_8259A_irq(0); + // XXX Xen check_nmi_watchdog(); + } return; } clear_IO_APIC_pin(0, pin1); @@ -1422,6 +1592,14 @@ static inline void check_timer(void) setup_ExtINT_IRQ0_pin(pin2, vector); if (timer_irq_works()) { printk("works.\n"); + if (pin1 != -1) + replace_pin_at_irq(0, 0, pin1, 0, pin2); + else + add_pin_to_irq(0, 0, pin2); + if (nmi_watchdog == NMI_IO_APIC) { + setup_nmi(); + // XXX Xen check_nmi_watchdog(); + } return; } /* @@ -1431,6 +1609,11 @@ static inline void check_timer(void) } printk(" failed.\n"); + if (nmi_watchdog) { + printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); + nmi_watchdog = 0; + } + printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); disable_8259A_irq(0); @@ -1462,10 +1645,19 @@ static inline void check_timer(void) } /* + * * IRQ's that are handled by the old PIC in all cases: * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. * Linux doesn't really care, as it's not actually used * for any interrupt handling anyway. + * - There used to be IRQ13 here as well, but all + * MPS-compliant must not use it for FPU coupling and we + * want to use exception 16 anyway. And there are + * systems who connect it to an I/O APIC for other uses. 
+ * Thus we don't mark it special any longer. + * + * Additionally, something is definitely wrong with irq9 + * on PIIX4 boards. */ #define PIC_IRQS (1<<2) diff --git a/xen-2.4.16/arch/i386/mpparse.c b/xen-2.4.16/arch/i386/mpparse.c index c5cf58a312..4f0edeea0e 100644 --- a/xen-2.4.16/arch/i386/mpparse.c +++ b/xen-2.4.16/arch/i386/mpparse.c @@ -20,6 +20,10 @@ #include <xeno/smp.h> #include <asm/mpspec.h> #include <asm/pgalloc.h> +#include <asm/smpboot.h> +#include <xeno/kernel.h> + +int numnodes = 1; /* XXX Xen */ /* Have we found an MP table */ int smp_found_config; @@ -29,16 +33,20 @@ int smp_found_config; * MP-table. */ int apic_version [MAX_APICS]; -int mp_bus_id_to_type [MAX_MP_BUSSES]; -int mp_bus_id_to_node [MAX_MP_BUSSES]; -int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; +int quad_local_to_mp_bus_id [NR_CPUS/4][4]; int mp_current_pci_id; +int *mp_bus_id_to_type; +int *mp_bus_id_to_node; +int *mp_bus_id_to_local; +int *mp_bus_id_to_pci_bus; +int max_mp_busses; +int max_irq_sources; /* I/O APIC entries */ struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; /* # of MP IRQ source entries */ -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +struct mpc_config_intsrc *mp_irqs; /* MP IRQ source entries */ int mp_irq_entries; @@ -56,23 +64,32 @@ static unsigned int num_processors; /* Bitmask of physically existing CPUs */ unsigned long phys_cpu_present_map; +unsigned long logical_cpu_present_map; + +#ifdef CONFIG_X86_CLUSTERED_APIC +unsigned char esr_disable = 0; +unsigned char clustered_apic_mode = CLUSTERED_APIC_NONE; +unsigned int apic_broadcast_id = APIC_BROADCAST_ID_APIC; +#endif +unsigned char raw_phys_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; /* * Intel MP BIOS table parsing routines: */ +#ifndef CONFIG_X86_VISWS_APIC /* * Checksum an MP configuration block. 
*/ static int __init mpf_checksum(unsigned char *mp, int len) { - int sum = 0; + int sum = 0; - while (len--) - sum += *mp++; + while (len--) + sum += *mp++; - return sum & 0xFF; + return sum & 0xFF; } /* @@ -81,38 +98,47 @@ static int __init mpf_checksum(unsigned char *mp, int len) static char __init *mpc_family(int family,int model) { - static char n[32]; - static char *model_defs[]= - { - "80486DX","80486DX", - "80486SX","80486DX/2 or 80487", - "80486SL","80486SX/2", - "Unknown","80486DX/2-WB", - "80486DX/4","80486DX/4-WB" - }; - - switch (family) { - case 0x04: - if (model < 10) - return model_defs[model]; - break; - - case 0x05: - return("Pentium(tm)"); - - case 0x06: - return("Pentium(tm) Pro"); - - case 0x0F: - if (model == 0x00) - return("Pentium 4(tm)"); - if (model == 0x0F) - return("Special controller"); - } - sprintf(n,"Unknown CPU [%d:%d]",family, model); - return n; + static char n[32]; + static char *model_defs[]= + { + "80486DX","80486DX", + "80486SX","80486DX/2 or 80487", + "80486SL","80486SX/2", + "Unknown","80486DX/2-WB", + "80486DX/4","80486DX/4-WB" + }; + + switch (family) { + case 0x04: + if (model < 10) + return model_defs[model]; + break; + + case 0x05: + return("Pentium(tm)"); + + case 0x06: + return("Pentium(tm) Pro"); + + case 0x0F: + if (model == 0x00) + return("Pentium 4(tm)"); + if (model == 0x02) + return("Pentium 4(tm) XEON(tm)"); + if (model == 0x0F) + return("Special controller"); + } + sprintf(n,"Unknown CPU [%d:%d]",family, model); + return n; } +#ifdef CONFIG_X86_IO_APIC +// XXX Xen extern int have_acpi_tables; /* set by acpitable.c */ +#define have_acpi_tables (0) +#else +#define have_acpi_tables (0) +#endif + /* * Have to match translation table entries to main table entries by counter * hence the mpc_record variable .... 
can't see a less disgusting way of @@ -120,127 +146,256 @@ static char __init *mpc_family(int family,int model) */ static int mpc_record; +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata; void __init MP_processor_info (struct mpc_config_processor *m) { - int ver, logical_apicid; + int ver, quad, logical_apicid; - if (!(m->mpc_cpuflag & CPU_ENABLED)) - return; - - logical_apicid = m->mpc_apicid; - printk("Processor #%d %s APIC version %d\n", - m->mpc_apicid, - mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 , - (m->mpc_cpufeature & CPU_MODEL_MASK)>>4), - m->mpc_apicver); - - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { - Dprintk(" Bootup CPU\n"); - boot_cpu_physical_apicid = m->mpc_apicid; - boot_cpu_logical_apicid = logical_apicid; - } - - num_processors++; - - if (m->mpc_apicid > MAX_APICS) { - printk("Processor #%d INVALID. (Max ID: %d).\n", - m->mpc_apicid, MAX_APICS); - return; - } - ver = m->mpc_apicver; - - phys_cpu_present_map |= 1 << m->mpc_apicid; - - /* - * Validate version - */ - if (ver == 0x0) { - printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid); - ver = 0x10; - } - apic_version[m->mpc_apicid] = ver; + if (!(m->mpc_cpuflag & CPU_ENABLED)) + return; + + logical_apicid = m->mpc_apicid; + if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) { + quad = translation_table[mpc_record]->trans_quad; + logical_apicid = (quad << 4) + + (m->mpc_apicid ? 
m->mpc_apicid << 1 : 1); + printk("Processor #%d %s APIC version %d (quad %d, apic %d)\n", + m->mpc_apicid, + mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 , + (m->mpc_cpufeature & CPU_MODEL_MASK)>>4), + m->mpc_apicver, quad, logical_apicid); + } else { + printk("Processor #%d %s APIC version %d\n", + m->mpc_apicid, + mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 , + (m->mpc_cpufeature & CPU_MODEL_MASK)>>4), + m->mpc_apicver); + } + + if (m->mpc_featureflag&(1<<0)) + Dprintk(" Floating point unit present.\n"); + if (m->mpc_featureflag&(1<<7)) + Dprintk(" Machine Exception supported.\n"); + if (m->mpc_featureflag&(1<<8)) + Dprintk(" 64 bit compare & exchange supported.\n"); + if (m->mpc_featureflag&(1<<9)) + Dprintk(" Internal APIC present.\n"); + if (m->mpc_featureflag&(1<<11)) + Dprintk(" SEP present.\n"); + if (m->mpc_featureflag&(1<<12)) + Dprintk(" MTRR present.\n"); + if (m->mpc_featureflag&(1<<13)) + Dprintk(" PGE present.\n"); + if (m->mpc_featureflag&(1<<14)) + Dprintk(" MCA present.\n"); + if (m->mpc_featureflag&(1<<15)) + Dprintk(" CMOV present.\n"); + if (m->mpc_featureflag&(1<<16)) + Dprintk(" PAT present.\n"); + if (m->mpc_featureflag&(1<<17)) + Dprintk(" PSE present.\n"); + if (m->mpc_featureflag&(1<<18)) + Dprintk(" PSN present.\n"); + if (m->mpc_featureflag&(1<<19)) + Dprintk(" Cache Line Flush Instruction present.\n"); + /* 20 Reserved */ + if (m->mpc_featureflag&(1<<21)) + Dprintk(" Debug Trace and EMON Store present.\n"); + if (m->mpc_featureflag&(1<<22)) + Dprintk(" ACPI Thermal Throttle Registers present.\n"); + if (m->mpc_featureflag&(1<<23)) + Dprintk(" MMX present.\n"); + if (m->mpc_featureflag&(1<<24)) + Dprintk(" FXSR present.\n"); + if (m->mpc_featureflag&(1<<25)) + Dprintk(" XMM present.\n"); + if (m->mpc_featureflag&(1<<26)) + Dprintk(" Willamette New Instructions present.\n"); + if (m->mpc_featureflag&(1<<27)) + Dprintk(" Self Snoop present.\n"); + if (m->mpc_featureflag&(1<<28)) + Dprintk(" HT present.\n"); + if 
(m->mpc_featureflag&(1<<29)) + Dprintk(" Thermal Monitor present.\n"); + /* 30, 31 Reserved */ + + + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { + Dprintk(" Bootup CPU\n"); + boot_cpu_physical_apicid = m->mpc_apicid; + boot_cpu_logical_apicid = logical_apicid; + } + + num_processors++; + + if (m->mpc_apicid > MAX_APICS) { + printk("Processor #%d INVALID. (Max ID: %d).\n", + m->mpc_apicid, MAX_APICS); + --num_processors; + return; + } + ver = m->mpc_apicver; + + logical_cpu_present_map |= 1 << (num_processors-1); + phys_cpu_present_map |= apicid_to_phys_cpu_present(m->mpc_apicid); + + /* + * Validate version + */ + if (ver == 0x0) { + printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid); + ver = 0x10; + } + apic_version[m->mpc_apicid] = ver; + raw_phys_apicid[num_processors - 1] = m->mpc_apicid; } static void __init MP_bus_info (struct mpc_config_bus *m) { - char str[7]; + char str[7]; + int quad; - memcpy(str, m->mpc_bustype, 6); - str[6] = 0; + memcpy(str, m->mpc_bustype, 6); + str[6] = 0; - Dprintk("Bus #%d is %s\n", m->mpc_busid, str); - - if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; - } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; - } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; - mp_current_pci_id++; - } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; - } else { - printk("Unknown bustype %s - ignoring\n", str); - } + if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) { + quad = translation_table[mpc_record]->trans_quad; + mp_bus_id_to_node[m->mpc_busid] = quad; + mp_bus_id_to_local[m->mpc_busid] = translation_table[mpc_record]->trans_local; + 
quad_local_to_mp_bus_id[quad][translation_table[mpc_record]->trans_local] = m->mpc_busid; + printk("Bus #%d is %s (node %d)\n", m->mpc_busid, str, quad); + } else { + Dprintk("Bus #%d is %s\n", m->mpc_busid, str); + } + + if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; + } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; + } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; + mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; + mp_current_pci_id++; + } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; + } else { + printk("Unknown bustype %s - ignoring\n", str); + } } static void __init MP_ioapic_info (struct mpc_config_ioapic *m) { - if (!(m->mpc_flags & MPC_APIC_USABLE)) - return; - - printk("I/O APIC #%d Version %d at 0x%lX.\n", - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); - if (nr_ioapics >= MAX_IO_APICS) { - printk("Max # of I/O APICs (%d) exceeded (found %d).\n", - MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); - } - if (!m->mpc_apicaddr) { - printk("WARNING: bogus zero I/O APIC address" - " found in MP table, skipping!\n"); - return; - } - mp_ioapics[nr_ioapics] = *m; - nr_ioapics++; + if (!(m->mpc_flags & MPC_APIC_USABLE)) + return; + + printk("I/O APIC #%d Version %d at 0x%lX.\n", + m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); + if (nr_ioapics >= MAX_IO_APICS) { + printk("Max # of I/O APICs (%d) exceeded (found %d).\n", + MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); + } + if (!m->mpc_apicaddr) { + printk(KERN_ERR "WARNING: bogus zero I/O APIC address" + " found in MP table, skipping!\n"); + return; + } + mp_ioapics[nr_ioapics] = *m; + nr_ioapics++; } static void __init MP_intsrc_info (struct mpc_config_intsrc *m) 
{ - mp_irqs [mp_irq_entries] = *m; - Dprintk("Int: type %d, pol %d, trig %d, bus %d," - " IRQ %02x, APIC ID %x, APIC INT %02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, - m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!!\n"); + mp_irqs [mp_irq_entries] = *m; + Dprintk("Int: type %d, pol %d, trig %d, bus %d," + " IRQ %02x, APIC ID %x, APIC INT %02x\n", + m->mpc_irqtype, m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, + m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); + if (++mp_irq_entries == max_irq_sources) + panic("Max # of irq sources exceeded!!\n"); } static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) { - Dprintk("Lint: type %d, pol %d, trig %d, bus %d," - " IRQ %02x, APIC ID %x, APIC LINT %02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, - m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); - /* - * Well it seems all SMP boards in existence - * use ExtINT/LVT1 == LINT0 and - * NMI/LVT2 == LINT1 - the following check - * will show us if this assumptions is false. - * Until then we do not have to add baggage. - */ - if ((m->mpc_irqtype == mp_ExtINT) && - (m->mpc_destapiclint != 0)) - BUG(); - if ((m->mpc_irqtype == mp_NMI) && - (m->mpc_destapiclint != 1)) - BUG(); + Dprintk("Lint: type %d, pol %d, trig %d, bus %d," + " IRQ %02x, APIC ID %x, APIC LINT %02x\n", + m->mpc_irqtype, m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, + m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); + /* + * Well it seems all SMP boards in existence + * use ExtINT/LVT1 == LINT0 and + * NMI/LVT2 == LINT1 - the following check + * will show us if this assumptions is false. + * Until then we do not have to add baggage. 
+ */ + if ((m->mpc_irqtype == mp_ExtINT) && + (m->mpc_destapiclint != 0)) + BUG(); + if ((m->mpc_irqtype == mp_NMI) && + (m->mpc_destapiclint != 1)) + BUG(); } +static void __init MP_translation_info (struct mpc_config_translation *m) +{ + printk("Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local); + + if (mpc_record >= MAX_MPC_ENTRY) + printk("MAX_MPC_ENTRY exceeded!\n"); + else + translation_table[mpc_record] = m; /* stash this for later */ + if (m->trans_quad+1 > numnodes) + numnodes = m->trans_quad+1; +} + +/* + * Read/parse the MPC oem tables + */ + +static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \ + unsigned short oemsize) +{ + int count = sizeof (*oemtable); /* the header size */ + unsigned char *oemptr = ((unsigned char *)oemtable)+count; + + printk("Found an OEM MPC table at %8p - parsing it ... \n", oemtable); + if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4)) + { + printk("SMP mpc oemtable: bad signature [%c%c%c%c]!\n", + oemtable->oem_signature[0], + oemtable->oem_signature[1], + oemtable->oem_signature[2], + oemtable->oem_signature[3]); + return; + } + if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length)) + { + printk("SMP oem mptable: checksum error!\n"); + return; + } + while (count < oemtable->oem_length) { + switch (*oemptr) { + case MP_TRANSLATION: + { + struct mpc_config_translation *m= + (struct mpc_config_translation *)oemptr; + MP_translation_info(m); + oemptr += sizeof(*m); + count += sizeof(*m); + ++mpc_record; + break; + } + default: + { + printk("Unrecognised OEM table entry type! 
- %d\n", (int) *oemptr); + return; + } + } + } +} /* * Read/parse the MPC @@ -248,383 +403,542 @@ static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) static int __init smp_read_mpc(struct mp_config_table *mpc) { - char str[16]; - int count=sizeof(*mpc); - unsigned char *mpt=((unsigned char *)mpc)+count; - - if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { - panic("SMP mptable: bad signature [%c%c%c%c]!\n", - mpc->mpc_signature[0], - mpc->mpc_signature[1], - mpc->mpc_signature[2], - mpc->mpc_signature[3]); - return 0; - } - if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { - panic("SMP mptable: checksum error!\n"); - return 0; - } - if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { - printk("SMP mptable: bad table version (%d)!!\n", - mpc->mpc_spec); - return 0; - } - if (!mpc->mpc_lapic) { - printk("SMP mptable: null local APIC address!\n"); - return 0; - } - memcpy(str,mpc->mpc_oem,8); - str[8]=0; - printk("OEM ID: %s ",str); - - memcpy(str,mpc->mpc_productid,12); - str[12]=0; - printk("Product ID: %s ",str); - - printk("APIC at: 0x%lX\n", mpc->mpc_lapic); - - /* save the local APIC address, it might be non-default. */ - mp_lapic_addr = mpc->mpc_lapic; - - /* - * Now process the configuration blocks. 
- */ - while (count < mpc->mpc_length) { - switch(*mpt) { - case MP_PROCESSOR: - { - struct mpc_config_processor *m= - (struct mpc_config_processor *)mpt; - - MP_processor_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_BUS: - { - struct mpc_config_bus *m= - (struct mpc_config_bus *)mpt; - MP_bus_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_IOAPIC: - { - struct mpc_config_ioapic *m= - (struct mpc_config_ioapic *)mpt; - MP_ioapic_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - case MP_INTSRC: - { - struct mpc_config_intsrc *m= - (struct mpc_config_intsrc *)mpt; - - MP_intsrc_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - case MP_LINTSRC: - { - struct mpc_config_lintsrc *m= - (struct mpc_config_lintsrc *)mpt; - MP_lintsrc_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - default: - { - count = mpc->mpc_length; - break; - } - } - ++mpc_record; - } - - if (!num_processors) - printk("SMP mptable: no processors registered!\n"); - return num_processors; + char oem[16], prod[14]; + int count=sizeof(*mpc); + unsigned char *mpt=((unsigned char *)mpc)+count; + int num_bus = 0; + int num_irq = 0; + unsigned char *bus_data; + + if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { + panic("SMP mptable: bad signature [%c%c%c%c]!\n", + mpc->mpc_signature[0], + mpc->mpc_signature[1], + mpc->mpc_signature[2], + mpc->mpc_signature[3]); + return 0; + } + if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { + panic("SMP mptable: checksum error!\n"); + return 0; + } + if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { + printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", + mpc->mpc_spec); + return 0; + } + if (!mpc->mpc_lapic) { + printk(KERN_ERR "SMP mptable: null local APIC address!\n"); + return 0; + } + memcpy(oem,mpc->mpc_oem,8); + oem[8]=0; + printk("OEM ID: %s ",oem); + + memcpy(prod,mpc->mpc_productid,12); + prod[12]=0; + printk("Product ID: %s ",prod); + + 
detect_clustered_apic(oem, prod); + + printk("APIC at: 0x%lX\n",mpc->mpc_lapic); + + /* save the local APIC address, it might be non-default, + * but only if we're not using the ACPI tables + */ + if (!have_acpi_tables) + mp_lapic_addr = mpc->mpc_lapic; + + if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) && mpc->mpc_oemptr) { + /* We need to process the oem mpc tables to tell us which quad things are in ... */ + mpc_record = 0; + smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, mpc->mpc_oemsize); + mpc_record = 0; + } + + /* Pre-scan to determine the number of bus and + * interrupts records we have + */ + while (count < mpc->mpc_length) { + switch (*mpt) { + case MP_PROCESSOR: + mpt += sizeof(struct mpc_config_processor); + count += sizeof(struct mpc_config_processor); + break; + case MP_BUS: + ++num_bus; + mpt += sizeof(struct mpc_config_bus); + count += sizeof(struct mpc_config_bus); + break; + case MP_INTSRC: + ++num_irq; + mpt += sizeof(struct mpc_config_intsrc); + count += sizeof(struct mpc_config_intsrc); + break; + case MP_IOAPIC: + mpt += sizeof(struct mpc_config_ioapic); + count += sizeof(struct mpc_config_ioapic); + break; + case MP_LINTSRC: + mpt += sizeof(struct mpc_config_lintsrc); + count += sizeof(struct mpc_config_lintsrc); + break; + default: + count = mpc->mpc_length; + break; + } + } + /* + * Paranoia: Allocate one extra of both the number of busses and number + * of irqs, and make sure that we have at least 4 interrupts per PCI + * slot. But some machines do not report very many busses, so we need + * to fall back on the older defaults. 
+ */ + ++num_bus; + max_mp_busses = max(num_bus, MAX_MP_BUSSES); + if (num_irq < (4 * max_mp_busses)) + num_irq = 4 * num_bus; /* 4 intr/PCI slot */ + ++num_irq; + max_irq_sources = max(num_irq, MAX_IRQ_SOURCES); + + count = (max_mp_busses * sizeof(int)) * 4; + count += (max_irq_sources * sizeof(struct mpc_config_intsrc)); + + { + //bus_data = alloc_bootmem(count); XXX Xen + static char arr[4096]; + if(count > 4096) BUG(); + bus_data = (void*)arr; + + } + if (!bus_data) { + printk(KERN_ERR "SMP mptable: out of memory!\n"); + return 0; + } + mp_bus_id_to_type = (int *)&bus_data[0]; + mp_bus_id_to_node = (int *)&bus_data[(max_mp_busses * sizeof(int))]; + mp_bus_id_to_local = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 2]; + mp_bus_id_to_pci_bus = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 3]; + mp_irqs = (struct mpc_config_intsrc *)&bus_data[(max_mp_busses * sizeof(int)) * 4]; + memset(mp_bus_id_to_pci_bus, -1, max_mp_busses); + + /* + * Now process the configuration blocks. + */ + count = sizeof(*mpc); + mpt = ((unsigned char *)mpc)+count; + while (count < mpc->mpc_length) { + switch(*mpt) { + case MP_PROCESSOR: + { + struct mpc_config_processor *m= + (struct mpc_config_processor *)mpt; + + /* ACPI may already have provided this one for us */ + if (!have_acpi_tables) + MP_processor_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_BUS: + { + struct mpc_config_bus *m= + (struct mpc_config_bus *)mpt; + MP_bus_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_IOAPIC: + { + struct mpc_config_ioapic *m= + (struct mpc_config_ioapic *)mpt; + MP_ioapic_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_INTSRC: + { + struct mpc_config_intsrc *m= + (struct mpc_config_intsrc *)mpt; + + MP_intsrc_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_LINTSRC: + { + struct mpc_config_lintsrc *m= + (struct mpc_config_lintsrc *)mpt; + MP_lintsrc_info(m); + mpt+=sizeof(*m); + 
count+=sizeof(*m); + break; + } + default: + { + count = mpc->mpc_length; + break; + } + } + ++mpc_record; + } + + if (clustered_apic_mode){ + phys_cpu_present_map = logical_cpu_present_map; + } + + + printk("Enabling APIC mode: "); + if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) + printk("Clustered Logical. "); + else if(clustered_apic_mode == CLUSTERED_APIC_XAPIC) + printk("Physical. "); + else + printk("Flat. "); + printk("Using %d I/O APICs\n",nr_ioapics); + + if (!num_processors) + printk(KERN_ERR "SMP mptable: no processors registered!\n"); + return num_processors; } static int __init ELCR_trigger(unsigned int irq) { - unsigned int port; + unsigned int port; - port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; + port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; } static void __init construct_default_ioirq_mptable(int mpc_default_type) { - struct mpc_config_intsrc intsrc; - int i; - int ELCR_fallback = 0; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* conforming */ - intsrc.mpc_srcbus = 0; - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; - - intsrc.mpc_irqtype = mp_INT; - - /* - * If true, we have an ISA/PCI system with no IRQ entries - * in the MP table. To prevent the PCI interrupts from being set up - * incorrectly, we try to use the ELCR. The sanity check to see if - * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can - * never be level sensitive, so we simply see if the ELCR agrees. - * If it does, we assume it's valid. - */ - if (mpc_default_type == 5) { - printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); - - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) - printk("ELCR contains invalid data... 
not using ELCR\n"); - else { - printk("Using ELCR to identify PCI interrupts\n"); - ELCR_fallback = 1; - } - } - - for (i = 0; i < 16; i++) { - switch (mpc_default_type) { - case 2: - if (i == 0 || i == 13) - continue; /* IRQ0 & IRQ13 not connected */ - /* fall through */ - default: - if (i == 2) - continue; /* IRQ2 is never connected */ - } - - if (ELCR_fallback) { - /* - * If the ELCR indicates a level-sensitive interrupt, we - * copy that information over to the MP table in the - * irqflag field (level sensitive, active high polarity). - */ - if (ELCR_trigger(i)) - intsrc.mpc_irqflag = 13; - else - intsrc.mpc_irqflag = 0; - } - - intsrc.mpc_srcbusirq = i; - intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ - MP_intsrc_info(&intsrc); - } - - intsrc.mpc_irqtype = mp_ExtINT; - intsrc.mpc_srcbusirq = 0; - intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ - MP_intsrc_info(&intsrc); + struct mpc_config_intsrc intsrc; + int i; + int ELCR_fallback = 0; + + intsrc.mpc_type = MP_INTSRC; + intsrc.mpc_irqflag = 0; /* conforming */ + intsrc.mpc_srcbus = 0; + intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; + + intsrc.mpc_irqtype = mp_INT; + + /* + * If true, we have an ISA/PCI system with no IRQ entries + * in the MP table. To prevent the PCI interrupts from being set up + * incorrectly, we try to use the ELCR. The sanity check to see if + * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can + * never be level sensitive, so we simply see if the ELCR agrees. + * If it does, we assume it's valid. + */ + if (mpc_default_type == 5) { + printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); + + if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) + printk("ELCR contains invalid data... 
not using ELCR\n"); + else { + printk("Using ELCR to identify PCI interrupts\n"); + ELCR_fallback = 1; + } + } + + for (i = 0; i < 16; i++) { + switch (mpc_default_type) { + case 2: + if (i == 0 || i == 13) + continue; /* IRQ0 & IRQ13 not connected */ + /* fall through */ + default: + if (i == 2) + continue; /* IRQ2 is never connected */ + } + + if (ELCR_fallback) { + /* + * If the ELCR indicates a level-sensitive interrupt, we + * copy that information over to the MP table in the + * irqflag field (level sensitive, active high polarity). + */ + if (ELCR_trigger(i)) + intsrc.mpc_irqflag = 13; + else + intsrc.mpc_irqflag = 0; + } + + intsrc.mpc_srcbusirq = i; + intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ + MP_intsrc_info(&intsrc); + } + + intsrc.mpc_irqtype = mp_ExtINT; + intsrc.mpc_srcbusirq = 0; + intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ + MP_intsrc_info(&intsrc); } static inline void __init construct_default_ISA_mptable(int mpc_default_type) { - struct mpc_config_processor processor; - struct mpc_config_bus bus; - struct mpc_config_ioapic ioapic; - struct mpc_config_lintsrc lintsrc; - int linttypes[2] = { mp_ExtINT, mp_NMI }; - int i; - - /* - * local APIC has default address - */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* - * 2 CPUs, numbered 0 & 1. - */ - processor.mpc_type = MP_PROCESSOR; - /* Either an integrated APIC or a discrete 82489DX. */ - processor.mpc_apicver = mpc_default_type > 4 ? 
0x10 : 0x01; - processor.mpc_cpuflag = CPU_ENABLED; - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | - boot_cpu_data.x86_mask; - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; - processor.mpc_reserved[0] = 0; - processor.mpc_reserved[1] = 0; - for (i = 0; i < 2; i++) { - processor.mpc_apicid = i; - MP_processor_info(&processor); - } - - bus.mpc_type = MP_BUS; - bus.mpc_busid = 0; - switch (mpc_default_type) { - default: - printk("???\nUnknown standard configuration %d\n", - mpc_default_type); - /* fall through */ - case 1: - case 5: - memcpy(bus.mpc_bustype, "ISA ", 6); - break; - case 2: - case 6: - case 3: - memcpy(bus.mpc_bustype, "EISA ", 6); - break; - case 4: - case 7: - memcpy(bus.mpc_bustype, "MCA ", 6); - } - MP_bus_info(&bus); - if (mpc_default_type > 4) { - bus.mpc_busid = 1; - memcpy(bus.mpc_bustype, "PCI ", 6); - MP_bus_info(&bus); - } - - ioapic.mpc_type = MP_IOAPIC; - ioapic.mpc_apicid = 2; - ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; - ioapic.mpc_flags = MPC_APIC_USABLE; - ioapic.mpc_apicaddr = 0xFEC00000; - MP_ioapic_info(&ioapic); - - /* - * We set up most of the low 16 IO-APIC pins according to MPS rules. - */ - construct_default_ioirq_mptable(mpc_default_type); - - lintsrc.mpc_type = MP_LINTSRC; - lintsrc.mpc_irqflag = 0; /* conforming */ - lintsrc.mpc_srcbusid = 0; - lintsrc.mpc_srcbusirq = 0; - lintsrc.mpc_destapic = MP_APIC_ALL; - for (i = 0; i < 2; i++) { - lintsrc.mpc_irqtype = linttypes[i]; - lintsrc.mpc_destapiclint = i; - MP_lintsrc_info(&lintsrc); - } + struct mpc_config_processor processor; + struct mpc_config_bus bus; + struct mpc_config_ioapic ioapic; + struct mpc_config_lintsrc lintsrc; + int linttypes[2] = { mp_ExtINT, mp_NMI }; + int i; + + /* + * local APIC has default address + */ + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* + * 2 CPUs, numbered 0 & 1. + */ + processor.mpc_type = MP_PROCESSOR; + /* Either an integrated APIC or a discrete 82489DX. 
*/ + processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; + processor.mpc_cpuflag = CPU_ENABLED; + processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | + (boot_cpu_data.x86_model << 4) | + boot_cpu_data.x86_mask; + processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; + processor.mpc_reserved[0] = 0; + processor.mpc_reserved[1] = 0; + for (i = 0; i < 2; i++) { + processor.mpc_apicid = i; + MP_processor_info(&processor); + } + + bus.mpc_type = MP_BUS; + bus.mpc_busid = 0; + switch (mpc_default_type) { + default: + printk("???\nUnknown standard configuration %d\n", + mpc_default_type); + /* fall through */ + case 1: + case 5: + memcpy(bus.mpc_bustype, "ISA ", 6); + break; + case 2: + case 6: + case 3: + memcpy(bus.mpc_bustype, "EISA ", 6); + break; + case 4: + case 7: + memcpy(bus.mpc_bustype, "MCA ", 6); + } + MP_bus_info(&bus); + if (mpc_default_type > 4) { + bus.mpc_busid = 1; + memcpy(bus.mpc_bustype, "PCI ", 6); + MP_bus_info(&bus); + } + + ioapic.mpc_type = MP_IOAPIC; + ioapic.mpc_apicid = 2; + ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; + ioapic.mpc_flags = MPC_APIC_USABLE; + ioapic.mpc_apicaddr = 0xFEC00000; + MP_ioapic_info(&ioapic); + + /* + * We set up most of the low 16 IO-APIC pins according to MPS rules. + */ + construct_default_ioirq_mptable(mpc_default_type); + + lintsrc.mpc_type = MP_LINTSRC; + lintsrc.mpc_irqflag = 0; /* conforming */ + lintsrc.mpc_srcbusid = 0; + lintsrc.mpc_srcbusirq = 0; + lintsrc.mpc_destapic = MP_APIC_ALL; + for (i = 0; i < 2; i++) { + lintsrc.mpc_irqtype = linttypes[i]; + lintsrc.mpc_destapiclint = i; + MP_lintsrc_info(&lintsrc); + } } static struct intel_mp_floating *mpf_found; +extern void config_acpi_tables(void); /* * Scan the memory blocks for an SMP configuration block. */ void __init get_smp_config (void) { - struct intel_mp_floating *mpf = mpf_found; + struct intel_mp_floating *mpf = mpf_found; + +#ifdef CONFIG_X86_IO_APIC + /* + * Check if the ACPI tables are provided. 
Use them only to get + * the processor information, mainly because it provides + * the info on the logical processor(s), rather than the physical + * processor(s) that are provided by the MPS. We attempt to + * check only if the user provided a commandline override + */ + //XXX Xen config_acpi_tables(); +#endif - printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); - if (mpf->mpf_feature2 & (1<<7)) { - printk(" IMCR and PIC compatibility mode.\n"); - pic_mode = 1; - } else { - printk(" Virtual Wire compatibility mode.\n"); - pic_mode = 0; - } - - /* - * Now see if we need to read further. - */ - if (mpf->mpf_feature1 != 0) { - - printk("Default MP configuration #%d\n", mpf->mpf_feature1); - construct_default_ISA_mptable(mpf->mpf_feature1); - - } else if (mpf->mpf_physptr) { - - /* - * Read the physical hardware table. Anything here will - * override the defaults. - */ - if (!smp_read_mpc((void *)mpf->mpf_physptr)) { - smp_found_config = 0; - printk("BIOS bug, MP table errors detected!...\n"); - printk("... disabling SMP support. (tell your hw vendor)\n"); - return; - } - /* - * If there are no explicit MP IRQ entries, then we are - * broken. We set up most of the low 16 IO-APIC pins to - * ISA defaults and hope it will work. - */ - if (!mp_irq_entries) { - struct mpc_config_bus bus; - - printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); - - bus.mpc_type = MP_BUS; - bus.mpc_busid = 0; - memcpy(bus.mpc_bustype, "ISA ", 6); - MP_bus_info(&bus); - - construct_default_ioirq_mptable(0); - } - - } else - BUG(); - - printk("Processors: %d\n", num_processors); - /* - * Only use the first configuration found. 
- */ + printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); + if (mpf->mpf_feature2 & (1<<7)) { + printk(" IMCR and PIC compatibility mode.\n"); + pic_mode = 1; + } else { + printk(" Virtual Wire compatibility mode.\n"); + pic_mode = 0; + } + + /* + * Now see if we need to read further. + */ + if (mpf->mpf_feature1 != 0) { + + printk("Default MP configuration #%d\n", mpf->mpf_feature1); + construct_default_ISA_mptable(mpf->mpf_feature1); + + } else if (mpf->mpf_physptr) { + + /* + * Read the physical hardware table. Anything here will + * override the defaults. + */ + if (!smp_read_mpc((void *)mpf->mpf_physptr)) { + smp_found_config = 0; + printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); + printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); + return; + } + /* + * If there are no explicit MP IRQ entries, then we are + * broken. We set up most of the low 16 IO-APIC pins to + * ISA defaults and hope it will work. + */ + if (!mp_irq_entries) { + struct mpc_config_bus bus; + + printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); + + bus.mpc_type = MP_BUS; + bus.mpc_busid = 0; + memcpy(bus.mpc_bustype, "ISA ", 6); + MP_bus_info(&bus); + + construct_default_ioirq_mptable(0); + } + + } else + BUG(); + + printk("Processors: %d\n", num_processors); + /* + * Only use the first configuration found. 
+ */ } static int __init smp_scan_config (unsigned long base, unsigned long length) { - unsigned long *bp = phys_to_virt(base); - struct intel_mp_floating *mpf; - - Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); - if (sizeof(*mpf) != 16) - printk("Error: MPF size\n"); - - while (length > 0) { - mpf = (struct intel_mp_floating *)bp; - if ((*bp == SMP_MAGIC_IDENT) && - (mpf->mpf_length == 1) && - !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->mpf_specification == 1) - || (mpf->mpf_specification == 4)) ) { - - smp_found_config = 1; - printk("found SMP MP-table at %08lx\n", - virt_to_phys(mpf)); - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); - if (mpf->mpf_physptr) - reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE); - mpf_found = mpf; - return 1; - } - bp += 4; - length -= 16; - } - return 0; + unsigned long *bp = phys_to_virt(base); + struct intel_mp_floating *mpf; + + Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); + if (sizeof(*mpf) != 16) + printk("Error: MPF size\n"); + + while (length > 0) { + mpf = (struct intel_mp_floating *)bp; + if ((*bp == SMP_MAGIC_IDENT) && + (mpf->mpf_length == 1) && + !mpf_checksum((unsigned char *)bp, 16) && + ((mpf->mpf_specification == 1) + || (mpf->mpf_specification == 4)) ) { + + smp_found_config = 1; + printk("found SMP MP-table at %08lx\n", + virt_to_phys(mpf)); + reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); + if (mpf->mpf_physptr) + reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE); + mpf_found = mpf; + return 1; + } + bp += 4; + length -= 16; + } + return 0; } void __init find_intel_smp (void) { - /* - * 1) Scan the bottom 1K for a signature - * 2) Scan the top 1K of base RAM - * 3) Scan the 64K of bios - */ - if (smp_scan_config(0x0,0x400) || - smp_scan_config(639*0x400,0x400) || - smp_scan_config(0xF0000,0x10000)) - return; + unsigned int address; + + /* + * FIXME: Linux assumes you have 640K of base ram.. + * this continues the error... 
+ * + * 1) Scan the bottom 1K for a signature + * 2) Scan the top 1K of base RAM + * 3) Scan the 64K of bios + */ + if (smp_scan_config(0x0,0x400) || + smp_scan_config(639*0x400,0x400) || + smp_scan_config(0xF0000,0x10000)) + return; + /* + * If it is an SMP machine we should know now, unless the + * configuration is in an EISA/MCA bus machine with an + * extended bios data area. + * + * there is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E, calculate and scan it here. + * + * NOTE! There were Linux loaders that will corrupt the EBDA + * area, and as such this kind of SMP config may be less + * trustworthy, simply because the SMP table may have been + * stomped on during early boot. Thankfully the bootloaders + * now honour the EBDA. + */ + + address = *(unsigned short *)phys_to_virt(0x40E); + address <<= 4; + smp_scan_config(address, 0x1000); +} + +#else + +/* + * The Visual Workstation is Intel MP compliant in the hardware + * sense, but it doesn't have a BIOS(-configuration table). + * No problem for Linux. + */ +void __init find_visws_smp(void) +{ + smp_found_config = 1; + + phys_cpu_present_map |= 2; /* or in id 1 */ + apic_version[1] |= 0x10; /* integrated APIC */ + apic_version[0] |= 0x10; + + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; } +#endif + /* * - Intel MP Configuration Table * - or SGI Visual Workstation configuration */ void __init find_smp_config (void) { - find_intel_smp(); +#ifdef CONFIG_X86_LOCAL_APIC + find_intel_smp(); +#endif +#ifdef CONFIG_VISWS + find_visws_smp(); +#endif } diff --git a/xen-2.4.16/arch/i386/process.c b/xen-2.4.16/arch/i386/process.c index a23f4b1557..d3cedf4766 100644 --- a/xen-2.4.16/arch/i386/process.c +++ b/xen-2.4.16/arch/i386/process.c @@ -364,7 +364,6 @@ void new_thread(struct task_struct *p, /* NB. 
prev_p passed in %eax, next_p passed in %edx */ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { - extern struct desc_struct idt_table[]; struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; struct tss_struct *tss = init_tss + smp_processor_id(); diff --git a/xen-2.4.16/arch/i386/setup.c b/xen-2.4.16/arch/i386/setup.c index 924d0ce2a1..e81f2da3ff 100644 --- a/xen-2.4.16/arch/i386/setup.c +++ b/xen-2.4.16/arch/i386/setup.c @@ -283,6 +283,9 @@ void __init start_of_day(void) extern void ac_timer_init(void); extern int setup_network_devices(void); extern void net_init(void); + extern void initialize_block_io(void); + extern void initialize_serial(void); + extern void initialize_keyboard(void); unsigned long low_mem_size; @@ -338,9 +341,15 @@ void __init start_of_day(void) pci_init(); #endif do_initcalls(); + + initialize_serial(); /* setup serial 'driver' (for debugging) */ + initialize_keyboard(); /* setup keyboard (also for debugging) */ + if ( !setup_network_devices() ) panic("Must have a network device!\n"); - net_init(); /* initializes virtual network system. */ + net_init(); /* initializes virtual network system. */ + initialize_block_io(); /* setup block devices */ + #ifdef CONFIG_SMP wait_init_idle = cpu_online_map; diff --git a/xen-2.4.16/arch/i386/smpboot.c b/xen-2.4.16/arch/i386/smpboot.c index dd0f94bd13..6afdd0ecfd 100644 --- a/xen-2.4.16/arch/i386/smpboot.c +++ b/xen-2.4.16/arch/i386/smpboot.c @@ -395,6 +395,10 @@ int cpucount; */ int __init start_secondary(void *unused) { + unsigned int cpu = smp_processor_id(); + /* 6 bytes suitable for passing to LIDT instruction. */ + unsigned char idt_load[6]; + extern void cpu_init(void); /* @@ -409,6 +413,16 @@ int __init start_secondary(void *unused) rep_nop(); /* + * At this point, boot CPU has fully initialised the IDT. It is + * now safe to make ourselves a private copy. 
+ */ + idt_tables[cpu] = kmalloc(IDT_ENTRIES*8, GFP_KERNEL); + memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*8); + *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*8)-1; + *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu]; + __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) ); + + /* * low-memory mappings have been cleared, flush them from the local TLBs * too. */ diff --git a/xen-2.4.16/arch/i386/traps.c b/xen-2.4.16/arch/i386/traps.c index cdea19eaa6..b8297fe3eb 100644 --- a/xen-2.4.16/arch/i386/traps.c +++ b/xen-2.4.16/arch/i386/traps.c @@ -43,12 +43,10 @@ asmlinkage int hypervisor_call(void); asmlinkage void lcall7(void); asmlinkage void lcall27(void); -/* - * The IDT has to be page-aligned to simplify the Pentium - * F0 0F bug workaround.. We have a special link segment - * for this. - */ -struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; +/* Master table, and the one used by CPU0. */ +struct desc_struct idt_table[256] = { {0, 0}, }; +/* All other CPUs have their own copy. */ +struct desc_struct *idt_tables[NR_CPUS] = { 0 }; asmlinkage void divide_error(void); asmlinkage void debug(void); @@ -299,7 +297,12 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) ti = current->thread.traps + (error_code>>3); if ( ti->dpl >= (regs->xcs & 3) ) { - if ( (error_code>>3)==0x80 ) { printk("!!!\n"); BUG(); } + /* XXX Kill next conditional soon :-) XXX */ + if ( (error_code>>3)==0x80 ) + { + printk("DIDN'T USE FAST-TRAP HANDLER FOR 0x80!!! :-(\n"); + BUG(); + } gtb->flags = GTBF_TRAP_NOCODE; gtb->cs = ti->cs; gtb->eip = ti->address; @@ -542,6 +545,9 @@ void __init trap_init(void) /* Only ring 1 can access monitor services. */ _set_gate(idt_table+HYPERVISOR_CALL_VECTOR,15,1,&hypervisor_call); + /* CPU0 uses the master IDT. */ + idt_tables[0] = idt_table; + /* * Should be a barrier for any external CPU state. 
*/ diff --git a/xen-2.4.16/common/event.c b/xen-2.4.16/common/event.c index 6a81c63f8b..4514d02eb3 100644 --- a/xen-2.4.16/common/event.c +++ b/xen-2.4.16/common/event.c @@ -15,13 +15,15 @@ typedef void (*hyp_event_callback_fn_t)(void); extern void schedule(void); extern void flush_rx_queue(void); +extern void flush_blk_queue(void); /* Ordering must match definitions of _HYP_EVENT_* in xeno/sched.h */ static hyp_event_callback_fn_t event_call_fn[] = { schedule, flush_rx_queue, - kill_domain + kill_domain, + flush_blk_queue }; /* Handle outstanding events for the currently-executing domain. */ diff --git a/xen-2.4.16/drivers/Makefile b/xen-2.4.16/drivers/Makefile index 5aa320fcbe..bee17fa208 100644 --- a/xen-2.4.16/drivers/Makefile +++ b/xen-2.4.16/drivers/Makefile @@ -1,12 +1,16 @@ default: + $(MAKE) -C char $(MAKE) -C pci $(MAKE) -C net $(MAKE) -C block $(MAKE) -C ide +# $(MAKE) -C scsi clean: + $(MAKE) -C char clean $(MAKE) -C pci clean $(MAKE) -C net clean $(MAKE) -C block clean $(MAKE) -C ide clean +# $(MAKE) -C scsi clean diff --git a/xen-2.4.16/drivers/block/ll_rw_blk.c b/xen-2.4.16/drivers/block/ll_rw_blk.c index 0ee8477c71..06d9fb72e9 100644 --- a/xen-2.4.16/drivers/block/ll_rw_blk.c +++ b/xen-2.4.16/drivers/block/ll_rw_blk.c @@ -31,8 +31,12 @@ #include <xeno/slab.h> #include <xeno/module.h> +static void end_buffer_dummy(struct buffer_head *bh, int uptodate) +{ + /* do nothing */ +} + /* This will die as all synchronous stuff is coming to an end */ -#define end_buffer_io_sync NULL #define complete(_r) panic("completion.h stuff may be needed...") /* @@ -307,10 +311,14 @@ static void generic_plug_device(request_queue_t *q, kdev_t dev) */ static inline void __generic_unplug_device(request_queue_t *q) { + /* printk(KERN_ALERT "__generic_unplug_device %p %d\n", q, q->plugged); */ if (q->plugged) { q->plugged = 0; if (!list_empty(&q->queue_head)) + { + /* printk(KERN_ALERT " calling %p\n", q->request_fn); */ q->request_fn(q); + } } } @@ -319,6 +327,8 @@ void 
generic_unplug_device(void *data) request_queue_t *q = (request_queue_t *) data; unsigned long flags; + /* printk(KERN_ALERT "generic_unplug_device\n"); */ + spin_lock_irqsave(&io_request_lock, flags); __generic_unplug_device(q); spin_unlock_irqrestore(&io_request_lock, flags); @@ -856,6 +866,8 @@ static int __make_request(request_queue_t * q, int rw, int latency; elevator_t *elevator = &q->elevator; + /* printk(KERN_ALERT "__make_request\n");*/ + count = bh->b_size >> 9; sector = bh->b_rsector; @@ -1061,6 +1073,8 @@ void generic_make_request (int rw, struct buffer_head * bh) int minorsize = 0; request_queue_t *q; + /* printk(KERN_ALERT "generic_make_request\n"); */ + if (!bh->b_end_io) BUG(); @@ -1130,6 +1144,8 @@ void submit_bh(int rw, struct buffer_head * bh) { int count = bh->b_size >> 9; + /* printk(KERN_ALERT "submit_bh\n"); */ + if (!test_bit(BH_Lock, &bh->b_state)) BUG(); @@ -1141,7 +1157,7 @@ void submit_bh(int rw, struct buffer_head * bh) * further remap this. */ bh->b_rdev = bh->b_dev; - bh->b_rsector = bh->b_blocknr * count; + /* bh->b_rsector = bh->b_blocknr * count; */ generic_make_request(rw, bh); @@ -1194,6 +1210,8 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) int correct_size; int i; + /* printk(KERN_ALERT "ll_rw_block %d %d\n", rw, nr); */ + if (!nr) return; @@ -1229,14 +1247,14 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) /* We have the buffer lock */ atomic_inc(&bh->b_count); - bh->b_end_io = end_buffer_io_sync; + bh->b_end_io = end_buffer_dummy; switch(rw) { case WRITE: if (!atomic_set_buffer_clean(bh)) /* Hmmph! 
Nothing to write */ goto end_io; - __mark_buffer_clean(bh); + /* __mark_buffer_clean(bh); */ break; case READA: @@ -1302,6 +1320,7 @@ int end_that_request_first (struct request *req, int uptodate, char *name) req->bh = bh->b_reqnext; bh->b_reqnext = NULL; bh->b_end_io(bh, uptodate); + end_block_io_op(bh); if ((bh = req->bh) != NULL) { req->hard_sector += nsect; req->hard_nr_sectors -= nsect; diff --git a/xen-2.4.16/drivers/block/xen_block.c b/xen-2.4.16/drivers/block/xen_block.c new file mode 100644 index 0000000000..b6d0e8993f --- /dev/null +++ b/xen-2.4.16/drivers/block/xen_block.c @@ -0,0 +1,620 @@ +/* + * xen-block.c + * + * process incoming block io requests from guestos's. + */ + +#include <xeno/config.h> +#include <xeno/types.h> +#include <xeno/lib.h> +#include <xeno/sched.h> +#include <xeno/blkdev.h> +#include <xeno/event.h> /* mark_hyp_event */ +#include <hypervisor-ifs/block.h> +#include <hypervisor-ifs/hypervisor-if.h> +#include <asm-i386/io.h> +#include <asm/spinlock.h> + +#define XEN_BLK_DEBUG 0 +#define XEN_BLK_DEBUG_LEVEL KERN_ALERT + +#define XEN_BLK_REQUEST_LIST_SIZE 256 /* very arbitrary */ + +typedef struct blk_request +{ + struct list_head queue; + struct buffer_head *bh; + blk_ring_entry_t request; + struct task_struct *domain; /* requesting domain */ +} blk_request_t; + +static int pending_work; /* which domains have work for us? */ +blk_request_t blk_request_list[XEN_BLK_REQUEST_LIST_SIZE]; +struct list_head free_queue; /* unused requests */ +struct list_head pending_queue; /* waiting for hardware */ +struct list_head io_done_queue; /* request completed. 
send to guest os */
+spinlock_t free_queue_lock;
+spinlock_t pending_queue_lock;
+spinlock_t io_done_queue_lock;
+
+/* some definitions */
+void dumpx (char *buffer, int count);
+void printx (char * string);
+long do_block_io_op_domain (struct task_struct* task);
+int dispatch_rw_block_io (int index);
+int dispatch_probe_block_io (int index);
+int dispatch_debug_block_io (int index);
+
+/*
+ * end_block_io_op
+ *
+ * IO has completed. Need to notify the guest operating system.
+ * Called from hardware interrupt.
+ *
+ * Looks up the blk_request_t stored in bh->b_xen_request, moves it from
+ * pending_queue to io_done_queue and marks _HYP_EVENT_BLK_RX on the
+ * requesting domain. A buffer head with no request attached is reported
+ * and otherwise ignored.
+ */
+
+void end_block_io_op(struct buffer_head * bh)
+{
+    unsigned long cpu_mask;
+    /* struct list_head *list;*/
+    blk_request_t *blk_request = NULL;
+    unsigned long flags; /* irq save */
+
+#if 0
+    printk("{E}");
+#endif
+    if (XEN_BLK_DEBUG) printk(XEN_BLK_DEBUG_LEVEL
+                              "XEN end_block_io_op, bh: %lx\n",
+                              (unsigned long)bh);
+
+    {
+        char temp[100];
+        sprintf(temp, "endio bh: 0x%p, blkno: 0x%lx",
+                bh, bh->b_blocknr);
+        printx(temp);
+    }
+
+    spin_lock_irqsave(&pending_queue_lock, flags);
+    /*
+    list_for_each (list, &pending_queue)
+    {
+        blk_request = list_entry(list, blk_request_t, queue);
+        if (blk_request->bh == bh)
+        {
+            break;
+        }
+    }
+    */
+    blk_request = (blk_request_t *)bh->b_xen_request;
+    if (blk_request == NULL)
+    {
+        printk (KERN_ALERT
+                " block io interrupt received for unknown buffer [0x%lx]\n",
+                (unsigned long) bh);
+        spin_unlock_irqrestore(&pending_queue_lock, flags);
+        return;
+    }
+    list_del(&blk_request->queue);
+    spin_unlock_irqrestore(&pending_queue_lock, flags);
+
+    spin_lock_irqsave(&io_done_queue_lock, flags);
+    list_add_tail(&blk_request->queue, &io_done_queue);
+    spin_unlock_irqrestore(&io_done_queue_lock, flags);
+
+    /* enqueue work */
+    cpu_mask = mark_hyp_event(blk_request->domain, _HYP_EVENT_BLK_RX);
+
+    return;
+}
+
+/*
+ * flush_blk_queue
+ *
+ * Called by the hypervisor synchronously when there is something to do
+ * (block transfers have completed)
+ *
+ * Phase 1 drains io_done_queue: each completed request is copied onto the
+ * owning domain's rx ring, _EVENT_BLK_RX is set for that domain, the
+ * buffer head is freed and the request slot goes back on free_queue.
+ * Phase 2 re-runs do_block_io_op_domain() for every domain whose bit is
+ * set in pending_work.
+ */
+
+void flush_blk_queue(void)
+{
+    blk_request_t *blk_request;
+    int position = 0;
+    blk_ring_t *blk_ring;
+    unsigned long flags;
+    int loop;
+
+#if 0
+    printk("{F}");
+#endif
+    /*
+    if (XEN_BLK_DEBUG) printk(XEN_BLK_DEBUG_LEVEL
+                              "XEN flush_blk_queue\n");
+    */
+
+    clear_bit(_HYP_EVENT_BLK_RX, &current->hyp_events);
+
+    /* NEED LOCK? */
+    spin_lock_irqsave(&io_done_queue_lock, flags);
+    while (!list_empty(&io_done_queue))
+    {
+        blk_request = list_entry(io_done_queue.next, blk_request_t, queue);
+        list_del (&blk_request->queue);
+        /* drop the lock while touching the guest ring; retaken at loop end */
+        spin_unlock_irqrestore(&io_done_queue_lock, flags);
+
+        /* place on ring for guest os */
+        blk_ring = blk_request->domain->blk_ring_base;
+        position = blk_ring->rx_prod;
+
+        if (XEN_BLK_DEBUG) printk(XEN_BLK_DEBUG_LEVEL
+                                  "XEN flush_blk_queue [%d]\n", position);
+
+        memcpy(&blk_ring->rx_ring[position], &blk_request->request,
+               sizeof(blk_ring_entry_t));
+        blk_ring->rx_prod = BLK_RX_RING_INC(blk_ring->rx_prod);
+
+        /* notify appropriate guest os */
+        set_bit(_EVENT_BLK_RX,
+                &blk_request->domain->shared_info->events);
+
+        if (0)
+        {
+            int temp;
+            struct buffer_head *bh = blk_request->bh;
+            char * vbuffer = bh->b_data;
+
+            printk (KERN_ALERT "XEN return block 0x%lx\n", bh->b_blocknr);
+
+            for (temp = 0; temp < bh->b_size; temp++)
+            {
+                if (temp % 16 == 0) printk ("[%04x] ", temp);
+                else if (temp % 4 == 0) printk (" ");
+                printk ("%02x",
+                        vbuffer[temp] & 255);
+                if ((temp + 1) % 16 == 0) printk ("\n");
+            }
+            printk ("\n\n");
+        }
+
+        /* free the buffer header allocated in do_block_io_op */
+        if (blk_request->bh)
+        {
+            kfree(blk_request->bh); /* alloc in do_block_io_op */
+        }
+
+        spin_lock_irqsave(&free_queue_lock, flags);
+        list_add_tail(&blk_request->queue, &free_queue);
+        spin_unlock_irqrestore(&free_queue_lock, flags);
+
+        spin_lock_irqsave(&io_done_queue_lock, flags);
+    }
+    spin_unlock_irqrestore(&io_done_queue_lock, flags);
+
+    /*
+     * now check if there is any pending work from any domain
+     * that we were previously unable to process.
+     *
+     * NOTE: the current algorithm will check _every_ domain
+     * and wake up _every_ domain that has pending work.
+     * In the future, we should stop waking up domains once
+     * there isn't any space for their requests any more
+     * ALSO, we need to maintain a counter of the last domain
+     * that we woke up for fairness... we shouldn't restart
+     * at domain 0 every time (although we might want to special
+     * case domain 0);
+     */
+    for (loop = 0; loop < XEN_BLOCK_MAX_DOMAINS; loop++)
+    {
+        int domain = pending_work & (1 << loop);
+
+        if (domain)
+        {
+            struct task_struct *mytask = current;
+
+            /*
+            printk (KERN_ALERT
+                    "flush_blk_queue pending_work: %x domain: %d loop: %d\n",
+                    pending_work, domain, loop);
+            */
+            /* IS THERE A BETTER WAY OF FINDING THE TASK STRUCT FOR A
+             * PARTICULAR DOMAIN?
+             *
+             * WHAT IF THE TASK GOES AWAY BEFORE WE HAVE A CHANCE TO
+             * FINISH PROCESSING ALL OF ITS REQUESTS?
+             */
+            while (mytask->domain != loop)
+            {
+                mytask = mytask->next_task;
+                if (mytask == current) break; /* one full lap: domain is gone */
+            }
+
+            /* Clear only this domain's bit (~, not !), and do it before the
+             * re-dispatch so a bit set again by do_block_io_op_domain()
+             * survives for the next flush. */
+            pending_work = pending_work & ~(1 << loop);
+
+            if (mytask->domain == loop)
+            {
+                do_block_io_op_domain(mytask);
+            }
+            /*
+            printk (KERN_ALERT
+                    " pending_work: %x domain: %d loop: %d\n",
+                    pending_work, domain, loop);
+            */
+        }
+    }
+}
+
+/*
+ * do_block_io_op
+ *
+ * Accept a block io request from a guest operating system.
+ * There is an entry in the hypervisor_call_table (xen/arch/i386/entry.S).
+ */
+
+long do_block_io_op (void)
+{
+    return do_block_io_op_domain(current);
+}
+
+/*
+ * do_block_io_op_domain
+ *
+ * handle the requests for a particular domain
+ *
+ * Walks the domain's tx ring from tx_cons to tx_prod and dispatches each
+ * entry by operation. Stops at the first entry whose dispatcher returns
+ * non-zero, records the domain in pending_work and leaves tx_cons on that
+ * entry so it is retried later. Always returns 0.
+ *
+ * NOTE(review): the dispatch_* helpers read current->blk_ring_base rather
+ * than task->blk_ring_base; when this is called for a task other than
+ * current the two may differ -- confirm.
+ */
+
+long do_block_io_op_domain (struct task_struct* task)
+{
+    blk_ring_t *blk_ring = task->blk_ring_base;
+    int loop;
+
+#if 0
+    printk("{%d}", current->domain);
+#endif
+    if (XEN_BLK_DEBUG) printk(XEN_BLK_DEBUG_LEVEL
+                              "XEN do_block_io_op %d %d\n",
+                              blk_ring->tx_cons, blk_ring->tx_prod);
+
+    for (loop = blk_ring->tx_cons;
+         loop != blk_ring->tx_prod;
+         loop = BLK_TX_RING_INC(loop))
+    {
+        int status = 1;
+
+        switch (blk_ring->tx_ring[loop].operation)
+        {
+        case XEN_BLOCK_READ :
+        case XEN_BLOCK_WRITE :
+        {
+            status = dispatch_rw_block_io(loop);
+            break;
+        }
+        case XEN_BLOCK_PROBE :
+        {
+            status = dispatch_probe_block_io(loop);
+            break;
+        }
+        case XEN_BLOCK_DEBUG :
+        {
+            status = dispatch_debug_block_io(loop);
+            break;
+        }
+        default :
+        {
+            printk (KERN_ALERT "error: unknown block io operation [%d]\n",
+                    blk_ring->tx_ring[loop].operation);
+            BUG();
+        }
+        }
+
+        if (status)
+        {
+            /* unable to successfully issue / complete command, maybe because
+             * another resource (e.g. disk request buffers) is unavailable.
+             * stop removing items from the communications ring and try
+             * again later
+             */
+
+            /*
+            printk ("do_block_io_op_domain domain:%d, pending_work: %x\n",
+                    task->domain, pending_work);
+            */
+            pending_work = pending_work | (1 << task->domain);
+            /*
+            printk ("do_block_io_op_domain domain:%d, pending_work: %x\n",
+                    task->domain, pending_work);
+            */
+            break;
+        }
+    }
+
+    blk_ring->tx_cons = loop;
+
+    return 0L;
+}
+
+/*
+ * dispatch_debug_block_io
+ *
+ * Writes a text summary of every task's block rings into the request's
+ * buffer, then appends whatever dumpx() left in its output buffer.
+ * Returns 0.
+ */
+int dispatch_debug_block_io (int index)
+{
+    struct task_struct *task;
+    blk_ring_t *blk_ring = current->blk_ring_base;
+    char * buffer;
+    char output[1000];
+
+    int foobar = (unsigned long)blk_ring->tx_ring[index].block_number;
+
+    printk (KERN_ALERT "dispatch_debug_block_io %d\n", foobar);
+
+    buffer = phys_to_virt(blk_ring->tx_ring[index].buffer);
+    strcpy (buffer, "DEBUG\n");
+
+    /* Each later sprintf appends at the current end of buffer. Passing
+     * buffer as both destination and "%s" source is undefined behaviour. */
+    task = current;
+    sprintf (buffer, "current %d\n", current->domain);
+    sprintf (buffer + strlen(buffer), " tx: prod: %d, cons: %d, size: %d\n",
+             blk_ring->tx_prod, blk_ring->tx_cons, blk_ring->tx_ring_size);
+    sprintf (buffer + strlen(buffer), " rx: prod: %d, cons: %d, size: %d\n",
+             blk_ring->rx_prod, blk_ring->rx_cons, blk_ring->rx_ring_size);
+
+    task = task->next_task;
+    while (task != current)
+    {
+        blk_ring = task->blk_ring_base;
+        sprintf (buffer + strlen(buffer), "task %d\n", task->domain);
+        if (blk_ring != NULL)
+        {
+            sprintf (buffer + strlen(buffer),
+                     " tx: prod: %d, cons: %d, size: %d\n",
+                     blk_ring->tx_prod, blk_ring->tx_cons,
+                     blk_ring->tx_ring_size);
+            sprintf (buffer + strlen(buffer),
+                     " rx: prod: %d, cons: %d, size: %d\n",
+                     blk_ring->rx_prod, blk_ring->rx_cons,
+                     blk_ring->rx_ring_size);
+        }
+        task = task->next_task;
+    }
+    dumpx(output, foobar);
+    sprintf (buffer + strlen(buffer), "%s\n", output);
+
+    return 0;
+}
+
+/*
+ * dispatch_probe_block_io
+ *
+ * Hands the request's buffer, viewed as a xen_disk_info_t, to
+ * ide_probe_devices(). Returns 0.
+ */
+int dispatch_probe_block_io (int index)
+{
+    blk_ring_t *blk_ring = current->blk_ring_base;
+    xen_disk_info_t *xdi;
+
+    xdi = phys_to_virt(blk_ring->tx_ring[index].buffer);
+
+    ide_probe_devices(xdi);
+
+    return 0;
+}
+
+/*
+ * dispatch_rw_block_io
+ *
+ * Issues one read or write from tx ring slot `index`. Takes a request
+ * slot from free_queue, queues it on pending_queue, builds a buffer head
+ * describing the transfer and passes it to ll_rw_block().
+ * Returns 0 when the request was issued, 1 when no free request slot was
+ * available (the caller retries later).
+ */
+int dispatch_rw_block_io (int index)
+{
+    blk_ring_t *blk_ring = current->blk_ring_base;
+    struct buffer_head *bh;
+    struct request_queue *rq;
+    int operation;
+    blk_request_t *blk_request;
+    unsigned long flags;
+
+    /*
+     * check to make sure that the block request seems at least
+     * a bit legitimate
+     */
+    if ((blk_ring->tx_ring[index].block_size & (0x200 - 1)) != 0)
+    {
+        printk(KERN_ALERT
+               " error: dodgy block size: %d\n",
+               blk_ring->tx_ring[index].block_size);
+        BUG();
+    }
+
+    if (XEN_BLK_DEBUG)
+    {
+        printk(XEN_BLK_DEBUG_LEVEL
+               " tx_cons: %d tx_prod %d index: %d op: %s, pri: %s\n",
+               blk_ring->tx_cons, blk_ring->tx_prod, index,
+               (blk_ring->tx_ring[index].operation == XEN_BLOCK_READ ? "read" : "write"),
+               (blk_ring->tx_ring[index].priority == XEN_BLOCK_SYNC ? "sync" : "async"));
+    }
+
+    /* find an empty request slot */
+    spin_lock_irqsave(&free_queue_lock, flags);
+    if (list_empty(&free_queue))
+    {
+        /* printk (KERN_ALERT "dispatch_rw_block_io EMPTY FREE LIST!! %d\n", index); */
+        spin_unlock_irqrestore(&free_queue_lock, flags);
+        return 1;
+    }
+    blk_request = list_entry(free_queue.next, blk_request_t, queue);
+    list_del(&blk_request->queue);
+    spin_unlock_irqrestore(&free_queue_lock, flags);
+
+    /* place request on pending list */
+    spin_lock_irqsave(&pending_queue_lock, flags);
+    list_add_tail(&blk_request->queue, &pending_queue);
+    spin_unlock_irqrestore(&pending_queue_lock, flags);
+
+    /* we'll be doing this frequently, would a cache be appropriate? */
+    /* free in flush_blk_queue */
+    bh = (struct buffer_head *) kmalloc(sizeof(struct buffer_head),
+                                        GFP_KERNEL);
+    if (!bh)
+    {
+        printk(KERN_ALERT "ERROR: bh is null\n");
+        BUG();
+    }
+
+    /* set just the important bits of the buffer header */
+    memset (bh, 0, sizeof (struct buffer_head));
+
+    bh->b_blocknr = blk_ring->tx_ring[index].block_number; /* block number */
+    bh->b_size = blk_ring->tx_ring[index].block_size; /* block size */
+    bh->b_dev = blk_ring->tx_ring[index].device; /* device (B_FREE = free) */
+    bh->b_rsector = blk_ring->tx_ring[index].sector_number; /* sector number */
+
+    bh->b_data = phys_to_virt(blk_ring->tx_ring[index].buffer);
+                                                 /* ptr to data blk */
+    bh->b_count.counter = 1; /* users using this block */
+    bh->b_xen_request = (void *)blk_request; /* save block request */
+
+    /* Trace the request only now that bh holds a real pointer; logging it
+     * before the allocation read an uninitialized variable. */
+    {
+        char temp[100];
+        sprintf(temp, "issue buf: 0x%p, bh: 0x%p, blkno: 0x%lx",
+                blk_ring->tx_ring[index].buffer, bh,
+                (unsigned long)blk_ring->tx_ring[index].block_number);
+        printx(temp);
+    }
+
+    if (blk_ring->tx_ring[index].operation == XEN_BLOCK_WRITE)
+    {
+        bh->b_state = ((1 << BH_JBD) | /* buffer state bitmap */
+                       (1 << BH_Mapped) |
+                       (1 << BH_Req) |
+                       (1 << BH_Dirty) |
+                       (1 << BH_Uptodate));
+        operation = WRITE;
+    }
+    else
+    {
+        bh->b_state = (1 << BH_Mapped); /* buffer state bitmap */
+        operation = READ;
+    }
+
+    /* save meta data about request */
+    memcpy(&blk_request->request, /* NEED COPY_FROM_USER? */
+           &blk_ring->tx_ring[index], sizeof(blk_ring_entry_t));
+    blk_request->bh = bh;
+    blk_request->domain = current; /* save current domain */
+
+    /* dispatch single block request */
+    ll_rw_block(operation, 1, &bh); /* linux top half */
+    rq = blk_get_queue(bh->b_rdev);
+    generic_unplug_device(rq); /* linux bottom half */
+
+    return 0;
+}
+
+/*
+ * initialize_block_io
+ *
+ * initialize everything for block io
+ * called from arch/i386/setup.c::start_of_day
+ *
+ * Sets up the three queues and their locks, puts every entry of
+ * blk_request_list on free_queue and clears pending_work.
+ */
+
+void initialize_block_io ()
+{
+    int loop;
+
+    INIT_LIST_HEAD(&free_queue);
+    INIT_LIST_HEAD(&pending_queue);
+    INIT_LIST_HEAD(&io_done_queue);
+
+    spin_lock_init(&free_queue_lock);
+    spin_lock_init(&pending_queue_lock);
+    spin_lock_init(&io_done_queue_lock);
+
+    for (loop = 0; loop < XEN_BLK_REQUEST_LIST_SIZE; loop++)
+    {
+        list_add_tail(&blk_request_list[loop].queue, &free_queue);
+    }
+
+    /*
+     * if bit i is true then domain i has work for us to do.
+     */
+    pending_work = 0;
+
+    return;
+}
+
+
+#ifdef DEBUG
+
+/*
+ * debug dump_queue
+ * arguments: queue head, name of queue
+ */
+void dump_queue(struct list_head *queue, char *name)
+{
+    struct list_head *list;
+    int loop = 0;
+
+    printk ("QUEUE %s %lx n: %lx, p: %lx\n", name, (unsigned long)queue,
+            (unsigned long) queue->next, (unsigned long) queue->prev);
+    list_for_each (list, queue)
+    {
+        printk (" %s %d : %lx n: %lx, p: %lx\n", name, loop++,
+                (unsigned long)list,
+                (unsigned long)list->next, (unsigned long)list->prev);
+    }
+}
+
+/* same as dump_queue, but prints at most the first five entries */
+void dump_queue_head(struct list_head *queue, char *name)
+{
+    struct list_head *list;
+    int loop = 0;
+
+    printk ("QUEUE %s %lx n: %lx, p: %lx\n", name, (unsigned long)queue,
+            (unsigned long) queue->next, (unsigned long) queue->prev);
+    list_for_each (list, queue)
+    {
+        printk (" %d : %lx n: %lx, p: %lx\n", loop++,
+                (unsigned long)list,
+                (unsigned long)list->next, (unsigned long)list->prev);
+        if (loop >= 5) return;
+    }
+}
+
+#endif /* DEBUG */
+
+
+#define debug_block_size 200000
+#define debug_output_size 10
+
+static
int countx = 0;
+static char * arrayx[debug_block_size];
+static int outputx = 0;
+
+/*
+ * printx
+ *
+ * Stores a private copy of `string` in the next arrayx slot. When countx
+ * reaches debug_block_size it wraps back to 0 and later entries overwrite
+ * earlier slots. If the copy cannot be allocated the entry is dropped.
+ */
+void
+printx (char * string)
+{
+    char * s;
+
+    /* +1: strcpy() also writes the terminating NUL */
+    s = (char *) kmalloc(strlen(string) + 1, GFP_KERNEL);
+    if (s == NULL) return; /* out of memory: drop this trace entry */
+    strcpy (s, string);
+    arrayx[countx++] = s;
+
+    if (countx >= debug_block_size)
+    {
+        countx = 0;
+        printk (KERN_ALERT "printx wrap\n");
+    }
+
+}
+
+/*
+ * dumpx
+ *
+ * Prints the last `count` recorded printx entries with printk; a count of
+ * 0, or one larger than countx, prints everything from slot 0.
+ * `buffer` only receives the fixed header "debug dump\n".
+ */
+void
+dumpx (char *buffer, int count)
+{
+    int loop;
+    int start;
+
+    sprintf (buffer, "debug dump\n");
+
+    /*
+    for (loop = outputx;
+         loop < outputx + debug_output_size && loop < countx;
+         loop ++)
+    {
+        sprintf (buffer, "%s%02d:%s\n", buffer, loop, arrayx[loop]);
+    }
+    outputx = loop;
+    */
+
+    if (count == 0 || count > countx)
+    {
+        start = 0;
+    }
+    else
+    {
+        start = countx - count;
+    }
+
+    printk (KERN_ALERT "DUMPX BUFFER\n");
+    for (loop = start; loop < countx; loop++)
+    {
+        printk (KERN_ALERT "%4d %s\n", loop, arrayx[loop]);
+    }
+    printk (KERN_ALERT "DUMPX bye bye\n");
+}
+
diff --git a/xen-2.4.16/drivers/char/Makefile b/xen-2.4.16/drivers/char/Makefile
new file mode 100644
index 0000000000..574b7d2d79
--- /dev/null
+++ b/xen-2.4.16/drivers/char/Makefile
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+	$(LD) -r -o driver.o $(OBJS)
+
+clean:
+	rm -f *.o *~ core
diff --git a/xen-2.4.16/drivers/char/xen_kbd.c b/xen-2.4.16/drivers/char/xen_kbd.c
new file mode 100644
index 0000000000..780028ab69
--- /dev/null
+++ b/xen-2.4.16/drivers/char/xen_kbd.c
@@ -0,0 +1,111 @@
+#include <asm-i386/io.h>
+#include <xeno/sched.h> /* this has request_irq() proto for some reason */
+
+#define KEYBOARD_IRQ 1
+
+#define KBD_STATUS_REG 0x64 /* Status register (R) */
+#define KBD_CNTL_REG 0x64 /* Controller command register (W) */
+#define KBD_DATA_REG 0x60 /* Keyboard data register (R/W) */
+
+/* register status bits */
+#define KBD_STAT_OBF 0x01 /* Keyboard output buffer full */
+#define KBD_STAT_IBF 0x02 /* Keyboard input buffer full */
+#define KBD_STAT_SELFTEST 0x04 /* Self test successful */
+#define KBD_STAT_CMD 0x08 /* Last
write was a command write (0=data) */
+
+#define KBD_STAT_UNLOCKED 0x10 /* Zero if keyboard locked */
+#define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */
+#define KBD_STAT_GTO 0x40 /* General receive/xmit timeout */
+#define KBD_STAT_PERR 0x80 /* Parity error */
+
+#define kbd_read_input() inb(KBD_DATA_REG)
+#define kbd_read_status() inb(KBD_STATUS_REG)
+
+
+
+/*
+ * dispatch_scancode
+ *
+ * Prints a short tag for the handful of scancodes listed below and
+ * ignores every other value.
+ */
+static void
+dispatch_scancode (unsigned char scancode)
+{
+
+    /*
+     * we could be a bit more clever here, but why?
+     * just add a jump to your debug routine for the appropriate character.
+     */
+    switch (scancode)
+    {
+    case 0x01 : /* esc */
+        printk ("<esc>");
+        break;
+    case 0x9e : /* a */
+        printk ("a");
+        break;
+    case 0x9f : /* s */
+        printk ("s");
+        break;
+    case 0xae : /* c */
+        printk ("c");
+        break;
+    case 0xb0 : /* b */
+        printk ("b");
+        break;
+    case 0xbb : /* f1 */
+        printk ("<f1>");
+        break;
+    case 0xbc : /* f2 */
+        printk ("<f2>");
+        break;
+    case 0xbd : /* f3 */
+        printk ("<f3>");
+        break;
+    case 0xbe : /* f4 */
+        printk ("<f4>");
+        break;
+    case 0xbf : /* f5 */
+        /* xen_block_dump_state(); */
+        break;
+    default :
+        /* printk ("%x ", scancode); */
+        break; /* a label must be followed by a statement */
+    }
+
+    return;
+}
+
+
+/* regs should be struct pt_regs */
+
+/*
+ * keyboard_interrupt
+ *
+ * Reads bytes from the controller while the output-buffer-full bit is set,
+ * for a bounded number of iterations. Bytes flagged with a timeout or
+ * parity error, and mouse bytes, are read and discarded; the rest go to
+ * dispatch_scancode(). Reports a jammed controller if the iteration
+ * budget runs out.
+ */
+static void keyboard_interrupt(int irq, void *dev_id, void *regs)
+{
+    unsigned char status = kbd_read_status();
+    unsigned int work = 10000;
+
+    while ((--work > 0) && (status & KBD_STAT_OBF))
+    {
+        unsigned char scancode;
+
+        scancode = kbd_read_input();
+
+        if (!(status & (KBD_STAT_GTO | KBD_STAT_PERR)))
+        {
+            if (status & KBD_STAT_MOUSE_OBF)
+                /* mouse event, ignore */;
+            else
+                dispatch_scancode (scancode);
+        }
+        status = kbd_read_status();
+    }
+
+    if (!work)
+        printk(KERN_ERR "pc_keyb: controller jammed (0x%02X).\n", status);
+
+    return;
+}
+
+
+/* registers keyboard_interrupt on KEYBOARD_IRQ; logs if the IRQ is refused */
+void initialize_keyboard()
+{
+    if(request_irq(KEYBOARD_IRQ, keyboard_interrupt, 0, "keyboard", NULL))
+        printk("initialize_keyboard: failed to alloc IRQ %d\n", KEYBOARD_IRQ);
+
+    return;
+}
+
+diff --git
a/xen-2.4.16/drivers/char/xen_serial.c b/xen-2.4.16/drivers/char/xen_serial.c new file mode 100644 index 0000000000..7c62567fa4 --- /dev/null +++ b/xen-2.4.16/drivers/char/xen_serial.c @@ -0,0 +1,140 @@ +#include <asm-i386/io.h> +#include <xeno/sched.h> /* this has request_irq() proto for some reason */ + + +/* Register offsets */ +#define NS16550_RBR 0x00 /* receive buffer */ +#define NS16550_THR 0x00 /* transmit holding */ +#define NS16550_IER 0x01 /* interrupt enable */ +#define NS16550_IIR 0x02 /* interrupt identity */ +#define NS16550_FCR 0x02 /* FIFO control */ +#define NS16550_LCR 0x03 /* line control */ +#define NS16550_MCR 0x04 /* MODEM control */ +#define NS16550_LSR 0x05 /* line status */ +#define NS16550_MSR 0x06 /* MODEM status */ +#define NS16550_SCR 0x07 /* scratch */ +#define NS16550_DDL 0x00 /* divisor latch (ls) ( DLAB=1) */ +#define NS16550_DLM 0x01 /* divisor latch (ms) ( DLAB=1) */ + +/* Interrupt enable register */ +#define NS16550_IER_ERDAI 0x01 /* rx data recv'd */ +#define NS16550_IER_ETHREI 0x02 /* tx reg. 
empty */ +#define NS16550_IER_ELSI 0x04 /* rx line status */ +#define NS16550_IER_EMSI 0x08 /* MODEM status */ + +/* FIFO control register */ +#define NS16550_FCR_ENABLE 0x01 /* enable FIFO */ +#define NS16550_FCR_CLRX 0x02 /* clear Rx FIFO */ +#define NS16550_FCR_CLTX 0x04 /* clear Tx FIFO */ +#define NS16550_FCR_DMA 0x10 /* enter DMA mode */ +#define NS16550_FCR_TRG1 0x00 /* Rx FIFO trig lev 1 */ +#define NS16550_FCR_TRG4 0x40 /* Rx FIFO trig lev 4 */ +#define NS16550_FCR_TRG8 0x80 /* Rx FIFO trig lev 8 */ +#define NS16550_FCR_TRG14 0xc0 /* Rx FIFO trig lev 14 */ + +/* MODEM control register */ +#define NS16550_MCR_DTR 0x01 /* Data Terminal Ready */ +#define NS16550_MCR_RTS 0x02 /* Request to Send */ +#define NS16550_MCR_OUT1 0x04 /* OUT1: unused */ +#define NS16550_MCR_OUT2 0x08 /* OUT2: interrupt mask */ +#define NS16550_MCR_LOOP 0x10 /* Loop */ + +#define SERIAL_BASE 0x3f8 /* XXX SMH: horrible hardwired COM1 */ + + + +/* +** We keep an array of 'handlers' for each key code between 0 and 255; +** this is intended to allow very simple debugging routines (toggle +** debug flag, dump registers, reboot, etc) to be hooked in in a slightly +** nicer fashion than just editing this file :-) +*/ + +#define KEY_MAX 256 +typedef void key_handler(u_char key); + +static key_handler *key_table[KEY_MAX]; + +void add_key_handler(u_char key, key_handler *handler) +{ + if(key_table[key] != NULL) + printk("Warning: overwriting handler for key 0x%x\n", key); + + key_table[key] = handler; + return; +} + + + +static int serial_echo = 0; /* default is not to echo; change with 'e' */ + +void toggle_echo(u_char key) +{ + serial_echo = !serial_echo; + return; +} + + +void halt_machine(u_char key) +{ + /* This is 'debug me please' => just dump info and halt machine */ + printk("serial_rx_int: got EOT => halting machine.\n"); + printk("<not actually halting for now>\n"); + return; +} + + + +static void serial_rx_int(int irq, void *dev_id, struct pt_regs *regs) +{ + u_char c; + + /* XXX 
SMH: should probably check this is an RX interrupt :-) */ + + /* clear the interrupt by reading the character */ + c = inb(SERIAL_BASE + NS16550_RBR ); + + /* if there's a handler, call it: we trust it won't screw us too badly */ + if(key_table[c]) + (*key_table[c])(c); + + if(serial_echo) + printk("%c", c); + + return; +} + + +void initialize_serial() +{ + int i, fifo, rc; + + /* first initialize key handler table */ + for(i = 0; i < KEY_MAX; i++) + key_table[i] = (key_handler *)NULL; + + /* setup own handlers */ + add_key_handler(0x01, toggle_echo); /* <esc> to toggle echo */ + add_key_handler(0x04, halt_machine); /* CTRL-D to 'halt' */ + + + /* Should detect this, but must be a ns16550a at least, surely? */ + fifo = 1; + if(fifo) { + /* Clear FIFOs, enable, trigger at 1 byte */ + outb(NS16550_FCR_TRG1 | NS16550_FCR_ENABLE | + NS16550_FCR_CLRX | NS16550_FCR_CLTX, SERIAL_BASE+NS16550_FCR); + } + + outb(NS16550_MCR_OUT2, SERIAL_BASE + NS16550_MCR); /* Modem control */ + outb(NS16550_IER_ERDAI, SERIAL_BASE + NS16550_IER ); /* Setup interrupts */ + + /* XXX SMH: this is a hack; probably is IRQ4 but grab both anyway */ + if((rc = request_irq(4, serial_rx_int, 0, "serial", (void *)0x1234))) + printk("initialize_serial: failed to get IRQ4, rc=%d\n", rc); + + if((rc = request_irq(3, serial_rx_int, 0, "serial", (void *)0x1234))) + printk("initialize_serial: failed to get IRQ3, rc=%d\n", rc); + + return; +} diff --git a/xen-2.4.16/drivers/ide/ide-disk.c b/xen-2.4.16/drivers/ide/ide-disk.c index 984e53cd67..0d1cd113cd 100644 --- a/xen-2.4.16/drivers/ide/ide-disk.c +++ b/xen-2.4.16/drivers/ide/ide-disk.c @@ -420,13 +420,13 @@ static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsi taskfile.device_head |= drive->select.all; taskfile.command = command; -#ifdef DEBUG + /* #ifdef DEBUG */ printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? 
"read" : "writ"); if (lba) printk("LBAsect=%lld, ", block); else printk("CHS=%d/%d/%d, ", cyl, head, sect); printk("sectors=%ld, ", rq->nr_sectors); printk("buffer=0x%08lx\n", (unsigned long) rq->buffer); -#endif + /* #endif*/ memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr)); memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr)); @@ -578,7 +578,8 @@ static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsig tasklets[9] = (task_ioreg_t) 0; // tasklets[8] = (task_ioreg_t) (block>>32); // tasklets[9] = (task_ioreg_t) (block>>40); -#ifdef DEBUG + /*#ifdef DEBUG */ + printk("[A]\n"); printk("%s: %sing: LBAsect=%lu, sectors=%ld, buffer=0x%08lx, LBAsect=0x%012lx\n", drive->name, (rq->cmd==READ)?"read":"writ", @@ -590,7 +591,7 @@ static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsig drive->name, tasklets[3], tasklets[2], tasklets[9], tasklets[8], tasklets[7], tasklets[6], tasklets[5], tasklets[4]); -#endif + /* #endif */ OUT_BYTE(tasklets[1], IDE_FEATURE_REG); OUT_BYTE(tasklets[3], IDE_NSECTOR_REG); OUT_BYTE(tasklets[7], IDE_SECTOR_REG); @@ -1320,6 +1321,10 @@ static void idedisk_setup (ide_drive_t *drive) struct hd_driveid *id = drive->id; unsigned long capacity; + + printk (KERN_ALERT + "ide-disk.c::idedisk_setup: chs %d %d %d\n", + drive->cyl, drive->head, drive->sect); idedisk_add_settings(drive); @@ -1383,7 +1388,7 @@ static void idedisk_setup (ide_drive_t *drive) if ((capacity >= (drive->bios_cyl * drive->bios_sect * drive->bios_head)) && (!drive->forced_geom) && drive->bios_sect && drive->bios_head) drive->bios_cyl = (capacity / drive->bios_sect) / drive->bios_head; - printk (KERN_INFO "XEN %s: %ld sectors", drive->name, capacity); + printk (KERN_INFO "[XEN] %s: %ld sectors", drive->name, capacity); /* Give size in megabytes (MB), not mebibytes (MiB). */ /* We compute the exact rounded value, avoiding overflow. 
*/ diff --git a/xen-2.4.16/drivers/ide/ide-disk.c.orig b/xen-2.4.16/drivers/ide/ide-disk.c.orig new file mode 100644 index 0000000000..984e53cd67 --- /dev/null +++ b/xen-2.4.16/drivers/ide/ide-disk.c.orig @@ -0,0 +1,1550 @@ +/* + * linux/drivers/ide/ide-disk.c Version 1.10 June 9, 2000 + * + * Copyright (C) 1994-1998 Linus Torvalds & authors (see below) + */ + +/* + * Mostly written by Mark Lord <mlord@pobox.com> + * and Gadi Oxman <gadio@netvision.net.il> + * and Andre Hedrick <andre@linux-ide.org> + * + * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c. + * + * Version 1.00 move disk only code from ide.c to ide-disk.c + * support optional byte-swapping of all data + * Version 1.01 fix previous byte-swapping code + * Version 1.02 remove ", LBA" from drive identification msgs + * Version 1.03 fix display of id->buf_size for big-endian + * Version 1.04 add /proc configurable settings and S.M.A.R.T support + * Version 1.05 add capacity support for ATA3 >= 8GB + * Version 1.06 get boot-up messages to show full cyl count + * Version 1.07 disable door-locking if it fails + * Version 1.08 fixed CHS/LBA translations for ATA4 > 8GB, + * process of adding new ATA4 compliance. + * fixed problems in allowing fdisk to see + * the entire disk. 
+ * Version 1.09 added increment of rq->sector in ide_multwrite + * added UDMA 3/4 reporting + * Version 1.10 request queue changes, Ultra DMA 100 + * Version 1.11 added 48-bit lba + * Version 1.12 adding taskfile io access method + */ + +#define IDEDISK_VERSION "1.12" + +#undef REALLY_SLOW_IO /* most systems can safely undef this */ + +#include <xeno/config.h> +#include <xeno/module.h> +#include <xeno/types.h> +#include <xeno/lib.h> +#include <xeno/timer.h> +#include <xeno/mm.h> +#include <xeno/interrupt.h> +#include <xeno/major.h> +#include <xeno/errno.h> +#include <xeno/genhd.h> +#include <xeno/slab.h> +#include <xeno/delay.h> +#include <xeno/ide.h> + +#include <asm/byteorder.h> +#include <asm/irq.h> +#include <asm/uaccess.h> +#include <asm/io.h> + +#ifdef CONFIG_BLK_DEV_PDC4030 +#define IS_PDC4030_DRIVE (HWIF(drive)->chipset == ide_pdc4030) +#else +#define IS_PDC4030_DRIVE (0) /* auto-NULLs out pdc4030 code */ +#endif + +#ifdef CONFIG_IDE_TASKFILE_IO +# undef __TASKFILE__IO /* define __TASKFILE__IO */ +#else /* CONFIG_IDE_TASKFILE_IO */ +# undef __TASKFILE__IO +#endif /* CONFIG_IDE_TASKFILE_IO */ + +#ifndef __TASKFILE__IO + +static void idedisk_bswap_data (void *buffer, int wcount) +{ + u16 *p = buffer; + + while (wcount--) { + *p = *p << 8 | *p >> 8; p++; + *p = *p << 8 | *p >> 8; p++; + } +} + +static inline void idedisk_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + ide_input_data(drive, buffer, wcount); + if (drive->bswap) + idedisk_bswap_data(buffer, wcount); +} + +static inline void idedisk_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + if (drive->bswap) { + idedisk_bswap_data(buffer, wcount); + ide_output_data(drive, buffer, wcount); + idedisk_bswap_data(buffer, wcount); + } else + ide_output_data(drive, buffer, wcount); +} + +#endif /* __TASKFILE__IO */ + +/* + * lba_capacity_is_ok() performs a sanity check on the claimed "lba_capacity" + * value for this drive (from its reported identification 
information). + * + * Returns: 1 if lba_capacity looks sensible + * 0 otherwise + * + * It is called only once for each drive. + */ +static int lba_capacity_is_ok (struct hd_driveid *id) +{ + unsigned long lba_sects, chs_sects, head, tail; + + if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)) { + printk("48-bit Drive: %llu \n", id->lba_capacity_2); + return 1; + } + + /* + * The ATA spec tells large drives to return + * C/H/S = 16383/16/63 independent of their size. + * Some drives can be jumpered to use 15 heads instead of 16. + * Some drives can be jumpered to use 4092 cyls instead of 16383. + */ + if ((id->cyls == 16383 + || (id->cyls == 4092 && id->cur_cyls == 16383)) && + id->sectors == 63 && + (id->heads == 15 || id->heads == 16) && + id->lba_capacity >= 16383*63*id->heads) + return 1; + + lba_sects = id->lba_capacity; + chs_sects = id->cyls * id->heads * id->sectors; + + /* perform a rough sanity check on lba_sects: within 10% is OK */ + if ((lba_sects - chs_sects) < chs_sects/10) + return 1; + + /* some drives have the word order reversed */ + head = ((lba_sects >> 16) & 0xffff); + tail = (lba_sects & 0xffff); + lba_sects = (head | (tail << 16)); + if ((lba_sects - chs_sects) < chs_sects/10) { + id->lba_capacity = lba_sects; + return 1; /* lba_capacity is (now) good */ + } + + return 0; /* lba_capacity value may be bad */ +} + +#ifndef __TASKFILE__IO + +/* + * read_intr() is the handler for disk read/multread interrupts + */ +static ide_startstop_t read_intr (ide_drive_t *drive) +{ + byte stat; + int i; + unsigned int msect, nsect; + struct request *rq; + + /* new way for dealing with premature shared PCI interrupts */ + if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) { + if (stat & (ERR_STAT|DRQ_STAT)) { + return ide_error(drive, "read_intr", stat); + } + /* no data yet, so wait for another interrupt */ + ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); + return ide_started; + } + msect = drive->mult_count; + +read_next: + rq = 
HWGROUP(drive)->rq; + if (msect) { + if ((nsect = rq->current_nr_sectors) > msect) + nsect = msect; + msect -= nsect; + } else + nsect = 1; + idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS); +#ifdef DEBUG + printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n", + drive->name, rq->sector, rq->sector+nsect-1, + (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect); +#endif + rq->sector += nsect; + rq->buffer += nsect<<9; + rq->errors = 0; + i = (rq->nr_sectors -= nsect); + if (((long)(rq->current_nr_sectors -= nsect)) <= 0) + ide_end_request(1, HWGROUP(drive)); + if (i > 0) { + if (msect) + goto read_next; + ide_set_handler (drive, &read_intr, WAIT_CMD, NULL); + return ide_started; + } + return ide_stopped; +} + +/* + * write_intr() is the handler for disk write interrupts + */ +static ide_startstop_t write_intr (ide_drive_t *drive) +{ + byte stat; + int i; + ide_hwgroup_t *hwgroup = HWGROUP(drive); + struct request *rq = hwgroup->rq; + + if (!OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) { + printk("%s: write_intr error1: nr_sectors=%ld, stat=0x%02x\n", drive->name, rq->nr_sectors, stat); + } else { +#ifdef DEBUG + printk("%s: write: sector %ld, buffer=0x%08lx, remaining=%ld\n", + drive->name, rq->sector, (unsigned long) rq->buffer, + rq->nr_sectors-1); +#endif + if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) { + rq->sector++; + rq->buffer += 512; + rq->errors = 0; + i = --rq->nr_sectors; + --rq->current_nr_sectors; + if (((long)rq->current_nr_sectors) <= 0) + ide_end_request(1, hwgroup); + if (i > 0) { + idedisk_output_data (drive, rq->buffer, SECTOR_WORDS); + ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); + return ide_started; + } + return ide_stopped; + } + return ide_stopped; /* the original code did this here (?) 
*/ + } + return ide_error(drive, "write_intr", stat); +} + +/* + * ide_multwrite() transfers a block of up to mcount sectors of data + * to a drive as part of a disk multiple-sector write operation. + * + * Returns 0 on success. + * + * Note that we may be called from two contexts - the do_rw_disk context + * and IRQ context. The IRQ can happen any time after we've output the + * full "mcount" number of sectors, so we must make sure we update the + * state _before_ we output the final part of the data! + */ +int ide_multwrite (ide_drive_t *drive, unsigned int mcount) +{ + ide_hwgroup_t *hwgroup= HWGROUP(drive); + struct request *rq = &hwgroup->wrq; + + do { + char *buffer; + int nsect = rq->current_nr_sectors; + + if (nsect > mcount) + nsect = mcount; + mcount -= nsect; + buffer = rq->buffer; + + rq->sector += nsect; + rq->buffer += nsect << 9; + rq->nr_sectors -= nsect; + rq->current_nr_sectors -= nsect; + + /* Do we move to the next bh after this? */ + if (!rq->current_nr_sectors) { + struct buffer_head *bh = rq->bh->b_reqnext; + + /* end early early we ran out of requests */ + if (!bh) { + mcount = 0; + } else { + rq->bh = bh; + rq->current_nr_sectors = bh->b_size >> 9; + rq->buffer = bh->b_data; + } + } + + /* + * Ok, we're all setup for the interrupt + * re-entering us on the last transfer. + */ + idedisk_output_data(drive, buffer, nsect<<7); + } while (mcount); + + return 0; +} + +/* + * multwrite_intr() is the handler for disk multwrite interrupts + */ +static ide_startstop_t multwrite_intr (ide_drive_t *drive) +{ + byte stat; + int i; + ide_hwgroup_t *hwgroup = HWGROUP(drive); + struct request *rq = &hwgroup->wrq; + + if (OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) { + if (stat & DRQ_STAT) { + /* + * The drive wants data. 
Remember rq is the copy + * of the request + */ + if (rq->nr_sectors) { + if (ide_multwrite(drive, drive->mult_count)) + return ide_stopped; + ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL); + return ide_started; + } + } else { + /* + * If the copy has all the blocks completed then + * we can end the original request. + */ + if (!rq->nr_sectors) { /* all done? */ + rq = hwgroup->rq; + for (i = rq->nr_sectors; i > 0;){ + i -= rq->current_nr_sectors; + ide_end_request(1, hwgroup); + } + return ide_stopped; + } + } + return ide_stopped; /* the original code did this here (?) */ + } + return ide_error(drive, "multwrite_intr", stat); +} +#endif /* __TASKFILE__IO */ + +#ifdef __TASKFILE__IO + +static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block); +static ide_startstop_t lba_28_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block); +static ide_startstop_t lba_48_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long long block); + +/* + * do_rw_disk() issues READ and WRITE commands to a disk, + * using LBA if supported, or CHS otherwise, to address sectors. + * It also takes care of issuing special DRIVE_CMDs. 
+ */ +static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) +{ + if (rq->cmd == READ) + goto good_command; + if (rq->cmd == WRITE) + goto good_command; + + printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd); + ide_end_request(0, HWGROUP(drive)); + return ide_stopped; + +good_command: + +#ifdef CONFIG_BLK_DEV_PDC4030 + if (IS_PDC4030_DRIVE) { + extern ide_startstop_t promise_rw_disk(ide_drive_t *, struct request *, unsigned long); + return promise_rw_disk(drive, rq, block); + } +#endif /* CONFIG_BLK_DEV_PDC4030 */ + + if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) /* 48-bit LBA */ + return lba_48_rw_disk(drive, rq, (unsigned long long) block); + if (drive->select.b.lba) /* 28-bit LBA */ + return lba_28_rw_disk(drive, rq, (unsigned long) block); + + /* 28-bit CHS : DIE DIE DIE piece of legacy crap!!! */ + return chs_rw_disk(drive, rq, (unsigned long) block); +} + +static task_ioreg_t get_command (ide_drive_t *drive, int cmd) +{ + int lba48bit = (drive->id->cfs_enable_2 & 0x0400) ? 1 : 0; + +#if 1 + lba48bit = drive->addressing; +#endif + + if ((cmd == READ) && (drive->using_dma)) + return (lba48bit) ? WIN_READDMA_EXT : WIN_READDMA; + else if ((cmd == READ) && (drive->mult_count)) + return (lba48bit) ? WIN_MULTREAD_EXT : WIN_MULTREAD; + else if (cmd == READ) + return (lba48bit) ? WIN_READ_EXT : WIN_READ; + else if ((cmd == WRITE) && (drive->using_dma)) + return (lba48bit) ? WIN_WRITEDMA_EXT : WIN_WRITEDMA; + else if ((cmd == WRITE) && (drive->mult_count)) + return (lba48bit) ? WIN_MULTWRITE_EXT : WIN_MULTWRITE; + else if (cmd == WRITE) + return (lba48bit) ? 
WIN_WRITE_EXT : WIN_WRITE; + else + return WIN_NOP; +} + +static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + ide_task_t args; + + task_ioreg_t command = get_command(drive, rq->cmd); + unsigned int track = (block / drive->sect); + unsigned int sect = (block % drive->sect) + 1; + unsigned int head = (track % drive->head); + unsigned int cyl = (track / drive->head); + + memset(&taskfile, 0, sizeof(task_struct_t)); + memset(&hobfile, 0, sizeof(hob_struct_t)); + + taskfile.sector_count = (rq->nr_sectors==256)?0x00:rq->nr_sectors; + taskfile.sector_number = sect; + taskfile.low_cylinder = cyl; + taskfile.high_cylinder = (cyl>>8); + taskfile.device_head = head; + taskfile.device_head |= drive->select.all; + taskfile.command = command; + +#ifdef DEBUG + printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ"); + if (lba) printk("LBAsect=%lld, ", block); + else printk("CHS=%d/%d/%d, ", cyl, head, sect); + printk("sectors=%ld, ", rq->nr_sectors); + printk("buffer=0x%08lx\n", (unsigned long) rq->buffer); +#endif + + memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr)); + memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr)); + args.command_type = ide_cmd_type_parser(&args); + args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile); + args.handler = ide_handler_parser(&taskfile, &hobfile); + args.posthandler = NULL; + args.rq = (struct request *) rq; + args.block = block; + rq->special = NULL; + rq->special = (ide_task_t *)&args; + + return do_rw_taskfile(drive, &args); +} + +static ide_startstop_t lba_28_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + ide_task_t args; + + task_ioreg_t command = get_command(drive, rq->cmd); + + memset(&taskfile, 0, sizeof(task_struct_t)); + memset(&hobfile, 0, 
sizeof(hob_struct_t)); + + taskfile.sector_count = (rq->nr_sectors==256)?0x00:rq->nr_sectors; + taskfile.sector_number = block; + taskfile.low_cylinder = (block>>=8); + taskfile.high_cylinder = (block>>=8); + taskfile.device_head = ((block>>8)&0x0f); + taskfile.device_head |= drive->select.all; + taskfile.command = command; + + +#ifdef DEBUG + printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ"); + if (lba) printk("LBAsect=%lld, ", block); + else printk("CHS=%d/%d/%d, ", cyl, head, sect); + printk("sectors=%ld, ", rq->nr_sectors); + printk("buffer=0x%08lx\n", (unsigned long) rq->buffer); +#endif + + memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr)); + memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr)); + args.command_type = ide_cmd_type_parser(&args); + args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile); + args.handler = ide_handler_parser(&taskfile, &hobfile); + args.posthandler = NULL; + args.rq = (struct request *) rq; + args.block = block; + rq->special = NULL; + rq->special = (ide_task_t *)&args; + + return do_rw_taskfile(drive, &args); +} + +/* + * 268435455 == 137439 MB or 28bit limit + * 320173056 == 163929 MB or 48bit addressing + * 1073741822 == 549756 MB or 48bit addressing fake drive + */ + +static ide_startstop_t lba_48_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long long block) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + ide_task_t args; + + task_ioreg_t command = get_command(drive, rq->cmd); + + memset(&taskfile, 0, sizeof(task_struct_t)); + memset(&hobfile, 0, sizeof(hob_struct_t)); + + taskfile.sector_count = rq->nr_sectors; + hobfile.sector_count = (rq->nr_sectors>>8); + + if (rq->nr_sectors == 65536) { + taskfile.sector_count = 0x00; + hobfile.sector_count = 0x00; + } + + taskfile.sector_number = block; /* low lba */ + taskfile.low_cylinder = (block>>=8); /* mid lba */ + taskfile.high_cylinder = (block>>=8); /* hi lba */ + 
hobfile.sector_number = (block>>=8); /* low lba */ + hobfile.low_cylinder = (block>>=8); /* mid lba */ + hobfile.high_cylinder = (block>>=8); /* hi lba */ + taskfile.device_head = drive->select.all; + hobfile.device_head = taskfile.device_head; + hobfile.control = (drive->ctl|0x80); + taskfile.command = command; + +#ifdef DEBUG + printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ"); + if (lba) printk("LBAsect=%lld, ", block); + else printk("CHS=%d/%d/%d, ", cyl, head, sect); + printk("sectors=%ld, ", rq->nr_sectors); + printk("buffer=0x%08lx\n", (unsigned long) rq->buffer); +#endif + + memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr)); + memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr)); + args.command_type = ide_cmd_type_parser(&args); + args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile); + args.handler = ide_handler_parser(&taskfile, &hobfile); + args.posthandler = NULL; + args.rq = (struct request *) rq; + args.block = block; + rq->special = NULL; + rq->special = (ide_task_t *)&args; + + return do_rw_taskfile(drive, &args); +} + +#else /* !__TASKFILE__IO */ +/* + * do_rw_disk() issues READ and WRITE commands to a disk, + * using LBA if supported, or CHS otherwise, to address sectors. + * It also takes care of issuing special DRIVE_CMDs. 
+ */ +static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) +{ + if (IDE_CONTROL_REG) + OUT_BYTE(drive->ctl,IDE_CONTROL_REG); + +#ifdef CONFIG_BLK_DEV_PDC4030 + if (drive->select.b.lba || IS_PDC4030_DRIVE) { +#else /* !CONFIG_BLK_DEV_PDC4030 */ + if (drive->select.b.lba) { +#endif /* CONFIG_BLK_DEV_PDC4030 */ + + if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) { + task_ioreg_t tasklets[10]; + + tasklets[0] = 0; + tasklets[1] = 0; + tasklets[2] = rq->nr_sectors; + tasklets[3] = (rq->nr_sectors>>8); + if (rq->nr_sectors == 65536) { + tasklets[2] = 0x00; + tasklets[3] = 0x00; + } + tasklets[4] = (task_ioreg_t) block; + tasklets[5] = (task_ioreg_t) (block>>8); + tasklets[6] = (task_ioreg_t) (block>>16); + tasklets[7] = (task_ioreg_t) (block>>24); + tasklets[8] = (task_ioreg_t) 0; + tasklets[9] = (task_ioreg_t) 0; +// tasklets[8] = (task_ioreg_t) (block>>32); +// tasklets[9] = (task_ioreg_t) (block>>40); +#ifdef DEBUG + printk("%s: %sing: LBAsect=%lu, sectors=%ld, buffer=0x%08lx, LBAsect=0x%012lx\n", + drive->name, + (rq->cmd==READ)?"read":"writ", + block, + rq->nr_sectors, + (unsigned long) rq->buffer, + block); + printk("%s: 0x%02x%02x 0x%02x%02x%02x%02x%02x%02x\n", + drive->name, tasklets[3], tasklets[2], + tasklets[9], tasklets[8], tasklets[7], + tasklets[6], tasklets[5], tasklets[4]); +#endif + OUT_BYTE(tasklets[1], IDE_FEATURE_REG); + OUT_BYTE(tasklets[3], IDE_NSECTOR_REG); + OUT_BYTE(tasklets[7], IDE_SECTOR_REG); + OUT_BYTE(tasklets[8], IDE_LCYL_REG); + OUT_BYTE(tasklets[9], IDE_HCYL_REG); + + OUT_BYTE(tasklets[0], IDE_FEATURE_REG); + OUT_BYTE(tasklets[2], IDE_NSECTOR_REG); + OUT_BYTE(tasklets[4], IDE_SECTOR_REG); + OUT_BYTE(tasklets[5], IDE_LCYL_REG); + OUT_BYTE(tasklets[6], IDE_HCYL_REG); + OUT_BYTE(0x00|drive->select.all,IDE_SELECT_REG); + } else { +#ifdef DEBUG + printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n", + drive->name, (rq->cmd==READ)?"read":"writ", + block, rq->nr_sectors, 
(unsigned long) rq->buffer); +#endif + OUT_BYTE(0x00, IDE_FEATURE_REG); + OUT_BYTE((rq->nr_sectors==256)?0x00:rq->nr_sectors,IDE_NSECTOR_REG); + OUT_BYTE(block,IDE_SECTOR_REG); + OUT_BYTE(block>>=8,IDE_LCYL_REG); + OUT_BYTE(block>>=8,IDE_HCYL_REG); + OUT_BYTE(((block>>8)&0x0f)|drive->select.all,IDE_SELECT_REG); + } + } else { + unsigned int sect,head,cyl,track; + track = block / drive->sect; + sect = block % drive->sect + 1; + OUT_BYTE(sect,IDE_SECTOR_REG); + head = track % drive->head; + cyl = track / drive->head; + + OUT_BYTE(0x00, IDE_FEATURE_REG); + OUT_BYTE((rq->nr_sectors==256)?0x00:rq->nr_sectors,IDE_NSECTOR_REG); + OUT_BYTE(cyl,IDE_LCYL_REG); + OUT_BYTE(cyl>>8,IDE_HCYL_REG); + OUT_BYTE(head|drive->select.all,IDE_SELECT_REG); +#ifdef DEBUG + printk("%s: %sing: CHS=%d/%d/%d, sectors=%ld, buffer=0x%08lx\n", + drive->name, (rq->cmd==READ)?"read":"writ", cyl, + head, sect, rq->nr_sectors, (unsigned long) rq->buffer); +#endif + } +#ifdef CONFIG_BLK_DEV_PDC4030 + if (IS_PDC4030_DRIVE) { + extern ide_startstop_t do_pdc4030_io(ide_drive_t *, struct request *); + return do_pdc4030_io (drive, rq); + } +#endif /* CONFIG_BLK_DEV_PDC4030 */ + if (rq->cmd == READ) { +#ifdef CONFIG_BLK_DEV_IDEDMA + if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_read, drive))) + return ide_started; +#endif /* CONFIG_BLK_DEV_IDEDMA */ + ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); + if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) { + OUT_BYTE(drive->mult_count ? WIN_MULTREAD_EXT : WIN_READ_EXT, IDE_COMMAND_REG); + } else { + OUT_BYTE(drive->mult_count ? WIN_MULTREAD : WIN_READ, IDE_COMMAND_REG); + } + return ide_started; + } + if (rq->cmd == WRITE) { + ide_startstop_t startstop; +#ifdef CONFIG_BLK_DEV_IDEDMA + if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_write, drive))) + return ide_started; +#endif /* CONFIG_BLK_DEV_IDEDMA */ + if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) { + OUT_BYTE(drive->mult_count ? 
WIN_MULTWRITE_EXT : WIN_WRITE_EXT, IDE_COMMAND_REG); + } else { + OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, IDE_COMMAND_REG); + } + if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { + printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name, + drive->mult_count ? "MULTWRITE" : "WRITE"); + return startstop; + } + if (!drive->unmask) + __cli(); /* local CPU only */ + if (drive->mult_count) { + ide_hwgroup_t *hwgroup = HWGROUP(drive); + /* + * Ugh.. this part looks ugly because we MUST set up + * the interrupt handler before outputting the first block + * of data to be written. If we hit an error (corrupted buffer list) + * in ide_multwrite(), then we need to remove the handler/timer + * before returning. Fortunately, this NEVER happens (right?). + * + * Except when you get an error it seems... + */ + hwgroup->wrq = *rq; /* scratchpad */ + ide_set_handler(drive, &multwrite_intr, WAIT_CMD, NULL); + if (ide_multwrite(drive, drive->mult_count)) { + unsigned long flags; + spin_lock_irqsave(&io_request_lock, flags); + hwgroup->handler = NULL; + del_timer(&hwgroup->timer); + spin_unlock_irqrestore(&io_request_lock, flags); + return ide_stopped; + } + } else { + ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); + idedisk_output_data(drive, rq->buffer, SECTOR_WORDS); + } + return ide_started; + } + printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd); + ide_end_request(0, HWGROUP(drive)); + return ide_stopped; +} + +#endif /* __TASKFILE__IO */ + +static int idedisk_open (struct inode *inode, struct file *filp, ide_drive_t *drive) +{ + MOD_INC_USE_COUNT; + if (drive->removable && drive->usage == 1) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.command = WIN_DOORLOCK; + check_disk_change(inode->i_rdev); + /* + * Ignore the return code from door_lock, + * since the 
open() has already succeeded, + * and the door_lock is irrelevant at this point. + */ + if (drive->doorlocking && ide_wait_taskfile(drive, &taskfile, &hobfile, NULL)) + drive->doorlocking = 0; + } + return 0; +} + +static int do_idedisk_flushcache(ide_drive_t *drive); + +static void idedisk_release (struct inode *inode, struct file *filp, ide_drive_t *drive) +{ + if (drive->removable && !drive->usage) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.command = WIN_DOORUNLOCK; + invalidate_bdev(inode->i_bdev, 0); + if (drive->doorlocking && ide_wait_taskfile(drive, &taskfile, &hobfile, NULL)) + drive->doorlocking = 0; + } + if ((drive->id->cfs_enable_2 & 0x3000) && drive->wcache) + if (do_idedisk_flushcache(drive)) + printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n", + drive->name); + MOD_DEC_USE_COUNT; +} + +static int idedisk_media_change (ide_drive_t *drive) +{ + return drive->removable; /* if removable, always assume it was changed */ +} + +static void idedisk_revalidate (ide_drive_t *drive) +{ + grok_partitions(HWIF(drive)->gd, drive->select.b.unit, + 1<<PARTN_BITS, + current_capacity(drive)); +} + +/* + * Queries for true maximum capacity of the drive. + * Returns maximum LBA address (> 0) of the drive, 0 if failed. 
+ */ +static unsigned long idedisk_read_native_max_address(ide_drive_t *drive) +{ + ide_task_t args; + unsigned long addr = 0; + + if (!(drive->id->command_set_1 & 0x0400) && + !(drive->id->cfs_enable_2 & 0x0100)) + return addr; + + /* Create IDE/ATA command request structure */ + memset(&args, 0, sizeof(ide_task_t)); + args.tfRegister[IDE_SELECT_OFFSET] = 0x40; + args.tfRegister[IDE_COMMAND_OFFSET] = WIN_READ_NATIVE_MAX; + args.handler = task_no_data_intr; + + /* submit command request */ + ide_raw_taskfile(drive, &args, NULL); + + /* if OK, compute maximum address value */ + if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) { + addr = ((args.tfRegister[IDE_SELECT_OFFSET] & 0x0f) << 24) + | ((args.tfRegister[ IDE_HCYL_OFFSET] ) << 16) + | ((args.tfRegister[ IDE_LCYL_OFFSET] ) << 8) + | ((args.tfRegister[IDE_SECTOR_OFFSET] )); + } + addr++; /* since the return value is (maxlba - 1), we add 1 */ + return addr; +} + +static unsigned long long idedisk_read_native_max_address_ext(ide_drive_t *drive) +{ + ide_task_t args; + unsigned long long addr = 0; + + /* Create IDE/ATA command request structure */ + memset(&args, 0, sizeof(ide_task_t)); + + args.tfRegister[IDE_SELECT_OFFSET] = 0x40; + args.tfRegister[IDE_COMMAND_OFFSET] = WIN_READ_NATIVE_MAX_EXT; + args.handler = task_no_data_intr; + + /* submit command request */ + ide_raw_taskfile(drive, &args, NULL); + + /* if OK, compute maximum address value */ + if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) { + u32 high = ((args.hobRegister[IDE_HCYL_OFFSET_HOB])<<16) | + ((args.hobRegister[IDE_LCYL_OFFSET_HOB])<<8) | + (args.hobRegister[IDE_SECTOR_OFFSET_HOB]); + u32 low = ((args.tfRegister[IDE_HCYL_OFFSET])<<16) | + ((args.tfRegister[IDE_LCYL_OFFSET])<<8) | + (args.tfRegister[IDE_SECTOR_OFFSET]); + addr = ((__u64)high << 24) | low; + } + addr++; /* since the return value is (maxlba - 1), we add 1 */ + return addr; +} + +#ifdef CONFIG_IDEDISK_STROKE +/* + * Sets maximum virtual LBA address of the drive. 
+ * Returns new maximum virtual LBA address (> 0) or 0 on failure. + */ +static unsigned long idedisk_set_max_address(ide_drive_t *drive, unsigned long addr_req) +{ + ide_task_t args; + unsigned long addr_set = 0; + + addr_req--; + /* Create IDE/ATA command request structure */ + memset(&args, 0, sizeof(ide_task_t)); + args.tfRegister[IDE_SECTOR_OFFSET] = ((addr_req >> 0) & 0xff); + args.tfRegister[IDE_LCYL_OFFSET] = ((addr_req >> 8) & 0xff); + args.tfRegister[IDE_HCYL_OFFSET] = ((addr_req >> 16) & 0xff); + args.tfRegister[IDE_SELECT_OFFSET] = ((addr_req >> 24) & 0x0f) | 0x40; + args.tfRegister[IDE_COMMAND_OFFSET] = WIN_SET_MAX; + args.handler = task_no_data_intr; + /* submit command request */ + ide_raw_taskfile(drive, &args, NULL); + /* if OK, read new maximum address value */ + if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) { + addr_set = ((args.tfRegister[IDE_SELECT_OFFSET] & 0x0f) << 24) + | ((args.tfRegister[ IDE_HCYL_OFFSET] ) << 16) + | ((args.tfRegister[ IDE_LCYL_OFFSET] ) << 8) + | ((args.tfRegister[IDE_SECTOR_OFFSET] )); + } + addr_set++; + return addr_set; +} + +static unsigned long long idedisk_set_max_address_ext(ide_drive_t *drive, unsigned long long addr_req) +{ + ide_task_t args; + unsigned long long addr_set = 0; + + addr_req--; + /* Create IDE/ATA command request structure */ + memset(&args, 0, sizeof(ide_task_t)); + args.tfRegister[IDE_SECTOR_OFFSET] = ((addr_req >> 0) & 0xff); + args.tfRegister[IDE_LCYL_OFFSET] = ((addr_req >>= 8) & 0xff); + args.tfRegister[IDE_HCYL_OFFSET] = ((addr_req >>= 8) & 0xff); + args.tfRegister[IDE_SELECT_OFFSET] = 0x40; + args.tfRegister[IDE_COMMAND_OFFSET] = WIN_SET_MAX_EXT; + args.hobRegister[IDE_SECTOR_OFFSET_HOB] = ((addr_req >>= 8) & 0xff); + args.hobRegister[IDE_LCYL_OFFSET_HOB] = ((addr_req >>= 8) & 0xff); + args.hobRegister[IDE_HCYL_OFFSET_HOB] = ((addr_req >>= 8) & 0xff); + args.hobRegister[IDE_SELECT_OFFSET_HOB] = 0x40; + args.hobRegister[IDE_CONTROL_OFFSET_HOB]= (drive->ctl|0x80); + args.handler = 
task_no_data_intr; + /* submit command request */ + ide_raw_taskfile(drive, &args, NULL); + /* if OK, compute maximum address value */ + if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) { + u32 high = ((args.hobRegister[IDE_HCYL_OFFSET_HOB])<<16) | + ((args.hobRegister[IDE_LCYL_OFFSET_HOB])<<8) | + (args.hobRegister[IDE_SECTOR_OFFSET_HOB]); + u32 low = ((args.tfRegister[IDE_HCYL_OFFSET])<<16) | + ((args.tfRegister[IDE_LCYL_OFFSET])<<8) | + (args.tfRegister[IDE_SECTOR_OFFSET]); + addr_set = ((__u64)high << 24) | low; + } + return addr_set; +} + +/* + * Tests if the drive supports Host Protected Area feature. + * Returns true if supported, false otherwise. + */ +static inline int idedisk_supports_host_protected_area(ide_drive_t *drive) +{ + int flag = (drive->id->cfs_enable_1 & 0x0400) ? 1 : 0; + printk("%s: host protected area => %d\n", drive->name, flag); + return flag; +} + +#endif /* CONFIG_IDEDISK_STROKE */ + +/* + * Compute drive->capacity, the full capacity of the drive + * Called with drive->id != NULL. + * + * To compute capacity, this uses either of + * + * 1. CHS value set by user (whatever user sets will be trusted) + * 2. LBA value from target drive (require new ATA feature) + * 3. LBA value from system BIOS (new one is OK, old one may break) + * 4. CHS value from system BIOS (traditional style) + * + * in above order (i.e., if value of higher priority is available, + * reset will be ignored). 
+ */ +static void init_idedisk_capacity (ide_drive_t *drive) +{ + struct hd_driveid *id = drive->id; + unsigned long capacity = drive->cyl * drive->head * drive->sect; + unsigned long set_max = idedisk_read_native_max_address(drive); + unsigned long long capacity_2 = capacity; + unsigned long long set_max_ext; + + drive->capacity48 = 0; + drive->select.b.lba = 0; + + if (id->cfs_enable_2 & 0x0400) { + capacity_2 = id->lba_capacity_2; + drive->head = drive->bios_head = 255; + drive->sect = drive->bios_sect = 63; + drive->cyl = (unsigned int) capacity_2 / (drive->head * drive->sect); + drive->select.b.lba = 1; + set_max_ext = idedisk_read_native_max_address_ext(drive); + if (set_max_ext > capacity_2) { +#ifdef CONFIG_IDEDISK_STROKE + set_max_ext = idedisk_read_native_max_address_ext(drive); + set_max_ext = idedisk_set_max_address_ext(drive, set_max_ext); + if (set_max_ext) { + drive->capacity48 = capacity_2 = set_max_ext; + drive->cyl = (unsigned int) set_max_ext / (drive->head * drive->sect); + drive->select.b.lba = 1; + drive->id->lba_capacity_2 = capacity_2; + } +#else /* !CONFIG_IDEDISK_STROKE */ + printk("%s: setmax_ext LBA %llu, native %llu\n", + drive->name, set_max_ext, capacity_2); +#endif /* CONFIG_IDEDISK_STROKE */ + } + drive->bios_cyl = drive->cyl; + drive->capacity48 = capacity_2; + drive->capacity = (unsigned long) capacity_2; + return; + /* Determine capacity, and use LBA if the drive properly supports it */ + } else if ((id->capability & 2) && lba_capacity_is_ok(id)) { + capacity = id->lba_capacity; + drive->cyl = capacity / (drive->head * drive->sect); + drive->select.b.lba = 1; + } + + if (set_max > capacity) { +#ifdef CONFIG_IDEDISK_STROKE + set_max = idedisk_read_native_max_address(drive); + set_max = idedisk_set_max_address(drive, set_max); + if (set_max) { + drive->capacity = capacity = set_max; + drive->cyl = set_max / (drive->head * drive->sect); + drive->select.b.lba = 1; + drive->id->lba_capacity = capacity; + } +#else /* 
!CONFIG_IDEDISK_STROKE */ + printk("%s: setmax LBA %lu, native %lu\n", + drive->name, set_max, capacity); +#endif /* CONFIG_IDEDISK_STROKE */ + } + + drive->capacity = capacity; + + if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)) { + drive->capacity48 = id->lba_capacity_2; + drive->head = 255; + drive->sect = 63; + drive->cyl = (unsigned long)(drive->capacity48) / (drive->head * drive->sect); + } +} + +static unsigned long idedisk_capacity (ide_drive_t *drive) +{ + if (drive->id->cfs_enable_2 & 0x0400) + return (drive->capacity48 - drive->sect0); + return (drive->capacity - drive->sect0); +} + +static ide_startstop_t idedisk_special (ide_drive_t *drive) +{ + special_t *s = &drive->special; + + if (s->b.set_geometry) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + ide_handler_t *handler = NULL; + + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + + s->b.set_geometry = 0; + taskfile.sector_number = drive->sect; + taskfile.low_cylinder = drive->cyl; + taskfile.high_cylinder = drive->cyl>>8; + taskfile.device_head = ((drive->head-1)|drive->select.all)&0xBF; + if (!IS_PDC4030_DRIVE) { + taskfile.sector_count = drive->sect; + taskfile.command = WIN_SPECIFY; + handler = ide_handler_parser(&taskfile, &hobfile); + } + do_taskfile(drive, &taskfile, &hobfile, handler); + } else if (s->b.recalibrate) { + s->b.recalibrate = 0; + if (!IS_PDC4030_DRIVE) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.sector_count = drive->sect; + taskfile.command = WIN_RESTORE; + do_taskfile(drive, &taskfile, &hobfile, ide_handler_parser(&taskfile, &hobfile)); + } + } else if (s->b.set_multmode) { + s->b.set_multmode = 0; + if (drive->id && drive->mult_req > drive->id->max_multsect) + drive->mult_req = drive->id->max_multsect; + if 
(!IS_PDC4030_DRIVE) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.sector_count = drive->mult_req; + taskfile.command = WIN_SETMULT; + do_taskfile(drive, &taskfile, &hobfile, ide_handler_parser(&taskfile, &hobfile)); + } + } else if (s->all) { + int special = s->all; + s->all = 0; + printk(KERN_ERR "%s: bad special flag: 0x%02x\n", drive->name, special); + return ide_stopped; + } + return IS_PDC4030_DRIVE ? ide_stopped : ide_started; +} + +static void idedisk_pre_reset (ide_drive_t *drive) +{ + int legacy = (drive->id->cfs_enable_2 & 0x0400) ? 0 : 1; + + drive->special.all = 0; + drive->special.b.set_geometry = legacy; + drive->special.b.recalibrate = legacy; + if (OK_TO_RESET_CONTROLLER) + drive->mult_count = 0; + if (!drive->keep_settings && !drive->using_dma) + drive->mult_req = 0; + if (drive->mult_req != drive->mult_count) + drive->special.b.set_multmode = 1; +} + +#ifdef CONFIG_PROC_FS + +static int smart_enable(ide_drive_t *drive) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.feature = SMART_ENABLE; + taskfile.low_cylinder = SMART_LCYL_PASS; + taskfile.high_cylinder = SMART_HCYL_PASS; + taskfile.command = WIN_SMART; + return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); +} + +static int get_smart_values(ide_drive_t *drive, byte *buf) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.feature = SMART_READ_VALUES; + taskfile.sector_count = 0x01; + taskfile.low_cylinder = SMART_LCYL_PASS; + taskfile.high_cylinder = SMART_HCYL_PASS; + taskfile.command = WIN_SMART; + (void) smart_enable(drive); + return 
ide_wait_taskfile(drive, &taskfile, &hobfile, buf); +} + +static int get_smart_thresholds(ide_drive_t *drive, byte *buf) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.feature = SMART_READ_THRESHOLDS; + taskfile.sector_count = 0x01; + taskfile.low_cylinder = SMART_LCYL_PASS; + taskfile.high_cylinder = SMART_HCYL_PASS; + taskfile.command = WIN_SMART; + (void) smart_enable(drive); + return ide_wait_taskfile(drive, &taskfile, &hobfile, buf); +} + +static int proc_idedisk_read_cache + (char *page, char **start, off_t off, int count, int *eof, void *data) +{ + ide_drive_t *drive = (ide_drive_t *) data; + char *out = page; + int len; + + if (drive->id) + len = sprintf(out,"%i\n", drive->id->buf_size / 2); + else + len = sprintf(out,"(none)\n"); + PROC_IDE_READ_RETURN(page,start,off,count,eof,len); +} + +static int proc_idedisk_read_smart_thresholds + (char *page, char **start, off_t off, int count, int *eof, void *data) +{ + ide_drive_t *drive = (ide_drive_t *)data; + int len = 0, i = 0; + + if (!get_smart_thresholds(drive, page)) { + unsigned short *val = (unsigned short *) page; + char *out = ((char *)val) + (SECTOR_WORDS * 4); + page = out; + do { + out += sprintf(out, "%04x%c", le16_to_cpu(*val), (++i & 7) ? ' ' : '\n'); + val += 1; + } while (i < (SECTOR_WORDS * 2)); + len = out - page; + } + PROC_IDE_READ_RETURN(page,start,off,count,eof,len); +} + +static int proc_idedisk_read_smart_values + (char *page, char **start, off_t off, int count, int *eof, void *data) +{ + ide_drive_t *drive = (ide_drive_t *)data; + int len = 0, i = 0; + + if (!get_smart_values(drive, page)) { + unsigned short *val = (unsigned short *) page; + char *out = ((char *)val) + (SECTOR_WORDS * 4); + page = out; + do { + out += sprintf(out, "%04x%c", le16_to_cpu(*val), (++i & 7) ? 
' ' : '\n'); + val += 1; + } while (i < (SECTOR_WORDS * 2)); + len = out - page; + } + PROC_IDE_READ_RETURN(page,start,off,count,eof,len); +} + +static ide_proc_entry_t idedisk_proc[] = { + { "cache", S_IFREG|S_IRUGO, proc_idedisk_read_cache, NULL }, + { "geometry", S_IFREG|S_IRUGO, proc_ide_read_geometry, NULL }, + { "smart_values", S_IFREG|S_IRUSR, proc_idedisk_read_smart_values, NULL }, + { "smart_thresholds", S_IFREG|S_IRUSR, proc_idedisk_read_smart_thresholds, NULL }, + { NULL, 0, NULL, NULL } +}; + +#else + +#define idedisk_proc NULL + +#endif /* CONFIG_PROC_FS */ + +static int set_multcount(ide_drive_t *drive, int arg) +{ +#ifdef __TASKFILE__IO + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + + if (drive->special.b.set_multmode) + return -EBUSY; + + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.sector_count = drive->mult_req; + taskfile.command = WIN_SETMULT; + drive->mult_req = arg; + drive->special.b.set_multmode = 1; + ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); +#else /* !__TASKFILE__IO */ + struct request rq; + + if (drive->special.b.set_multmode) + return -EBUSY; + ide_init_drive_cmd (&rq); + rq.cmd = IDE_DRIVE_CMD; + drive->mult_req = arg; + drive->special.b.set_multmode = 1; + (void) ide_do_drive_cmd (drive, &rq, ide_wait); +#endif /* __TASKFILE__IO */ + return (drive->mult_count == arg) ? 0 : -EIO; +} + +static int set_nowerr(ide_drive_t *drive, int arg) +{ + if (ide_spin_wait_hwgroup(drive)) + return -EBUSY; + drive->nowerr = arg; + drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT; + spin_unlock_irq(&io_request_lock); + return 0; +} + +static int write_cache (ide_drive_t *drive, int arg) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.feature = (arg) ? 
SETFEATURES_EN_WCACHE : SETFEATURES_DIS_WCACHE; + taskfile.command = WIN_SETFEATURES; + + if (!(drive->id->cfs_enable_2 & 0x3000)) + return 1; + + (void) ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); + drive->wcache = arg; + return 0; +} + +static int do_idedisk_standby (ide_drive_t *drive) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.command = WIN_STANDBYNOW1; + return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); +} + +static int do_idedisk_flushcache (ide_drive_t *drive) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + if (drive->id->cfs_enable_2 & 0x2400) { + taskfile.command = WIN_FLUSH_CACHE_EXT; + } else { + taskfile.command = WIN_FLUSH_CACHE; + } + return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); +} + +static int set_acoustic (ide_drive_t *drive, int arg) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + + taskfile.feature = (arg)?SETFEATURES_EN_AAM:SETFEATURES_DIS_AAM; + taskfile.sector_count = arg; + + taskfile.command = WIN_SETFEATURES; + (void) ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); + drive->acoustic = arg; + return 0; +} + +static int probe_lba_addressing (ide_drive_t *drive, int arg) +{ + drive->addressing = 0; + + if (!(drive->id->cfs_enable_2 & 0x0400)) + return -EIO; + + drive->addressing = arg; + return 0; +} + +static int set_lba_addressing (ide_drive_t *drive, int arg) +{ + return (probe_lba_addressing(drive, arg)); +} + +static void idedisk_add_settings(ide_drive_t *drive) +{ + struct hd_driveid *id = drive->id; +#if 0 + int major = HWIF(drive)->major; + int minor = 
drive->select.b.unit << PARTN_BITS; +#endif + + ide_add_setting(drive, "bios_cyl", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->bios_cyl, NULL); + ide_add_setting(drive, "bios_head", SETTING_RW, -1, -1, TYPE_BYTE, 0, 255, 1, 1, &drive->bios_head, NULL); + ide_add_setting(drive, "bios_sect", SETTING_RW, -1, -1, TYPE_BYTE, 0, 63, 1, 1, &drive->bios_sect, NULL); + ide_add_setting(drive, "address", SETTING_RW, HDIO_GET_ADDRESS, HDIO_SET_ADDRESS, TYPE_INTA, 0, 2, 1, 1, &drive->addressing, set_lba_addressing); + ide_add_setting(drive, "bswap", SETTING_READ, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->bswap, NULL); + ide_add_setting(drive, "multcount", id ? SETTING_RW : SETTING_READ, HDIO_GET_MULTCOUNT, HDIO_SET_MULTCOUNT, TYPE_BYTE, 0, id ? id->max_multsect : 0, 1, 1, &drive->mult_count, set_multcount); + ide_add_setting(drive, "nowerr", SETTING_RW, HDIO_GET_NOWERR, HDIO_SET_NOWERR, TYPE_BYTE, 0, 1, 1, 1, &drive->nowerr, set_nowerr); +#if 0 + ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 1, &read_ahead[major], NULL); + ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, 4096, PAGE_SIZE, 1024, &max_readahead[major][minor], NULL); + ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 1, &max_sectors[major][minor], NULL); +#endif + ide_add_setting(drive, "lun", SETTING_RW, -1, -1, TYPE_INT, 0, 7, 1, 1, &drive->lun, NULL); + ide_add_setting(drive, "wcache", SETTING_RW, HDIO_GET_WCACHE, HDIO_SET_WCACHE, TYPE_BYTE, 0, 1, 1, 1, &drive->wcache, write_cache); + ide_add_setting(drive, "acoustic", SETTING_RW, HDIO_GET_ACOUSTIC, HDIO_SET_ACOUSTIC, TYPE_BYTE, 0, 254, 1, 1, &drive->acoustic, set_acoustic); + ide_add_setting(drive, "failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->failures, NULL); + ide_add_setting(drive, "max_failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->max_failures, NULL); +} + +static void 
idedisk_setup (ide_drive_t *drive) +{ + int i; + + struct hd_driveid *id = drive->id; + unsigned long capacity; + + idedisk_add_settings(drive); + + if (id == NULL) + return; + + /* + * CompactFlash cards and their brethern look just like hard drives + * to us, but they are removable and don't have a doorlock mechanism. + */ + if (drive->removable && !drive_is_flashcard(drive)) { + /* + * Removable disks (eg. SYQUEST); ignore 'WD' drives + */ + if (id->model[0] != 'W' || id->model[1] != 'D') { + drive->doorlocking = 1; + } + } + for (i = 0; i < MAX_DRIVES; ++i) { + ide_hwif_t *hwif = HWIF(drive); + + if (drive != &hwif->drives[i]) continue; +#ifdef DEVFS_MUST_DIE + hwif->gd->de_arr[i] = drive->de; +#endif + if (drive->removable) + hwif->gd->flags[i] |= GENHD_FL_REMOVABLE; + break; + } + + /* Extract geometry if we did not already have one for the drive */ + if (!drive->cyl || !drive->head || !drive->sect) { + drive->cyl = drive->bios_cyl = id->cyls; + drive->head = drive->bios_head = id->heads; + drive->sect = drive->bios_sect = id->sectors; + } + + /* Handle logical geometry translation by the drive */ + if ((id->field_valid & 1) && id->cur_cyls && + id->cur_heads && (id->cur_heads <= 16) && id->cur_sectors) { + drive->cyl = id->cur_cyls; + drive->head = id->cur_heads; + drive->sect = id->cur_sectors; + } + + /* Use physical geometry if what we have still makes no sense */ + if (drive->head > 16 && id->heads && id->heads <= 16) { + drive->cyl = id->cyls; + drive->head = id->heads; + drive->sect = id->sectors; + } + + /* calculate drive capacity, and select LBA if possible */ + init_idedisk_capacity (drive); + + /* + * if possible, give fdisk access to more of the drive, + * by correcting bios_cyls: + */ + capacity = idedisk_capacity (drive); + if ((capacity >= (drive->bios_cyl * drive->bios_sect * drive->bios_head)) && + (!drive->forced_geom) && drive->bios_sect && drive->bios_head) + drive->bios_cyl = (capacity / drive->bios_sect) / drive->bios_head; + printk 
(KERN_INFO "XEN %s: %ld sectors", drive->name, capacity); + + /* Give size in megabytes (MB), not mebibytes (MiB). */ + /* We compute the exact rounded value, avoiding overflow. */ + printk (" (%ld MB)", (capacity - capacity/625 + 974)/1950); + + /* Only print cache size when it was specified */ + if (id->buf_size) + printk (" w/%dKiB Cache", id->buf_size/2); + + printk(", CHS=%d/%d/%d", + drive->bios_cyl, drive->bios_head, drive->bios_sect); +#ifdef CONFIG_BLK_DEV_IDEDMA + if (drive->using_dma) + (void) HWIF(drive)->dmaproc(ide_dma_verbose, drive); +#endif /* CONFIG_BLK_DEV_IDEDMA */ + printk("\n"); + + drive->mult_count = 0; + if (id->max_multsect) { +#ifdef CONFIG_IDEDISK_MULTI_MODE + id->multsect = ((id->max_multsect/2) > 1) ? id->max_multsect : 0; + id->multsect_valid = id->multsect ? 1 : 0; + drive->mult_req = id->multsect_valid ? id->max_multsect : INITIAL_MULT_COUNT; + drive->special.b.set_multmode = drive->mult_req ? 1 : 0; +#else /* original, pre IDE-NFG, per request of AC */ + drive->mult_req = INITIAL_MULT_COUNT; + if (drive->mult_req > id->max_multsect) + drive->mult_req = id->max_multsect; + if (drive->mult_req || ((id->multsect_valid & 1) && id->multsect)) + drive->special.b.set_multmode = 1; +#endif /* CONFIG_IDEDISK_MULTI_MODE */ + } + drive->no_io_32bit = id->dword_io ? 
1 : 0; + if (drive->id->cfs_enable_2 & 0x3000) + write_cache(drive, (id->cfs_enable_2 & 0x3000)); + (void) probe_lba_addressing(drive, 1); +} + +static int idedisk_cleanup (ide_drive_t *drive) +{ + if ((drive->id->cfs_enable_2 & 0x3000) && drive->wcache) + if (do_idedisk_flushcache(drive)) + printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n", + drive->name); + return ide_unregister_subdriver(drive); +} + +int idedisk_reinit(ide_drive_t *drive); + +/* + * IDE subdriver functions, registered with ide.c + */ +static ide_driver_t idedisk_driver = { + name: "ide-disk", + version: IDEDISK_VERSION, + media: ide_disk, + busy: 0, + supports_dma: 1, + supports_dsc_overlap: 0, + cleanup: idedisk_cleanup, + standby: do_idedisk_standby, + flushcache: do_idedisk_flushcache, + do_request: do_rw_disk, + end_request: NULL, + ioctl: NULL, + open: idedisk_open, + release: idedisk_release, + media_change: idedisk_media_change, + revalidate: idedisk_revalidate, + pre_reset: idedisk_pre_reset, + capacity: idedisk_capacity, + special: idedisk_special, + /*proc: idedisk_proc,*/ + reinit: idedisk_reinit, + ata_prebuilder: NULL, + atapi_prebuilder: NULL, +}; + +int idedisk_init (void); +static ide_module_t idedisk_module = { + IDE_DRIVER_MODULE, + idedisk_init, + &idedisk_driver, + NULL +}; + +MODULE_DESCRIPTION("ATA DISK Driver"); + +int idedisk_reinit (ide_drive_t *drive) +{ + int failed = 0; + + MOD_INC_USE_COUNT; + + if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION)) { + printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name); + return 1; + } + DRIVER(drive)->busy++; + idedisk_setup(drive); + if ((!drive->head || drive->head > 16) && !drive->select.b.lba) { + printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head); + (void) idedisk_cleanup(drive); + DRIVER(drive)->busy--; + return 1; + } + DRIVER(drive)->busy--; + failed--; + + ide_register_module(&idedisk_module); + MOD_DEC_USE_COUNT; + 
return 0; +} + +static void __exit idedisk_exit (void) +{ + ide_drive_t *drive; + int failed = 0; + + while ((drive = ide_scan_devices (ide_disk, idedisk_driver.name, &idedisk_driver, failed)) != NULL) { + if (idedisk_cleanup (drive)) { + printk (KERN_ERR "%s: cleanup_module() called while still busy\n", drive->name); + failed++; + } + /* We must remove proc entries defined in this module. + Otherwise we oops while accessing these entries */ +#ifdef CONFIG_PROC_FS + if (drive->proc) + ide_remove_proc_entries(drive->proc, idedisk_proc); +#endif + } + ide_unregister_module(&idedisk_module); +} + +int idedisk_init (void) +{ + ide_drive_t *drive; + int failed = 0; + + MOD_INC_USE_COUNT; + while ((drive = ide_scan_devices (ide_disk, idedisk_driver.name, NULL, failed++)) != NULL) { + if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION)) { + printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name); + continue; + } + DRIVER(drive)->busy++; + idedisk_setup(drive); + if ((!drive->head || drive->head > 16) && !drive->select.b.lba) { + printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head); + (void) idedisk_cleanup(drive); + DRIVER(drive)->busy--; + continue; + } + DRIVER(drive)->busy--; + failed--; + } + ide_register_module(&idedisk_module); + MOD_DEC_USE_COUNT; + return 0; +} + +module_init(idedisk_init); +module_exit(idedisk_exit); +MODULE_LICENSE("GPL"); diff --git a/xen-2.4.16/drivers/ide/ide-probe.c b/xen-2.4.16/drivers/ide/ide-probe.c index e83157ec01..99f38dfcb8 100644 --- a/xen-2.4.16/drivers/ide/ide-probe.c +++ b/xen-2.4.16/drivers/ide/ide-probe.c @@ -51,11 +51,18 @@ #include <asm/uaccess.h> #include <asm/io.h> +#define IDE_PROBE_TRACE 0 + static inline void do_identify (ide_drive_t *drive, byte cmd) { int bswap = 1; struct hd_driveid *id; + if (IDE_PROBE_TRACE) + { + printk (KERN_ALERT "ide-probe::do_identify\n"); + } + id = drive->id = kmalloc (SECTOR_WORDS*4, GFP_ATOMIC); /* 
called with interrupts disabled! */ if (!id) { printk(KERN_WARNING "(ide-probe::do_identify) Out of memory.\n"); @@ -201,6 +208,11 @@ static int actual_try_to_identify (ide_drive_t *drive, byte cmd) unsigned long timeout; byte s, a; + if (IDE_PROBE_TRACE) + { + printk (KERN_ALERT "ide-probe::actual_try_to_identify\n"); + } + if (IDE_CONTROL_REG) { /* take a deep breath */ ide_delay_50ms(); @@ -260,6 +272,11 @@ static int try_to_identify (ide_drive_t *drive, byte cmd) int autoprobe = 0; unsigned long cookie = 0; + if (IDE_PROBE_TRACE) + { + printk (KERN_ALERT "ide-probe::try_to_identify\n"); + } + if (IDE_CONTROL_REG && !HWIF(drive)->irq) { autoprobe = 1; cookie = probe_irq_on(); @@ -314,6 +331,12 @@ static int do_probe (ide_drive_t *drive, byte cmd) { int rc; ide_hwif_t *hwif = HWIF(drive); + + if (IDE_PROBE_TRACE) + { + printk (KERN_ALERT "ide-probe::do_probe\n"); + } + if (drive->present) { /* avoid waiting for inappropriate probes */ if ((drive->media != ide_disk) && (cmd == WIN_IDENTIFY)) return 4; @@ -372,6 +395,11 @@ static void enable_nest (ide_drive_t *drive) { unsigned long timeout; + if (IDE_PROBE_TRACE) + { + printk (KERN_ALERT "ide-probe::enable_nest\n"); + } + printk("%s: enabling %s -- ", HWIF(drive)->name, drive->id->model); SELECT_DRIVE(HWIF(drive), drive); ide_delay_50ms(); @@ -402,6 +430,11 @@ static void enable_nest (ide_drive_t *drive) */ static inline byte probe_for_drive (ide_drive_t *drive) { + if (IDE_PROBE_TRACE) + { + printk (KERN_ALERT "ide-probe::probe_for_drive\n"); + } + if (drive->noprobe) /* skip probing? 
*/ return drive->present; if (do_probe(drive, WIN_IDENTIFY) >= 2) { /* if !(success||timed-out) */ @@ -500,6 +533,11 @@ static void probe_hwif (ide_hwif_t *hwif) unsigned int unit; unsigned long flags; + if (IDE_PROBE_TRACE) + { + printk (KERN_ALERT "ide-probe::probe_hwif\n"); + } + if (hwif->noprobe) return; #ifdef CONFIG_BLK_DEV_IDE @@ -978,6 +1016,11 @@ int ideprobe_init (void) { unsigned int index; int probe[MAX_HWIFS]; + + if (IDE_PROBE_TRACE) + { + printk (KERN_ALERT "ide-probe::ideprobe_init\n"); + } MOD_INC_USE_COUNT; memset(probe, 0, MAX_HWIFS * sizeof(int)); diff --git a/xen-2.4.16/drivers/ide/ide-taskfile.c b/xen-2.4.16/drivers/ide/ide-taskfile.c index 34bfacebfe..578af55156 100644 --- a/xen-2.4.16/drivers/ide/ide-taskfile.c +++ b/xen-2.4.16/drivers/ide/ide-taskfile.c @@ -171,6 +171,8 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task) struct hd_driveid *id = drive->id; byte HIHI = (drive->addressing) ? 0xE0 : 0xEF; + printk(KERN_ALERT "do_rw_taskfile\n"); + /* (ks/hs): Moved to start, do not use for multiple out commands */ if (task->handler != task_mulout_intr) { if (IDE_CONTROL_REG) diff --git a/xen-2.4.16/drivers/ide/ide-xeno.c b/xen-2.4.16/drivers/ide/ide-xeno.c new file mode 100644 index 0000000000..eb7e6cab58 --- /dev/null +++ b/xen-2.4.16/drivers/ide/ide-xeno.c @@ -0,0 +1,46 @@ +#include <xeno/config.h> +#include <xeno/types.h> +#include <xeno/lib.h> +#include <xeno/ide.h> +#include <hypervisor-ifs/block.h> + + +void +ide_probe_devices (xen_disk_info_t* xdi) +{ + int loop; + + for (loop = 0; loop < MAX_HWIFS; ++loop) + { + ide_hwif_t *hwif = &ide_hwifs[loop]; + if (hwif->present) + { + struct gendisk *gd = hwif->gd; + unsigned int unit; + + for (unit = 0; unit < MAX_DRIVES; ++unit) + { + unsigned long capacity; + + ide_drive_t *drive = &hwif->drives[unit]; + + if (drive->present) + { + capacity = current_capacity (drive); + + xdi->disks[xdi->count].type = XEN_DISK_IDE; + xdi->disks[xdi->count].capacity = capacity; + 
xdi->count++; + + printk (KERN_ALERT "IDE-XENO %d\n", xdi->count); + printk (KERN_ALERT " capacity 0x%x\n", capacity); + printk (KERN_ALERT " head 0x%x\n", drive->bios_head); + printk (KERN_ALERT " sector 0x%x\n", drive->bios_sect); + printk (KERN_ALERT " cylinder 0x%x\n", drive->bios_cyl); + } + } + } + } + + return; +} diff --git a/xen-2.4.16/drivers/ide/ide.c b/xen-2.4.16/drivers/ide/ide.c index af3694bf85..51cee21f77 100644 --- a/xen-2.4.16/drivers/ide/ide.c +++ b/xen-2.4.16/drivers/ide/ide.c @@ -1391,6 +1391,8 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) block = rq->sector; blockend = block + rq->nr_sectors; + +#ifdef NEVER if ((rq->cmd == READ || rq->cmd == WRITE) && (drive->media == ide_disk || drive->media == ide_floppy)) { if ((blockend < block) || (blockend > drive->part[minor&PARTN_MASK].nr_sects)) { @@ -1404,6 +1406,15 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) possibly killing some innocent following sector */ if (block == 0 && drive->remap_0_to_1 == 1) block = 1; /* redirect MBR access to EZ-Drive partn table */ +#endif + +#ifdef NEVER_DEBUG + { + printk(" ide::start_request %lx %lx %lx %lx %lx\n", + rq->sector, rq->nr_sectors, block, + drive->part[minor&PARTN_MASK].start_sect, drive->sect0); + } +#endif #if (DISK_RECOVERY_TIME > 0) while ((read_timer() - hwif->last_time) < DISK_RECOVERY_TIME); @@ -1414,6 +1425,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) printk("%s: drive not ready for command\n", drive->name); return startstop; } + drive->special.all = 0; if (!drive->special.all) { switch(rq->cmd) { case IDE_DRIVE_CMD: diff --git a/xen-2.4.16/drivers/ide/piix.c b/xen-2.4.16/drivers/ide/piix.c new file mode 100644 index 0000000000..ca6629e9ef --- /dev/null +++ b/xen-2.4.16/drivers/ide/piix.c @@ -0,0 +1,536 @@ +/* + * linux/drivers/ide/piix.c Version 0.32 June 9, 2000 + * + * Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and 
Maintainer + * Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org> + * May be copied or modified under the terms of the GNU General Public License + * + * PIO mode setting function for Intel chipsets. + * For use instead of BIOS settings. + * + * 40-41 + * 42-43 + * + * 41 + * 43 + * + * | PIO 0 | c0 | 80 | 0 | piix_tune_drive(drive, 0); + * | PIO 2 | SW2 | d0 | 90 | 4 | piix_tune_drive(drive, 2); + * | PIO 3 | MW1 | e1 | a1 | 9 | piix_tune_drive(drive, 3); + * | PIO 4 | MW2 | e3 | a3 | b | piix_tune_drive(drive, 4); + * + * sitre = word40 & 0x4000; primary + * sitre = word42 & 0x4000; secondary + * + * 44 8421|8421 hdd|hdb + * + * 48 8421 hdd|hdc|hdb|hda udma enabled + * + * 0001 hda + * 0010 hdb + * 0100 hdc + * 1000 hdd + * + * 4a 84|21 hdb|hda + * 4b 84|21 hdd|hdc + * + * ata-33/82371AB + * ata-33/82371EB + * ata-33/82801AB ata-66/82801AA + * 00|00 udma 0 00|00 reserved + * 01|01 udma 1 01|01 udma 3 + * 10|10 udma 2 10|10 udma 4 + * 11|11 reserved 11|11 reserved + * + * 54 8421|8421 ata66 drive|ata66 enable + * + * pci_read_config_word(HWIF(drive)->pci_dev, 0x40, &reg40); + * pci_read_config_word(HWIF(drive)->pci_dev, 0x42, &reg42); + * pci_read_config_word(HWIF(drive)->pci_dev, 0x44, &reg44); + * pci_read_config_word(HWIF(drive)->pci_dev, 0x48, &reg48); + * pci_read_config_word(HWIF(drive)->pci_dev, 0x4a, &reg4a); + * pci_read_config_word(HWIF(drive)->pci_dev, 0x54, &reg54); + * + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/ioport.h> +#include <linux/pci.h> +#include <linux/hdreg.h> +#include <linux/ide.h> +#include <linux/delay.h> +#include <linux/init.h> + +#include <asm/io.h> + +#include "ide_modes.h" + +#define PIIX_DEBUG_DRIVE_INFO 0 + +#define DISPLAY_PIIX_TIMINGS + +#if defined(DISPLAY_PIIX_TIMINGS) && defined(CONFIG_PROC_FS) +#include <linux/stat.h> +#include <linux/proc_fs.h> + +static int piix_get_info(char *, char **, off_t, int); +extern int (*piix_display_info)(char *, char **, off_t, int); /*
ide-proc.c */ +extern char *ide_media_verbose(ide_drive_t *); +static struct pci_dev *bmide_dev; + +static int piix_get_info (char *buffer, char **addr, off_t offset, int count) +{ + char *p = buffer; + u32 bibma = pci_resource_start(bmide_dev, 4); + u16 reg40 = 0, psitre = 0, reg42 = 0, ssitre = 0; + u8 c0 = 0, c1 = 0; + u8 reg44 = 0, reg48 = 0, reg4a = 0, reg4b = 0, reg54 = 0, reg55 = 0; + + switch(bmide_dev->device) { + case PCI_DEVICE_ID_INTEL_82801BA_8: + case PCI_DEVICE_ID_INTEL_82801BA_9: + case PCI_DEVICE_ID_INTEL_82801CA_10: + p += sprintf(p, "\n Intel PIIX4 Ultra 100 Chipset.\n"); + break; + case PCI_DEVICE_ID_INTEL_82372FB_1: + case PCI_DEVICE_ID_INTEL_82801AA_1: + p += sprintf(p, "\n Intel PIIX4 Ultra 66 Chipset.\n"); + break; + case PCI_DEVICE_ID_INTEL_82451NX: + case PCI_DEVICE_ID_INTEL_82801AB_1: + case PCI_DEVICE_ID_INTEL_82443MX_1: + case PCI_DEVICE_ID_INTEL_82371AB: + p += sprintf(p, "\n Intel PIIX4 Ultra 33 Chipset.\n"); + break; + case PCI_DEVICE_ID_INTEL_82371SB_1: + p += sprintf(p, "\n Intel PIIX3 Chipset.\n"); + break; + case PCI_DEVICE_ID_INTEL_82371MX: + p += sprintf(p, "\n Intel MPIIX Chipset.\n"); + return p-buffer; /* => must be less than 4k! */ + case PCI_DEVICE_ID_INTEL_82371FB_1: + case PCI_DEVICE_ID_INTEL_82371FB_0: + default: + p += sprintf(p, "\n Intel PIIX Chipset.\n"); + break; + } + + pci_read_config_word(bmide_dev, 0x40, &reg40); + pci_read_config_word(bmide_dev, 0x42, &reg42); + pci_read_config_byte(bmide_dev, 0x44, &reg44); + pci_read_config_byte(bmide_dev, 0x48, &reg48); + pci_read_config_byte(bmide_dev, 0x4a, &reg4a); + pci_read_config_byte(bmide_dev, 0x4b, &reg4b); + pci_read_config_byte(bmide_dev, 0x54, &reg54); + pci_read_config_byte(bmide_dev, 0x55, &reg55); + + psitre = (reg40 & 0x4000) ? 1 : 0; + ssitre = (reg42 & 0x4000) ?
1 : 0; + + /* + * at that point bibma+0x2 et bibma+0xa are byte registers + * to investigate: + */ + c0 = inb_p((unsigned short)bibma + 0x02); + c1 = inb_p((unsigned short)bibma + 0x0a); + + p += sprintf(p, "--------------- Primary Channel ---------------- Secondary Channel -------------\n"); + p += sprintf(p, " %sabled %sabled\n", + (c0&0x80) ? "dis" : " en", + (c1&0x80) ? "dis" : " en"); + p += sprintf(p, "--------------- drive0 --------- drive1 -------- drive0 ---------- drive1 ------\n"); + p += sprintf(p, "DMA enabled: %s %s %s %s\n", + (c0&0x20) ? "yes" : "no ", + (c0&0x40) ? "yes" : "no ", + (c1&0x20) ? "yes" : "no ", + (c1&0x40) ? "yes" : "no " ); + p += sprintf(p, "UDMA enabled: %s %s %s %s\n", + (reg48&0x01) ? "yes" : "no ", + (reg48&0x02) ? "yes" : "no ", + (reg48&0x04) ? "yes" : "no ", + (reg48&0x08) ? "yes" : "no " ); + p += sprintf(p, "UDMA enabled: %s %s %s %s\n", + ((reg54&0x11) && (reg55&0x10) && (reg4a&0x01)) ? "5" : + ((reg54&0x11) && (reg4a&0x02)) ? "4" : + ((reg54&0x11) && (reg4a&0x01)) ? "3" : + (reg4a&0x02) ? "2" : + (reg4a&0x01) ? "1" : + (reg4a&0x00) ? "0" : "X", + ((reg54&0x22) && (reg55&0x20) && (reg4a&0x10)) ? "5" : + ((reg54&0x22) && (reg4a&0x20)) ? "4" : + ((reg54&0x22) && (reg4a&0x10)) ? "3" : + (reg4a&0x20) ? "2" : + (reg4a&0x10) ? "1" : + (reg4a&0x00) ? "0" : "X", + ((reg54&0x44) && (reg55&0x40) && (reg4b&0x03)) ? "5" : + ((reg54&0x44) && (reg4b&0x02)) ? "4" : + ((reg54&0x44) && (reg4b&0x01)) ? "3" : + (reg4b&0x02) ? "2" : + (reg4b&0x01) ? "1" : + (reg4b&0x00) ? "0" : "X", + ((reg54&0x88) && (reg55&0x80) && (reg4b&0x30)) ? "5" : + ((reg54&0x88) && (reg4b&0x20)) ? "4" : + ((reg54&0x88) && (reg4b&0x10)) ? "3" : + (reg4b&0x20) ? "2" : + (reg4b&0x10) ? "1" : + (reg4b&0x00) ? "0" : "X"); + + p += sprintf(p, "UDMA\n"); + p += sprintf(p, "DMA\n"); + p += sprintf(p, "PIO\n"); + +/* + * FIXME.... Add configuration junk data....blah blah...... + */ + + return p-buffer; /* => must be less than 4k! 
*/ +} +#endif /* defined(DISPLAY_PIIX_TIMINGS) && defined(CONFIG_PROC_FS) */ + +/* + * Used to set Fifo configuration via kernel command line: + */ + +byte piix_proc = 0; + +extern char *ide_xfer_verbose (byte xfer_rate); + +#if defined(CONFIG_BLK_DEV_IDEDMA) && defined(CONFIG_PIIX_TUNING) +/* + * + */ +static byte piix_dma_2_pio (byte xfer_rate) { + switch(xfer_rate) { + case XFER_UDMA_5: + case XFER_UDMA_4: + case XFER_UDMA_3: + case XFER_UDMA_2: + case XFER_UDMA_1: + case XFER_UDMA_0: + case XFER_MW_DMA_2: + case XFER_PIO_4: + return 4; + case XFER_MW_DMA_1: + case XFER_PIO_3: + return 3; + case XFER_SW_DMA_2: + case XFER_PIO_2: + return 2; + case XFER_MW_DMA_0: + case XFER_SW_DMA_1: + case XFER_SW_DMA_0: + case XFER_PIO_1: + case XFER_PIO_0: + case XFER_PIO_SLOW: + default: + return 0; + } +} +#endif /* defined(CONFIG_BLK_DEV_IDEDMA) && (CONFIG_PIIX_TUNING) */ + +/* + * Based on settings done by AMI BIOS + * (might be useful if drive is not registered in CMOS for any reason). + */ +static void piix_tune_drive (ide_drive_t *drive, byte pio) +{ + unsigned long flags; + u16 master_data; + byte slave_data; + int is_slave = (&HWIF(drive)->drives[1] == drive); + int master_port = HWIF(drive)->index ? 0x42 : 0x40; + int slave_port = 0x44; + /* ISP RTC */ + byte timings[][2] = { { 0, 0 }, + { 0, 0 }, + { 1, 0 }, + { 2, 1 }, + { 2, 3 }, }; + + pio = ide_get_best_pio_mode(drive, pio, 5, NULL); + pci_read_config_word(HWIF(drive)->pci_dev, master_port, &master_data); + if (is_slave) { + master_data = master_data | 0x4000; + if (pio > 1) + /* enable PPE, IE and TIME */ + master_data = master_data | 0x0070; + pci_read_config_byte(HWIF(drive)->pci_dev, slave_port, &slave_data); + slave_data = slave_data & (HWIF(drive)->index ? 0x0f : 0xf0); + slave_data = slave_data | ((timings[pio][0] << 2) | (timings[pio][1] + << (HWIF(drive)->index ? 
4 : 0))); + } else { + master_data = master_data & 0xccf8; + if (pio > 1) + /* enable PPE, IE and TIME */ + master_data = master_data | 0x0007; + master_data = master_data | (timings[pio][0] << 12) | + (timings[pio][1] << 8); + } + save_flags(flags); + cli(); + pci_write_config_word(HWIF(drive)->pci_dev, master_port, master_data); + if (is_slave) + pci_write_config_byte(HWIF(drive)->pci_dev, slave_port, slave_data); + restore_flags(flags); +} + +#if defined(CONFIG_BLK_DEV_IDEDMA) && defined(CONFIG_PIIX_TUNING) +static int piix_tune_chipset (ide_drive_t *drive, byte speed) +{ + ide_hwif_t *hwif = HWIF(drive); + struct pci_dev *dev = hwif->pci_dev; + byte maslave = hwif->channel ? 0x42 : 0x40; + int a_speed = 3 << (drive->dn * 4); + int u_flag = 1 << drive->dn; + int v_flag = 0x01 << drive->dn; + int w_flag = 0x10 << drive->dn; + int u_speed = 0; + int err = 0; + int sitre; + short reg4042, reg44, reg48, reg4a, reg54; + byte reg55; + + pci_read_config_word(dev, maslave, &reg4042); + sitre = (reg4042 & 0x4000) ?
1 : 0; + pci_read_config_word(dev, 0x44, &reg44); + pci_read_config_word(dev, 0x48, &reg48); + pci_read_config_word(dev, 0x4a, &reg4a); + pci_read_config_word(dev, 0x54, &reg54); + pci_read_config_byte(dev, 0x55, &reg55); + + switch(speed) { + case XFER_UDMA_4: + case XFER_UDMA_2: u_speed = 2 << (drive->dn * 4); break; + case XFER_UDMA_5: + case XFER_UDMA_3: + case XFER_UDMA_1: u_speed = 1 << (drive->dn * 4); break; + case XFER_UDMA_0: u_speed = 0 << (drive->dn * 4); break; + case XFER_MW_DMA_2: + case XFER_MW_DMA_1: + case XFER_SW_DMA_2: break; + default: return -1; + } + + if (speed >= XFER_UDMA_0) { + if (!(reg48 & u_flag)) + pci_write_config_word(dev, 0x48, reg48|u_flag); + if (speed == XFER_UDMA_5) { + pci_write_config_byte(dev, 0x55, (byte) reg55|w_flag); + } else { + pci_write_config_byte(dev, 0x55, (byte) reg55 & ~w_flag); + } + if (!(reg4a & u_speed)) { + pci_write_config_word(dev, 0x4a, reg4a & ~a_speed); + pci_write_config_word(dev, 0x4a, reg4a|u_speed); + } + if (speed > XFER_UDMA_2) { + if (!(reg54 & v_flag)) { + pci_write_config_word(dev, 0x54, reg54|v_flag); + } + } else { + pci_write_config_word(dev, 0x54, reg54 & ~v_flag); + } + } + if (speed < XFER_UDMA_0) { + if (reg48 & u_flag) + pci_write_config_word(dev, 0x48, reg48 & ~u_flag); + if (reg4a & a_speed) + pci_write_config_word(dev, 0x4a, reg4a & ~a_speed); + if (reg54 & v_flag) + pci_write_config_word(dev, 0x54, reg54 & ~v_flag); + if (reg55 & w_flag) + pci_write_config_byte(dev, 0x55, (byte) reg55 & ~w_flag); + } + + piix_tune_drive(drive, piix_dma_2_pio(speed)); + +#if PIIX_DEBUG_DRIVE_INFO + printk("%s: %s drive%d\n", drive->name, ide_xfer_verbose(speed), drive->dn); +#endif /* PIIX_DEBUG_DRIVE_INFO */ + if (!drive->init_speed) + drive->init_speed = speed; + err = ide_config_drive_speed(drive, speed); + drive->current_speed = speed; + return err; +} + +static int piix_config_drive_for_dma (ide_drive_t *drive) +{ + struct hd_driveid *id = drive->id; + ide_hwif_t *hwif = HWIF(drive); + struct pci_dev *dev =
hwif->pci_dev; + byte speed; + + byte udma_66 = eighty_ninty_three(drive); + int ultra100 = ((dev->device == PCI_DEVICE_ID_INTEL_82801BA_8) || + (dev->device == PCI_DEVICE_ID_INTEL_82801BA_9) || + (dev->device == PCI_DEVICE_ID_INTEL_82801CA_10)) ? 1 : 0; + int ultra66 = ((ultra100) || + (dev->device == PCI_DEVICE_ID_INTEL_82801AA_1) || + (dev->device == PCI_DEVICE_ID_INTEL_82372FB_1)) ? 1 : 0; + int ultra = ((ultra66) || + (dev->device == PCI_DEVICE_ID_INTEL_82371AB) || + (dev->device == PCI_DEVICE_ID_INTEL_82443MX_1) || + (dev->device == PCI_DEVICE_ID_INTEL_82451NX) || + (dev->device == PCI_DEVICE_ID_INTEL_82801AB_1)) ? 1 : 0; + + if ((id->dma_ultra & 0x0020) && (udma_66) && (ultra100)) { + speed = XFER_UDMA_5; + } else if ((id->dma_ultra & 0x0010) && (ultra)) { + speed = ((udma_66) && (ultra66)) ? XFER_UDMA_4 : XFER_UDMA_2; + } else if ((id->dma_ultra & 0x0008) && (ultra)) { + speed = ((udma_66) && (ultra66)) ? XFER_UDMA_3 : XFER_UDMA_1; + } else if ((id->dma_ultra & 0x0004) && (ultra)) { + speed = XFER_UDMA_2; + } else if ((id->dma_ultra & 0x0002) && (ultra)) { + speed = XFER_UDMA_1; + } else if ((id->dma_ultra & 0x0001) && (ultra)) { + speed = XFER_UDMA_0; + } else if (id->dma_mword & 0x0004) { + speed = XFER_MW_DMA_2; + } else if (id->dma_mword & 0x0002) { + speed = XFER_MW_DMA_1; + } else if (id->dma_1word & 0x0004) { + speed = XFER_SW_DMA_2; + } else { + speed = XFER_PIO_0 + ide_get_best_pio_mode(drive, 255, 5, NULL); + } + + (void) piix_tune_chipset(drive, speed); + + return ((int) ((id->dma_ultra >> 11) & 7) ? ide_dma_on : + ((id->dma_ultra >> 8) & 7) ? ide_dma_on : + ((id->dma_mword >> 8) & 7) ? ide_dma_on : + ((id->dma_1word >> 8) & 7) ? 
ide_dma_on : + ide_dma_off_quietly); +} + +static void config_chipset_for_pio (ide_drive_t *drive) +{ + piix_tune_drive(drive, ide_get_best_pio_mode(drive, 255, 5, NULL)); +} + +static int config_drive_xfer_rate (ide_drive_t *drive) +{ + struct hd_driveid *id = drive->id; + ide_dma_action_t dma_func = ide_dma_on; + + if (id && (id->capability & 1) && HWIF(drive)->autodma) { + /* Consult the list of known "bad" drives */ + if (ide_dmaproc(ide_dma_bad_drive, drive)) { + dma_func = ide_dma_off; + goto fast_ata_pio; + } + dma_func = ide_dma_off_quietly; + if (id->field_valid & 4) { + if (id->dma_ultra & 0x002F) { + /* Force if Capable UltraDMA */ + dma_func = piix_config_drive_for_dma(drive); + if ((id->field_valid & 2) && + (dma_func != ide_dma_on)) + goto try_dma_modes; + } + } else if (id->field_valid & 2) { +try_dma_modes: + if ((id->dma_mword & 0x0007) || + (id->dma_1word & 0x007)) { + /* Force if Capable regular DMA modes */ + dma_func = piix_config_drive_for_dma(drive); + if (dma_func != ide_dma_on) + goto no_dma_set; + } + } else if (ide_dmaproc(ide_dma_good_drive, drive)) { + if (id->eide_dma_time > 150) { + goto no_dma_set; + } + /* Consult the list of known "good" drives */ + dma_func = piix_config_drive_for_dma(drive); + if (dma_func != ide_dma_on) + goto no_dma_set; + } else { + goto fast_ata_pio; + } + } else if ((id->capability & 8) || (id->field_valid & 2)) { +fast_ata_pio: + dma_func = ide_dma_off_quietly; +no_dma_set: + config_chipset_for_pio(drive); + } + return HWIF(drive)->dmaproc(dma_func, drive); +} + +static int piix_dmaproc(ide_dma_action_t func, ide_drive_t *drive) +{ + switch (func) { + case ide_dma_check: + return config_drive_xfer_rate(drive); + default : + break; + } + /* Other cases are done by generic IDE-DMA code. 
*/ + return ide_dmaproc(func, drive); +} +#endif /* defined(CONFIG_BLK_DEV_IDEDMA) && (CONFIG_PIIX_TUNING) */ + +unsigned int __init pci_init_piix (struct pci_dev *dev, const char *name) +{ +#if defined(DISPLAY_PIIX_TIMINGS) && defined(CONFIG_PROC_FS) + if (!piix_proc) { + piix_proc = 1; + bmide_dev = dev; + piix_display_info = &piix_get_info; + } +#endif /* DISPLAY_PIIX_TIMINGS && CONFIG_PROC_FS */ + return 0; +} + +/* + * Sheesh, someone at Intel needs to go read the ATA-4/5 T13 standards. + * It does not specify device detection, but channel!!! + * You determine later if bit 13 of word93 is set... + */ +unsigned int __init ata66_piix (ide_hwif_t *hwif) +{ + byte reg54h = 0, reg55h = 0, ata66 = 0; + byte mask = hwif->channel ? 0xc0 : 0x30; + + pci_read_config_byte(hwif->pci_dev, 0x54, &reg54h); + pci_read_config_byte(hwif->pci_dev, 0x55, &reg55h); + + ata66 = (reg54h & mask) ? 1 : 0; + + return ata66; +} + +void __init ide_init_piix (ide_hwif_t *hwif) +{ +#ifndef CONFIG_IA64 + if (!hwif->irq) + hwif->irq = hwif->channel ?
15 : 14; +#endif /* CONFIG_IA64 */ + + if (hwif->pci_dev->device == PCI_DEVICE_ID_INTEL_82371MX) { + /* This is a painful system best to let it self tune for now */ + return; + } + + hwif->tuneproc = &piix_tune_drive; + hwif->drives[0].autotune = 1; + hwif->drives[1].autotune = 1; + + if (!hwif->dma_base) + return; + +#ifndef CONFIG_BLK_DEV_IDEDMA + hwif->autodma = 0; +#else /* CONFIG_BLK_DEV_IDEDMA */ +#ifdef CONFIG_PIIX_TUNING + if (!noautodma) + hwif->autodma = 1; + hwif->dmaproc = &piix_dmaproc; + hwif->speedproc = &piix_tune_chipset; +#endif /* CONFIG_PIIX_TUNING */ +#endif /* !CONFIG_BLK_DEV_IDEDMA */ +} diff --git a/xen-2.4.16/drivers/net/Makefile b/xen-2.4.16/drivers/net/Makefile index 8b752b9712..11fecb571b 100644 --- a/xen-2.4.16/drivers/net/Makefile +++ b/xen-2.4.16/drivers/net/Makefile @@ -5,12 +5,11 @@ default: $(OBJS) $(MAKE) -C ne $(MAKE) -C tulip $(MAKE) -C e1000 - $(LD) -r -o driver.o $(OBJS) tulip/tulip.o e1000/e1000.o ne/ne_drv.o + $(LD) -r -o driver.o e1000/e1000.o $(OBJS) tulip/tulip.o ne/ne_drv.o clean: $(MAKE) -C ne clean $(MAKE) -C tulip clean - $(MAKE) -C e1000 clean rm -f *.o *~ core .PHONY: default clean diff --git a/xen-2.4.16/drivers/net/e1000/e1000.h b/xen-2.4.16/drivers/net/e1000/e1000.h index ec7c27aa7e..d94e390ba3 100644 --- a/xen-2.4.16/drivers/net/e1000/e1000.h +++ b/xen-2.4.16/drivers/net/e1000/e1000.h @@ -77,14 +77,13 @@ struct e1000_adapter; // XEN XXX -#define DBG 1 +// #define DBG 1 #include "e1000_hw.h" #if DBG #define E1000_DBG(args...) printk(KERN_DEBUG "e1000: " args) #else -XXX #define E1000_DBG(args...) 
#endif diff --git a/xen-2.4.16/drivers/net/e1000/e1000_hw.c b/xen-2.4.16/drivers/net/e1000/e1000_hw.c index 91053751c7..1d70dab937 100644 --- a/xen-2.4.16/drivers/net/e1000/e1000_hw.c +++ b/xen-2.4.16/drivers/net/e1000/e1000_hw.c @@ -1879,7 +1879,7 @@ e1000_read_phy_reg(struct e1000_hw *hw, uint32_t mdic = 0; const uint32_t phy_addr = 1; - DEBUGFUNC("e1000_read_phy_reg"); + DEBUGFUNC("XXXXe1000_read_phy_reg"); if(reg_addr > MAX_PHY_REG_ADDRESS) { DEBUGOUT1("PHY Address %d is out of range\n", reg_addr); diff --git a/xen-2.4.16/drivers/net/e1000/e1000_main.c b/xen-2.4.16/drivers/net/e1000/e1000_main.c index 0d3b62505a..8afbe394c2 100644 --- a/xen-2.4.16/drivers/net/e1000/e1000_main.c +++ b/xen-2.4.16/drivers/net/e1000/e1000_main.c @@ -301,11 +301,9 @@ e1000_reset(struct e1000_adapter *adapter) adapter->hw.fc = adapter->hw.original_fc; e1000_reset_hw(&adapter->hw); -printk("RESET_H/W\n"); if(adapter->hw.mac_type >= e1000_82544) E1000_WRITE_REG(&adapter->hw, WUC, 0); e1000_init_hw(&adapter->hw); -printk("INIT H/W\n"); e1000_reset_adaptive(&adapter->hw); e1000_phy_get_info(&adapter->hw, &adapter->phy_info); } @@ -470,14 +468,12 @@ e1000_probe(struct pci_dev *pdev, printk(KERN_INFO "%s: %s\n", netdev->name, adapter->id_string); e1000_check_options(adapter); -printk("OPTIONS OVER\n"); /* Initial Wake on LAN setting * If APM wake is enabled in the EEPROM, * enable the ACPI Magic Packet filter */ e1000_read_eeprom(&adapter->hw, EEPROM_INIT_CONTROL2_REG, &eeprom_data); -printk("EPROM OVER\n"); if((adapter->hw.mac_type >= e1000_82544) && (eeprom_data & E1000_EEPROM_APME)) adapter->wol |= E1000_WUFC_MAG; @@ -485,7 +481,6 @@ printk("EPROM OVER\n"); /* reset the hardware with the new settings */ e1000_reset(adapter); -printk("PROBE OVER\n"); cards_found++; return 0; diff --git a/xen-2.4.16/drivers/net/e1000/e1000_osdep.h b/xen-2.4.16/drivers/net/e1000/e1000_osdep.h index e51e083472..fe3fff2af7 100644 --- a/xen-2.4.16/drivers/net/e1000/e1000_osdep.h +++ 
b/xen-2.4.16/drivers/net/e1000/e1000_osdep.h @@ -45,7 +45,7 @@ #define msec_delay(x) {\ int s=jiffies+1+((x*HZ)/1000); \ printk("mdelay(%d) called -- spin\n",x); \ - while(jiffies<s); printk("mdelay over\n");} + while(jiffies<s); } #if 0 /******************** NOT in XEN ! *******/ @@ -73,7 +73,7 @@ typedef enum { #define ASSERT(x) if(!(x)) BUG() #define MSGOUT(S, A, B) printk(KERN_DEBUG S "\n", A, B) -#define DBG 1 +//#define DBG 1 #if DBG #define DEBUGOUT(S) printk(KERN_DEBUG S "\n") diff --git a/xen-2.4.16/include/asm-i386/apic.h b/xen-2.4.16/include/asm-i386/apic.h index 1f5670943b..574cc23203 100644 --- a/xen-2.4.16/include/asm-i386/apic.h +++ b/xen-2.4.16/include/asm-i386/apic.h @@ -1,9 +1,12 @@ #ifndef __ASM_APIC_H #define __ASM_APIC_H -#include <asm/system.h> -#include <asm/ptrace.h> +//#include <linux/config.h> +//#include <linux/pm.h> #include <asm/apicdef.h> +#include <asm/system.h> + +#ifdef CONFIG_X86_LOCAL_APIC #define APIC_DEBUG 0 @@ -37,9 +40,15 @@ static __inline__ void apic_wait_icr_idle(void) do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ); } -#define FORCE_READ_AROUND_WRITE 0 -#define apic_read_around(x) -#define apic_write_around(x,y) apic_write((x),(y)) +#ifdef CONFIG_X86_GOOD_APIC +# define FORCE_READ_AROUND_WRITE 0 +# define apic_read_around(x) +# define apic_write_around(x,y) apic_write((x),(y)) +#else +# define FORCE_READ_AROUND_WRITE 1 +# define apic_read_around(x) apic_read(x) +# define apic_write_around(x,y) apic_write_atomic((x),(y)) +#endif static inline void ack_APIC_irq(void) { @@ -64,8 +73,24 @@ extern void init_bsp_APIC (void); extern void setup_local_APIC (void); extern void init_apic_mappings (void); extern void setup_APIC_clocks (void); +extern void setup_apic_nmi_watchdog (void); +extern inline void nmi_watchdog_tick (struct pt_regs * regs); extern int APIC_init_uniprocessor (void); +extern void disable_APIC_timer(void); +extern void enable_APIC_timer(void); + +//extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned 
long, pm_callback); +//extern void apic_pm_unregister(struct pm_dev*); extern unsigned int apic_timer_irqs [NR_CPUS]; +extern int check_nmi_watchdog (void); + +extern unsigned int nmi_watchdog; +#define NMI_NONE 0 +#define NMI_IO_APIC 1 +#define NMI_LOCAL_APIC 2 +#define NMI_INVALID 3 + +#endif /* CONFIG_X86_LOCAL_APIC */ #endif /* __ASM_APIC_H */ diff --git a/xen-2.4.16/include/asm-i386/apicdef.h b/xen-2.4.16/include/asm-i386/apicdef.h index f855a7d88d..227bfca652 100644 --- a/xen-2.4.16/include/asm-i386/apicdef.h +++ b/xen-2.4.16/include/asm-i386/apicdef.h @@ -32,6 +32,8 @@ #define SET_APIC_LOGICAL_ID(x) (((x)<<24)) #define APIC_ALL_CPUS 0xFF #define APIC_DFR 0xE0 +#define APIC_DFR_CLUSTER 0x0FFFFFFFul /* Clustered */ +#define APIC_DFR_FLAT 0xFFFFFFFFul /* Flat mode */ #define APIC_SPIV 0xF0 #define APIC_SPIV_FOCUS_DISABLED (1<<9) #define APIC_SPIV_APIC_ENABLED (1<<8) @@ -57,6 +59,7 @@ #define APIC_INT_LEVELTRIG 0x08000 #define APIC_INT_ASSERT 0x04000 #define APIC_ICR_BUSY 0x01000 +#define APIC_DEST_PHYSICAL 0x00000 #define APIC_DEST_LOGICAL 0x00800 #define APIC_DM_FIXED 0x00000 #define APIC_DM_LOWEST 0x00100 @@ -107,7 +110,19 @@ #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) +#ifdef CONFIG_X86_CLUSTERED_APIC +#define MAX_IO_APICS 32 +#else #define MAX_IO_APICS 8 +#endif + + +/* + * The broadcast ID is 0xF for old APICs and 0xFF for xAPICs. SAPICs + * don't broadcast (yet?), but if they did, they might use 0xFFFF. + */ +#define APIC_BROADCAST_ID_XAPIC (0xFF) +#define APIC_BROADCAST_ID_APIC (0x0F) /* * the local APIC register structure, memory mapped. 
Not terribly well diff --git a/xen-2.4.16/include/asm-i386/io_apic.h b/xen-2.4.16/include/asm-i386/io_apic.h index a03be4d733..44916209a8 100644 --- a/xen-2.4.16/include/asm-i386/io_apic.h +++ b/xen-2.4.16/include/asm-i386/io_apic.h @@ -15,7 +15,8 @@ #define APIC_MISMATCH_DEBUG #define IO_APIC_BASE(idx) \ - ((volatile int *)__fix_to_virt(FIX_IO_APIC_BASE_0 + idx)) + ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \ + + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK))) /* * The structure of the IO-APIC: @@ -96,7 +97,7 @@ extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; extern int mp_irq_entries; /* MP IRQ source entries */ -extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +extern struct mpc_config_intsrc *mp_irqs; /* non-0 if default (table-less) MP configuration */ extern int mpc_default_type; @@ -132,8 +133,7 @@ static inline void io_apic_sync(unsigned int apic) } /* 1 if "noapic" boot option passed */ -//extern int skip_ioapic_setup; -#define skip_ioapic_setup 0 +extern int skip_ioapic_setup; /* * If we use the IO-APIC for IRQ routing, disable automatic diff --git a/xen-2.4.16/include/asm-i386/mpspec.h b/xen-2.4.16/include/asm-i386/mpspec.h index 2598ea02db..2829cb54a3 100644 --- a/xen-2.4.16/include/asm-i386/mpspec.h +++ b/xen-2.4.16/include/asm-i386/mpspec.h @@ -1,6 +1,7 @@ #ifndef __ASM_MPSPEC_H #define __ASM_MPSPEC_H + /* * Structure definitions for SMP machines following the * Intel Multiprocessing Specification 1.1 and 1.4. @@ -13,8 +14,15 @@ #define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_') -/* Maximum of 16 APICs with the current APIC ID architecture. */ +/* + * a maximum of 16 APICs with the current APIC ID architecture. + * xAPICs can have up to 256. SAPICs have 16 ID bits. 
+ */ +#ifdef CONFIG_X86_CLUSTERED_APIC +#define MAX_APICS 256 +#else #define MAX_APICS 16 +#endif #define MAX_MPC_ENTRY 1024 @@ -178,7 +186,11 @@ struct mpc_config_translation * 7 2 CPU MCA+PCI */ +#ifdef CONFIG_MULTIQUAD +#define MAX_IRQ_SOURCES 512 +#else /* !CONFIG_MULTIQUAD */ #define MAX_IRQ_SOURCES 256 +#endif /* CONFIG_MULTIQUAD */ #define MAX_MP_BUSSES 32 enum mp_bustype { @@ -187,8 +199,11 @@ enum mp_bustype { MP_BUS_PCI, MP_BUS_MCA }; -extern int mp_bus_id_to_type [MAX_MP_BUSSES]; -extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; +extern int *mp_bus_id_to_type; +extern int *mp_bus_id_to_node; +extern int *mp_bus_id_to_local; +extern int *mp_bus_id_to_pci_bus; +extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; extern unsigned int boot_cpu_physical_apicid; extern unsigned long phys_cpu_present_map; @@ -197,11 +212,9 @@ extern void find_smp_config (void); extern void get_smp_config (void); extern int nr_ioapics; extern int apic_version [MAX_APICS]; -extern int mp_bus_id_to_type [MAX_MP_BUSSES]; extern int mp_irq_entries; -extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; +extern struct mpc_config_intsrc *mp_irqs; extern int mpc_default_type; -extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; extern int mp_current_pci_id; extern unsigned long mp_lapic_addr; extern int pic_mode; diff --git a/xen-2.4.16/include/asm-i386/processor.h b/xen-2.4.16/include/asm-i386/processor.h index f7f949d82b..36a50b2976 100644 --- a/xen-2.4.16/include/asm-i386/processor.h +++ b/xen-2.4.16/include/asm-i386/processor.h @@ -358,16 +358,22 @@ struct thread_struct { trap_info_t traps[256]; }; +#define IDT_ENTRIES 256 +extern struct desc_struct idt_table[]; +extern struct desc_struct *idt_tables[]; + #define SET_DEFAULT_FAST_TRAP(_p) \ (_p)->fast_trap_idx = 0x20; \ (_p)->fast_trap_desc.a = 0; \ (_p)->fast_trap_desc.b = 0; #define CLEAR_FAST_TRAP(_p) \ - (memset(idt_table + (_p)->fast_trap_idx, 0, 8)) + (memset(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \ + 0, 8)) 
#define SET_FAST_TRAP(_p) \ - (memcpy(idt_table + (_p)->fast_trap_idx, &((_p)->fast_trap_desc), 8)) + (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \ + &((_p)->fast_trap_desc), 8)) #define INIT_THREAD { \ sizeof(idle0_stack) + (long) &idle0_stack, /* esp0 */ \ diff --git a/xen-2.4.16/include/asm-i386/smpboot.h b/xen-2.4.16/include/asm-i386/smpboot.h index 67bbedbd08..ece215fab0 100644 --- a/xen-2.4.16/include/asm-i386/smpboot.h +++ b/xen-2.4.16/include/asm-i386/smpboot.h @@ -1,13 +1,82 @@ #ifndef __ASM_SMPBOOT_H #define __ASM_SMPBOOT_H -#define TRAMPOLINE_LOW phys_to_virt(0x467) -#define TRAMPOLINE_HIGH phys_to_virt(0x469) +/*emum for clustered_apic_mode values*/ +enum{ + CLUSTERED_APIC_NONE = 0, + CLUSTERED_APIC_XAPIC, + CLUSTERED_APIC_NUMAQ +}; -#define boot_cpu_apicid boot_cpu_physical_apicid +#ifdef CONFIG_X86_CLUSTERED_APIC +extern unsigned int apic_broadcast_id; +extern unsigned char clustered_apic_mode; +extern unsigned char esr_disable; +extern unsigned char int_delivery_mode; +extern unsigned int int_dest_addr_mode; +extern int cyclone_setup(char*); -/* How to map from the cpu_present_map. */ -#define cpu_present_to_apicid(apicid) (apicid) +static inline void detect_clustered_apic(char* oem, char* prod) +{ + /* + * Can't recognize Summit xAPICs at present, so use the OEM ID. 
+ */ + if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(prod, "VIGIL SMP", 9)){ + clustered_apic_mode = CLUSTERED_APIC_XAPIC; + apic_broadcast_id = APIC_BROADCAST_ID_XAPIC; + int_dest_addr_mode = APIC_DEST_PHYSICAL; + int_delivery_mode = dest_Fixed; + esr_disable = 1; + /*Start cyclone clock*/ + cyclone_setup(0); + } + else if (!strncmp(oem, "IBM NUMA", 8)){ + clustered_apic_mode = CLUSTERED_APIC_NUMAQ; + apic_broadcast_id = APIC_BROADCAST_ID_APIC; + int_dest_addr_mode = APIC_DEST_LOGICAL; + int_delivery_mode = dest_LowestPrio; + esr_disable = 1; + } +} +#define INT_DEST_ADDR_MODE (int_dest_addr_mode) +#define INT_DELIVERY_MODE (int_delivery_mode) +#else /* CONFIG_X86_CLUSTERED_APIC */ +#define apic_broadcast_id (APIC_BROADCAST_ID_APIC) +#define clustered_apic_mode (CLUSTERED_APIC_NONE) +#define esr_disable (0) +#define detect_clustered_apic(x,y) +#define INT_DEST_ADDR_MODE (APIC_DEST_LOGICAL) /* logical delivery */ +#define INT_DELIVERY_MODE (dest_LowestPrio) +#endif /* CONFIG_X86_CLUSTERED_APIC */ +#define BAD_APICID 0xFFu + +#define TRAMPOLINE_LOW phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0x8:0x467) +#define TRAMPOLINE_HIGH phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0xa:0x469) + +#define boot_cpu_apicid ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?boot_cpu_logical_apicid:boot_cpu_physical_apicid) + +extern unsigned char raw_phys_apicid[NR_CPUS]; + +/* + * How to map from the cpu_present_map + */ +static inline int cpu_present_to_apicid(int mps_cpu) +{ + if (clustered_apic_mode == CLUSTERED_APIC_XAPIC) + return raw_phys_apicid[mps_cpu]; + if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) + return (mps_cpu/4)*16 + (1<<(mps_cpu%4)); + return mps_cpu; +} + +static inline unsigned long apicid_to_phys_cpu_present(int apicid) +{ + if(clustered_apic_mode) + return 1UL << (((apicid >> 4) << 2) + (apicid & 0x3)); + return 1UL << apicid; +} + +#define physical_to_logical_apicid(phys_apic) ( (1ul << (phys_apic & 0x3)) | (phys_apic & 0xF0u) ) /* 
* Mappings between logical cpu number and logical / physical apicid @@ -22,7 +91,31 @@ extern volatile int cpu_2_physical_apicid[]; #define cpu_to_logical_apicid(cpu) cpu_2_logical_apicid[cpu] #define physical_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid] #define cpu_to_physical_apicid(cpu) cpu_2_physical_apicid[cpu] +#ifdef CONFIG_MULTIQUAD /* use logical IDs to bootstrap */ +#define boot_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid] +#define cpu_to_boot_apicid(cpu) cpu_2_logical_apicid[cpu] +#else /* !CONFIG_MULTIQUAD */ /* use physical IDs to bootstrap */ #define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid] #define cpu_to_boot_apicid(cpu) cpu_2_physical_apicid[cpu] +#endif /* CONFIG_MULTIQUAD */ +#ifdef CONFIG_X86_CLUSTERED_APIC +static inline int target_cpus(void) +{ + static int cpu; + switch(clustered_apic_mode){ + case CLUSTERED_APIC_NUMAQ: + /* Broadcast intrs to local quad only. */ + return APIC_BROADCAST_ID_APIC; + case CLUSTERED_APIC_XAPIC: + /*round robin the interrupts*/ + cpu = (cpu+1)%smp_num_cpus; + return cpu_to_physical_apicid(cpu); + default: + } + return cpu_online_map; +} +#else +#define target_cpus() (0x01) +#endif #endif diff --git a/xen-2.4.16/include/hypervisor-ifs/block.h b/xen-2.4.16/include/hypervisor-ifs/block.h index 55f7a33ce2..1722a6c288 100644 --- a/xen-2.4.16/include/hypervisor-ifs/block.h +++ b/xen-2.4.16/include/hypervisor-ifs/block.h @@ -6,35 +6,80 @@ * These are the ring data structures for buffering messages between * the hypervisor and guestos's. * - * For now we'll start with our own rings for the block IO code instead - * of using the network rings. Hopefully, this will give us additional - * flexibility in the future should we choose to move away from a - * ring producer consumer communication model. 
*/ #ifndef __BLOCK_H__ #define __BLOCK_H__ -typedef struct blk_tx_entry_st { - unsigned long addr; /* virtual address */ - unsigned long size; /* in bytes */ -} blk_tx_entry_t; +#include <linux/kdev_t.h> -typedef struct blk_rx_entry_st { - unsigned long addr; /* virtual address */ - unsigned long size; /* in bytes */ -} blk_rx_entry_t; +/* the first four definitions match fs.h */ +#define XEN_BLOCK_READ 0 +#define XEN_BLOCK_WRITE 1 +#define XEN_BLOCK_READA 2 /* currently unused */ +#define XEN_BLOCK_SPECIAL 4 /* currently unused */ +#define XEN_BLOCK_PROBE 8 /* determine io configuration from hypervisor */ +#define XEN_BLOCK_DEBUG 16 /* debug */ -typedef struct blk_ring_st { - blk_tx_entry_t *tx_ring; - unsigned int tx_prod, tx_cons, tx_event; - unsigned int tx_ring_size; +#define XEN_BLOCK_SYNC 2 +#define XEN_BLOCK_ASYNC 3 - blk_rx_entry_t *rx_ring; - unsigned int rx_prod, rx_cons, rx_event; - unsigned int rx_ring_size; +#define XEN_BLOCK_MAX_DOMAINS 32 /* NOTE: FIX THIS. VALUE SHOULD COME FROM? 
*/ + +#define BLK_TX_RING_SIZE 256 +#define BLK_RX_RING_SIZE 256 + +#define BLK_TX_RING_MAX_ENTRIES (BLK_TX_RING_SIZE - 2) +#define BLK_RX_RING_MAX_ENTRIES (BLK_RX_RING_SIZE - 2) + +#define BLK_TX_RING_INC(_i) (((_i)+1) & (BLK_TX_RING_SIZE-1)) +#define BLK_RX_RING_INC(_i) (((_i)+1) & (BLK_RX_RING_SIZE-1)) +#define BLK_TX_RING_ADD(_i,_j) (((_i)+(_j)) & (BLK_TX_RING_SIZE-1)) +#define BLK_RX_RING_ADD(_i,_j) (((_i)+(_j)) & (BLK_RX_RING_SIZE-1)) + +typedef struct blk_ring_entry +{ + void * id; /* for guest os use; used for the bh */ + int priority; /* orig sched pri, SYNC or ASYNC for now */ + int operation; /* XEN_BLOCK_READ or XEN_BLOCK_WRITE */ + char * buffer; + unsigned long block_number; /* block number */ + unsigned short block_size; /* block size */ + kdev_t device; + unsigned long sector_number; /* real buffer location on disk */ +} blk_ring_entry_t; + +typedef struct blk_ring_st +{ + blk_ring_entry_t *tx_ring; + unsigned int tx_prod, tx_cons; + unsigned int tx_ring_size; + + blk_ring_entry_t *rx_ring; + unsigned int rx_prod, rx_cons; + unsigned int rx_ring_size; } blk_ring_t; -int blk_create_ring(int domain, unsigned long ptr); +#define MAX_XEN_DISK_COUNT 100 + +#define XEN_DISK_IDE 1 +#define XEN_DISK_SCSI 2 + +typedef struct xen_disk /* physical disk */ +{ + int type; /* disk type */ + unsigned long capacity; + unsigned char heads; /* hdreg.h::hd_geometry */ + unsigned char sectors; /* hdreg.h::hd_geometry */ + unsigned int cylinders; /* hdreg.h::hd_big_geometry */ + unsigned long start; /* hdreg.h::hd_geometry */ + void * gendisk; /* struct gendisk ptr */ +} xen_disk_t; + +typedef struct xen_disk_info +{ + int count; /* number of subsequent xen_disk_t structures to follow */ + xen_disk_t disks[100]; +} xen_disk_info_t; #endif diff --git a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h index b97fad52f2..d9113805ba 100644 --- a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h +++ 
b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h @@ -85,10 +85,11 @@ typedef struct #define __HYPERVISOR_exit 8 #define __HYPERVISOR_dom0_op 9 #define __HYPERVISOR_network_op 10 -#define __HYPERVISOR_set_debugreg 11 -#define __HYPERVISOR_get_debugreg 12 -#define __HYPERVISOR_update_descriptor 13 -#define __HYPERVISOR_set_fast_trap 14 +#define __HYPERVISOR_block_io_op 11 +#define __HYPERVISOR_set_debugreg 12 +#define __HYPERVISOR_get_debugreg 13 +#define __HYPERVISOR_update_descriptor 14 +#define __HYPERVISOR_set_fast_trap 15 #define TRAP_INSTR "int $0x82" diff --git a/xen-2.4.16/include/xeno/blkdev.h b/xen-2.4.16/include/xeno/blkdev.h index 2701eb84e3..f5c1d25d75 100644 --- a/xen-2.4.16/include/xeno/blkdev.h +++ b/xen-2.4.16/include/xeno/blkdev.h @@ -85,6 +85,7 @@ struct buffer_head { struct inode * b_inode; struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */ + void *b_xen_request; /* xen request structure */ }; typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); diff --git a/xen-2.4.16/include/xeno/config.h b/xen-2.4.16/include/xeno/config.h index 637281f64b..d9ffb1d95d 100644 --- a/xen-2.4.16/include/xeno/config.h +++ b/xen-2.4.16/include/xeno/config.h @@ -29,6 +29,7 @@ #define CONFIG_IDEDMA_PCI_AUTO 1 #define CONFIG_IDEDMA_AUTO 1 #define CONFIG_BLK_DEV_IDE_MODES 1 +#define CONFIG_BLK_DEV_PIIX 1 #define CONFIG_SCSI 1 #define CONFIG_BLK_DEV_SD 1 diff --git a/xen-2.4.16/include/xeno/sched.h b/xen-2.4.16/include/xeno/sched.h index c4caa4c431..75b7959ee1 100644 --- a/xen-2.4.16/include/xeno/sched.h +++ b/xen-2.4.16/include/xeno/sched.h @@ -1,4 +1,3 @@ - #ifndef _LINUX_SCHED_H #define _LINUX_SCHED_H @@ -49,6 +48,7 @@ extern struct mm_struct init_mm; #define _HYP_EVENT_NEED_RESCHED 0 #define _HYP_EVENT_NET_RX 1 #define _HYP_EVENT_DIE 2 +#define _HYP_EVENT_BLK_RX 3 #define PF_DONEFPUINIT 0x1 /* Has the FPU been initialised for this task? */ #define PF_USEDFPU 0x2 /* Has this task used the FPU since last save? 
*/ diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile index 9361a01ec7..74a0c6c565 100644 --- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile @@ -1,3 +1,3 @@ O_TARGET := blk.o -obj-y := block.o +obj-y := xl_block.o xl_block_test.o include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c new file mode 100644 index 0000000000..0416b467a7 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c @@ -0,0 +1,827 @@ +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> + +#include <linux/fs.h> +#include <linux/hdreg.h> /* HDIO_GETGEO, et al */ +#include <linux/blkdev.h> +#include <linux/major.h> + +/* NOTE: this is drive independent, so no inclusion of ide.h */ + +#include <asm/hypervisor-ifs/block.h> +#include <asm/hypervisor-ifs/hypervisor-if.h> +#include <asm/io.h> +#include <asm/uaccess.h> /* put_user() */ + +#define MAJOR_NR XLBLK_MAJOR /* force defns in blk.h, must preceed include */ +static int xlblk_major = XLBLK_MAJOR; + +#include <linux/blk.h> /* must come after definition of MAJOR_NR!! 
*/ + +/* instead of including linux/ide.h to pick up the definitiong of byte + * (and consequently screwing up blk.h, we'll just copy the definition */ +typedef unsigned char byte; + +void xlblk_ide_register_disk(int, unsigned long); + +#define XLBLK_MAX 2 /* very arbitrary */ +#define XLBLK_MAJOR_NAME "blk" +#define IDE_PARTN_BITS 6 /* from ide.h::PARTN_BITS */ +#define IDE_PARTN_MASK ((1<<IDE_PARTN_BITS)-1) /* from ide.h::PARTN_MASK */ +static int xlblk_blk_size[XLBLK_MAX]; +static int xlblk_blksize_size[XLBLK_MAX]; +static int xlblk_hardsect_size[XLBLK_MAX]; +static int xlblk_read_ahead[XLBLK_MAX]; +static int xlblk_max_sectors[XLBLK_MAX]; + +#define XLBLK_RX_IRQ _EVENT_BLK_RX +#define XLBLK_TX_IRQ _EVENT_BLK_TX + +typedef struct xlblk_device +{ + struct buffer_head *bh; + unsigned int tx_count; /* number of used slots in tx ring */ +} xlblk_device_t; + +xlblk_device_t xlblk_device; + +/* USE_REQUEST_QUEUE = 1 use (multiple) request queues + * = 0 don't use IO request queue + */ +#define USE_REQUEST_QUEUE 1 + +#define XLBLK_DEBUG 0 +#define XLBLK_DEBUG_IOCTL 0 + +/* + * disk management + */ + +xen_disk_info_t xen_disk_info; + +/* some declarations */ +void hypervisor_request(void * id, + int operation, + char * buffer, + unsigned long block_number, + unsigned short block_size, + kdev_t device, + int mode); + + +/* ------------------------------------------------------------------------ + */ + +static int xenolinux_block_open(struct inode *inode, struct file *filep) +{ + if (XLBLK_DEBUG) { + printk (KERN_ALERT "xenolinux_block_open\n"); } + return 0; +} + +static int xenolinux_block_release(struct inode *inode, struct file *filep) +{ + if (XLBLK_DEBUG) { + printk (KERN_ALERT "xenolinux_block_release\n"); } + return 0; +} + +static int xenolinux_block_ioctl(struct inode *inode, struct file *filep, + unsigned command, unsigned long argument) +{ + int minor_dev; + + if (XLBLK_DEBUG_IOCTL) + { + printk (KERN_ALERT "xenolinux_block_ioctl\n"); + } + + /* check 
permissions */ + if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!inode) return -EINVAL; + minor_dev = MINOR(inode->i_rdev); + if (minor_dev >= XLBLK_MAX) return -ENODEV; + + if (XLBLK_DEBUG_IOCTL) + { + printk (KERN_ALERT + " command: 0x%x, argument: 0x%lx, minor: 0x%x\n", + command, (long) argument, minor_dev); + } + + switch (command) + { + case BLKGETSIZE : + { + if (XLBLK_DEBUG_IOCTL) + { + printk (KERN_ALERT + " BLKGETSIZE: %x %lx\n", BLKGETSIZE, + (long) xen_disk_info.disks[0].capacity); + } + return put_user(xen_disk_info.disks[0].capacity, + (unsigned long *) argument); + } + case BLKRRPART : + { + if (XLBLK_DEBUG_IOCTL) { + printk (KERN_ALERT " BLKRRPART: %x\n", BLKRRPART); } + break; + } + case BLKSSZGET : + { + if (XLBLK_DEBUG_IOCTL) { + printk (KERN_ALERT " BLKSSZGET: %x 0x%x\n", BLKSSZGET, + xlblk_hardsect_size[minor_dev]); } + return xlblk_hardsect_size[minor_dev]; + } + case HDIO_GETGEO : + { + struct hd_geometry *geo = (struct hd_geometry *)argument; + + if (XLBLK_DEBUG_IOCTL) { + printk (KERN_ALERT " HDIO_GETGEO: %x\n", HDIO_GETGEO); } + + if (!argument) return -EINVAL; + /* + if (put_user(0x80, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x20b, (unsigned short *) &geo->cylinders)) return -EFAULT; + */ + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned short *) &geo->cylinders)) return -EFAULT; + + return 0; + } + case HDIO_GETGEO_BIG : + { + struct hd_big_geometry *geo = (struct hd_big_geometry *) argument; + + if (XLBLK_DEBUG_IOCTL) { + printk (KERN_ALERT " HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); } + + if (!argument) return -EINVAL; + /* + if (put_user(0x80, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x20b, (unsigned int *) 
&geo->cylinders)) return -EFAULT; + */ + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT; + + return 0; + } + default : + { + if (XLBLK_DEBUG_IOCTL) { + printk (KERN_ALERT " eh? unknown ioctl\n"); } + break; + } + } + + return 0; +} + +static int xenolinux_block_check(kdev_t dev) +{ + if (XLBLK_DEBUG) { + printk (KERN_ALERT "xenolinux_block_check\n"); } + return 0; +} + +static int xenolinux_block_revalidate(kdev_t dev) +{ + if (XLBLK_DEBUG) { + printk (KERN_ALERT "xenolinux_block_revalidate\n"); } + return 0; +} + +/* + * hypervisor_request + * + * request block io + * + * id: for guest use only. + * operation: XEN_BLOCK_READ, XEN_BLOCK_WRITE or XEN_BLOCK_PROBE + * buffer: buffer to read/write into. this should be a + * virtual address in the guest os. + * block_number: block to read + * block_size: size of each block + * device: ide/hda is 768 or 0x300 + * mode: XEN_BLOCK_SYNC or XEN_BLOCK_ASYNC. async requests + * will queue until a sync request is issued. 
+ */ + +void hypervisor_request(void * id, + int operation, + char * buffer, + unsigned long block_number, + unsigned short block_size, + kdev_t device, + int mode) +{ + blk_ring_t *blk_ring = start_info.blk_ring; + int position; + void *buffer_pa, *buffer_ma; + kdev_t phys_device = (kdev_t) 0; + unsigned long sector_number = 0; + +#if 0 + printk(KERN_ALERT "[%x]", id); + printk (KERN_ALERT + "xlblk_req: id:%p op:%d, bf:%p, blk:%lu, sz:%u, dev:%x\n", + id, operation, buffer, block_number, block_size, device); +#endif + + /* XXX SMH: now need to convert guest virtual address to machine address */ + buffer_pa = (void *)virt_to_phys((unsigned long)buffer); + buffer_ma = (void *)phys_to_machine((unsigned long)buffer_pa); + +#if 0 + printk(KERN_ALERT "va %p => pa %p => ma %p\n", buffer, buffer_pa, buffer_ma); +#endif + + if (operation == XEN_BLOCK_PROBE) + { + phys_device = (kdev_t) 0; + sector_number = 0; + } + else if (operation == XEN_BLOCK_READ || operation == XEN_BLOCK_WRITE) + { + /* + * map logial major device to the physical device number + * + * XLBLK_MAJOR -> IDE0_MAJOR (123 -> 3) + */ + if (MAJOR(device) == XLBLK_MAJOR) + { + phys_device = MKDEV(IDE0_MAJOR, 0); + } + else + { + printk (KERN_ALERT + "error: xl_block::hypervisor_request: unknown device [0x%x]\n", + device); + BUG(); + } + + /* + * compute real buffer location on disk + * (from ll_rw_block.c::submit_bh) + */ + { + int idx = 0; + + struct gendisk *gd = (struct gendisk *) xen_disk_info.disks[idx].gendisk; + unsigned int minor = MINOR(device); + + sector_number = block_number /* * block_size >> 9 */; + + if (gd != NULL) /* if we have a partition table... 
*/ + { + sector_number += gd->part[minor & IDE_PARTN_MASK].start_sect; + } + } + } + + /* + * CHECK TO SEE IF THERE IS SPACE IN THE RING + */ + if (BLK_TX_RING_INC(blk_ring->tx_prod) == blk_ring->tx_cons) + { + printk (KERN_ALERT "hypervisor_request: tx_cons: %d, tx_prod:%d", + blk_ring->tx_cons, blk_ring->tx_prod); + } + + /* fill out a communications ring structure + and then trap into the hypervisor */ + position = blk_ring->tx_prod; + blk_ring->tx_ring[position].id = id; + blk_ring->tx_ring[position].priority = mode; + blk_ring->tx_ring[position].operation = operation; + blk_ring->tx_ring[position].buffer = buffer_ma; + blk_ring->tx_ring[position].block_number = block_number; + blk_ring->tx_ring[position].block_size = block_size; + blk_ring->tx_ring[position].device = phys_device; + blk_ring->tx_ring[position].sector_number = sector_number; + + blk_ring->tx_prod = BLK_TX_RING_INC(blk_ring->tx_prod); + + if (mode == XEN_BLOCK_SYNC) + { + /* trap into hypervisor */ + HYPERVISOR_block_io_op(); + } + else if (mode == XEN_BLOCK_ASYNC) + { + /* for now, do nothing. the request will go in the ring and + the next sync request will trigger the hypervisor to act */ + } + else + { + /* ummm, unknown mode. */ + BUG(); + } + + return; +} + + +/* + * do_xlblk_request + * + * read a block; request is in a request queue + * + * TO DO: should probably release the io_request_lock and then re-acquire + * (see LDD p. 338) + */ + +static void do_xlblk_request (request_queue_t *rq) +{ + struct request *req; + + if (XLBLK_DEBUG) + { + printk (KERN_ALERT "xlblk.c::do_xlblk_request for '%s'\n", DEVICE_NAME); + } + + while (!QUEUE_EMPTY) + { + struct buffer_head *bh; + unsigned long offset; + unsigned long length; + int rw; + + req = CURRENT; + + if (XLBLK_DEBUG) + { + printk (KERN_ALERT + "do_xlblk_request %p: cmd %i, sec %lx, (%li) bh:%p\n", + req, req->cmd, req->sector, + req->current_nr_sectors, req->bh); + } + + /* is there space in the tx ring for this request? 
+ * if the ring is full, then leave the request in the queue + * + * THIS IS A BIT BOGUS SINCE XEN COULD BE UPDATING TX_CONS + * AT THE SAME TIME + */ + { + blk_ring_t *blk_ring = start_info.blk_ring; + + if (BLK_RX_RING_INC(blk_ring->tx_prod) == blk_ring->tx_cons) + { + printk (KERN_ALERT "OOPS, TX LOOKS FULL cons: %d prod: %d\n", + blk_ring->tx_cons, blk_ring->tx_prod); + break; + } + } + + req->errors = 0; + blkdev_dequeue_request(req); + + bh = req->bh; + + while (bh) + { + + offset = bh->b_rsector << 9; + length = bh->b_size; + + rw = req->cmd; + if (rw == READA) rw= READ; + if ((rw != READ) && (rw != WRITE)) + { + printk (KERN_ALERT + "XenoLinux Virtual Block Device: bad command: %d\n", rw); + BUG(); + } + + /* + if (XLBLK_DEBUG) + { + printk (KERN_ALERT "xlblk.c::do_xlblk_request\n"); + printk (KERN_ALERT " b_blocknr: 0x%lx %ld\n", + bh->b_blocknr, bh->b_blocknr); + printk (KERN_ALERT " b_size: 0x%x %d\n", bh->b_size, bh->b_size); + printk (KERN_ALERT " b_dev: 0x%x %d\n", bh->b_dev, bh->b_dev); + printk (KERN_ALERT " b_rsector: 0x%lx %ld\n", + bh->b_rsector, bh->b_rsector); + } + */ + + hypervisor_request (req, rw == READ ? 
XEN_BLOCK_READ : XEN_BLOCK_WRITE, + bh->b_data, bh->b_rsector, bh->b_size, + bh->b_dev, XEN_BLOCK_SYNC); + + bh = bh->b_reqnext; + } + } + + return; +} + +/* + * xenolinux_block_request + * + * read a block without using a request queue + */ + +static int xenolinux_block_request(request_queue_t *rq, + int rw, + struct buffer_head *bh) +{ + unsigned int minor; + unsigned long offset; + unsigned long length; + + if (XLBLK_DEBUG) { + printk (KERN_ALERT "xlblk.c::xenolinux_block_request: %lx %d %lx\n", + (unsigned long) rq, rw, (unsigned long) bh); } + /* + printk (KERN_ALERT "xlblk.c::xlblk_request: op:%d bh:%p sect:%lu sz:%u\n", + rw, bh, bh->b_rsector, bh->b_size); + */ + + minor = MINOR(bh->b_rdev); + + offset = bh->b_rsector << 9; + length = bh->b_size; + + if (rw == READA) rw= READ; + if ((rw != READ) && (rw != WRITE)) + { + printk (KERN_ALERT + "XenoLinux Virtual Block Device: bad command: %d\n", rw); + goto fail; + } + + hypervisor_request (bh, rw == READ ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, + bh->b_data, bh->b_rsector, bh->b_size, + bh->b_dev, XEN_BLOCK_SYNC); + + return 0; + + fail: + return 0; +} + +static struct block_device_operations xenolinux_block_fops = +{ + open: xenolinux_block_open, + release: xenolinux_block_release, + ioctl: xenolinux_block_ioctl, + check_media_change: xenolinux_block_check, + revalidate: xenolinux_block_revalidate, +}; + +static void xlblk_rx_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + xlblk_device_t *dev = (xlblk_device_t *)dev_id; + blk_ring_t *blk_ring = start_info.blk_ring; + struct buffer_head *bh; + struct request *req; + int loop; + + for (loop = blk_ring->rx_cons; + loop != blk_ring->rx_prod; + loop = BLK_RX_RING_INC(loop)) + { + blk_ring_entry_t *bret = &blk_ring->rx_ring[loop]; + void *buffer_pa, *buffer_va; + + buffer_pa = machine_to_phys((unsigned long)bret->buffer); + buffer_va = phys_to_virt((unsigned long)buffer_pa); + +#if 0 + printk(KERN_ALERT "xlblk_rx_int: buffer ma %p => pa %p => va %p\n", + 
bret->buffer, buffer_pa, buffer_va); + + + if (XLBLK_DEBUG) + { + printk (KERN_ALERT + "xlblock::xlblk_rx_int [%s]\n", + (bret->operation == XEN_BLOCK_READ) ? "read" : "write"); + printk (KERN_ALERT + " vbuf: %lx, pbuf: %lx, blockno: %lx, size: %x, device %x\n", + (unsigned long) buffer_va, (unsigned long) bret->buffer, + bret->block_number, bret->block_size, bret->device); + printk (KERN_ALERT " bret: %p bh: %p\n", bret, bret->id); + } + + /* + printk (KERN_ALERT + "xlblk_rx: id:%p op:%d, bf:%p, blk:%lu, sz:%u, dev:%x\n", + bret->id, bret->operation, bret->buffer, bret->block_number, + bret->block_size, bret->device); + */ +#endif + + if (USE_REQUEST_QUEUE) + { + req = (struct request *)bret->id; + printk(KERN_ALERT "|%x|", req); + + if (!end_that_request_first(req, 1, "NAME")) + { + blkdev_dequeue_request(req); + + /* should be end_that_request_last(req) + to wake up waiting processes (with complete) */ + blkdev_release_request(req); + } + + /* + if (XLBLK_DEBUG) + { + int temp; + printk(KERN_ALERT + "buff: 0x%p, blkno: 0x%lx, size: 0x%x, device 0x%x [%p]\n", + vbuffer, bret->block_number, bret->block_size, bret->device, + bh->b_end_io); + + for (temp = 0; temp < bret->block_size; temp++) + { + if (temp % 16 == 0) printk ("[%4x] ", temp); + else if (temp % 4 == 0) printk (" "); + printk ("%02x", + vbuffer[temp] & 255); + if ((temp + 1) % 16 == 0) printk ("\n"); + } + printk ("\n\n"); + } + */ + +#ifdef BOGUS + req = (struct request *)bret->id; + while ((bh = req->bh) != NULL) + { + req->bh = bh->b_reqnext; + bh->b_reqnext = NULL; + bh->b_end_io(bh,1); + } + blkdev_release_request(req); +#endif /* BOGUS */ + } + else + { + bh = (struct buffer_head *)bret->id; + bh->b_end_io(bh,1); + + /* + if (XLBLK_DEBUG) + { + int temp; +#if 0 + printk(KERN_ALERT + "buff: 0x%p, blkno: 0x%lx, size: 0x%x, device 0x%x [%p]\n", + vbuffer, bret->block_number, bret->block_size, bret->device, + bh->b_end_io); +#endif + + for (temp = 0; temp < bret->block_size; temp++) + { + if (temp % 
16 == 0) printk ("[%4x] ", temp); + else if (temp % 4 == 0) printk (" "); + printk ("%02x", + vbuffer[temp] & 255); + if ((temp + 1) % 16 == 0) printk ("\n"); + } + printk ("\n\n"); + } + */ + } + } + + blk_ring->rx_cons = loop; +} + +static void xlblk_tx_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + if (XLBLK_DEBUG) { + printk (KERN_ALERT "--- xlblock::xlblk_tx_int\n"); } +} + +int __init xlblk_init(void) +{ + blk_ring_t *blk_ring = start_info.blk_ring; + int loop, error, result; + + /* + * initialize memory rings to communicate with hypervisor + */ + + if ( blk_ring == NULL ) return -ENOMEM; + + blk_ring->tx_prod = blk_ring->tx_cons = 0; + blk_ring->rx_prod = blk_ring->rx_cons = 0; + blk_ring->tx_ring = NULL; + blk_ring->rx_ring = NULL; + + blk_ring->tx_ring = kmalloc(BLK_TX_RING_SIZE * sizeof(blk_ring_entry_t), + GFP_KERNEL); + blk_ring->rx_ring = kmalloc(BLK_RX_RING_SIZE * sizeof(blk_ring_entry_t), + GFP_KERNEL); + + if ((blk_ring->tx_ring == NULL) || + (blk_ring->rx_ring == NULL)) + { + printk (KERN_ALERT + "error, could not allocate ring memory for block device\n"); + error = -ENOBUFS; + goto fail; + } + + /* + * setup soft interrupts to communicate with hypervisor + */ + + error = request_irq(XLBLK_RX_IRQ, xlblk_rx_int, 0, "xlblk-rx", + &xlblk_device); + if (error) + { + printk(KERN_ALERT "Could not allocate receive interrupt\n"); + goto fail; + } + + error = request_irq(XLBLK_TX_IRQ, xlblk_tx_int, 0, "xlblk-tx", + &xlblk_device); + if (error) + { + printk(KERN_ALERT "Could not allocate transmit interrupt\n"); + free_irq(XLBLK_RX_IRQ, &xlblk_device); + goto fail; + } + + /* + * get information about physical drives + * + */ + { + /* NOTE: this should only occur in domain 0 */ + memset (&xen_disk_info, 0, sizeof(xen_disk_info)); + xen_disk_info.count = 0; + + hypervisor_request(NULL, XEN_BLOCK_PROBE, (char *) &xen_disk_info, + 0, 0, (kdev_t) 0, XEN_BLOCK_SYNC); + + { + int loop; + for (loop = 0; loop < xen_disk_info.count; loop++) + { + printk 
(KERN_ALERT " %2d: type: %d, capacity: %ld\n", + loop, xen_disk_info.disks[loop].type, + xen_disk_info.disks[loop].capacity); + } + } + } + + /* + * initialize device driver + */ + + SET_MODULE_OWNER(&xenolinux_block_fops); + + result = register_blkdev(xlblk_major, "block", &xenolinux_block_fops); + if (result < 0) + { + printk (KERN_ALERT "xenolinux block: can't get major %d\n", xlblk_major); + return result; + } + + /* initialize global arrays in drivers/block/ll_rw_block.c */ + blk_size[xlblk_major] = xlblk_blk_size; + blksize_size[xlblk_major] = xlblk_blksize_size; + hardsect_size[xlblk_major] = xlblk_hardsect_size; + read_ahead[xlblk_major] = xlblk_read_ahead; + max_sectors[xlblk_major] = xlblk_max_sectors; + for (loop = 0; loop < XLBLK_MAX; loop++) + { + xlblk_blk_size[loop] = xen_disk_info.disks[0].capacity; + xlblk_blksize_size[loop] = 512; + xlblk_hardsect_size[loop] = 512; + xlblk_read_ahead[loop] = 8; + xlblk_max_sectors[loop] = 128; + } + + if (USE_REQUEST_QUEUE) + { + /* NEED TO MODIFY THIS TO HANDLE MULTIPLE QUEUES + * also, should replace do_xlblk_request with blk.h::DEVICE_REQUEST + */ + blk_init_queue(BLK_DEFAULT_QUEUE(xlblk_major), do_xlblk_request); + blk_queue_headactive(BLK_DEFAULT_QUEUE(xlblk_major), 0); + } + else + { + /* we don't use __make_request in ll_rw_blk */ + blk_queue_make_request(BLK_DEFAULT_QUEUE(xlblk_major), + xenolinux_block_request); + } + xlblk_ide_register_disk(0, xen_disk_info.disks[0].capacity); + + /* + * completion + */ + printk(KERN_ALERT + "XenoLinux Virtual Block Device Driver installed [device: %d]\n", + xlblk_major); + return 0; + + fail: + if (blk_ring->tx_ring) kfree(blk_ring->tx_ring); + if (blk_ring->rx_ring) kfree(blk_ring->rx_ring); + return error; +} + +void xlblk_ide_register_disk(int idx, unsigned long capacity) +{ + int units; + int minors; + struct gendisk *gd; + + /* plagarized from ide-probe.c::init_gendisk */ + + units = 2; /* from ide.h::MAX_DRIVES */ + + minors = units * (1<<IDE_PARTN_BITS); + gd = 
kmalloc (sizeof(struct gendisk), GFP_KERNEL); + gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); + gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); + memset(gd->part, 0, minors * sizeof(struct hd_struct)); + + gd->major = xlblk_major; /* our major device number */ + gd->major_name = XLBLK_MAJOR_NAME; /* treated special in genhd.c */ + gd->minor_shift = IDE_PARTN_BITS; /* num bits for partitions */ + gd->max_p = 1<<IDE_PARTN_BITS; /* 1 + max partitions / drive */ + gd->nr_real = units; /* current num real drives */ + gd->real_devices= NULL; /* ptr to internal data (was: hwif) */ + gd->next = NULL; /* linked list of major devs */ + gd->fops = &xenolinux_block_fops; /* file operations */ + gd->de_arr = kmalloc (sizeof *gd->de_arr * units, GFP_KERNEL); + gd->flags = kmalloc (sizeof *gd->flags * units, GFP_KERNEL); + if (gd->de_arr) memset (gd->de_arr, 0, sizeof *gd->de_arr * units); + if (gd->flags) memset (gd->flags, 0, sizeof *gd->flags * units); + add_gendisk(gd); + + xen_disk_info.disks[idx].gendisk = gd; + + /* default disk size is just a big number. 
in the future, we + need a message to probe the devices to determine the actual size */ + register_disk(gd, MKDEV(xlblk_major, 0), 1<<IDE_PARTN_BITS, + &xenolinux_block_fops, capacity); + + return; +} + +static void __exit xlblk_cleanup(void) +{ + /* CHANGE FOR MULTIQUEUE */ + blk_cleanup_queue(BLK_DEFAULT_QUEUE(xlblk_major)); + + /* clean up global arrays */ + read_ahead[xlblk_major] = 0; + if (blk_size[xlblk_major]) kfree(blk_size[xlblk_major]); + blk_size[xlblk_major] = NULL; + if (blksize_size[xlblk_major]) kfree(blksize_size[xlblk_major]); + blksize_size[xlblk_major] = NULL; + if (hardsect_size[xlblk_major]) kfree(hardsect_size[xlblk_major]); + hardsect_size[xlblk_major] = NULL; + + /* + * + * TODO: FOR EACH GENDISK, FREE + * + */ + + if (unregister_blkdev(xlblk_major, "block")) + { + printk(KERN_ALERT + "XenoLinux Virtual Block Device Driver uninstalled with errors\n"); + } + else + { + printk(KERN_ALERT "XenoLinux Virtual Block Device Driver uninstalled\n"); + } + + return; +} + + +#ifdef MODULE +module_init(xlblk_init); +module_exit(xlblk_cleanup); +#endif diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c new file mode 100644 index 0000000000..cab6d9a330 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c @@ -0,0 +1,233 @@ +/****************************************************************************** + * xenolinux_block_test.c + * + */ +#define EXPORT_SYMTAB + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <asm/uaccess.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> + +#include <asm/hypervisor-ifs/block.h> +#include <asm/hypervisor-ifs/hypervisor-if.h> + +/******************************************************************/ + +static struct proc_dir_entry *bdt; +static 
blk_ring_entry_t meta; +static char * data; + +static int proc_read_bdt(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + switch (meta.operation) + { + case XEN_BLOCK_READ : + case XEN_BLOCK_WRITE : + { + return proc_dump_block(page, start, off, count, eof, data); + } + case XEN_BLOCK_DEBUG : + { + return proc_dump_debug(page, start, off, count, eof, data); + } + default : + { + printk(KERN_ALERT + "block device test error: unknown operation [%c]\n", + meta.operation); + return -EINVAL; + } + } +} + +int proc_dump_debug(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + char header[100]; + char dump[1024]; + + sprintf (header, "Block Device Test: Debug Dump\n\n"); + + sprintf (dump, "%s\n", meta.buffer); + + if (data) + { + kfree(data); + } + + strncpy (page, dump, count); + return strlen(page); +} + +int proc_dump_block(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + char header[100]; + char dump[1024]; + char temp[100]; + int loop; + + sprintf (header, "Block Device Test\n\n%s blk num: %ld 0x%lx; size: %d 0x%x; device: 0x%x\n", + meta.operation == XEN_BLOCK_WRITE ? 
"write" : "read", + meta.block_number, meta.block_number, + meta.block_size, meta.block_size, + meta.device); + + sprintf (dump, "%s", header); + + if (meta.buffer) + { + for (loop = 0; loop < 100; loop++) + { + int i = meta.buffer[loop]; + + if (loop % 8 == 0) + { + sprintf (temp, "[%2d] ", loop); + strcat(dump, temp); + } + else if (loop % 2 == 0) + { + strcat(dump, " "); + } + + sprintf (temp, " 0x%02x", i & 255); + strcat(dump, temp); + if ((loop + 1) % 8 == 0) + { + strcat(dump, "\n"); + } + } + strcat(dump, "\n\n"); + } + + if (data) + { + kfree(data); + } + + strncpy (page, dump, count); + return strlen(page); +} + +int proc_write_bdt(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char *local = kmalloc((count + 1) * sizeof(char), GFP_KERNEL); + char opcode; + int block_number = 0; + int block_size = 0; + int device = 0; + int mode; + + if (copy_from_user(local, buffer, count)) + { + return -EFAULT; + } + local[count] = '\0'; + + sscanf(local, "%c %i %i %i", + &opcode, &block_number, &block_size, &device); + + if (opcode == 'r' || opcode == 'R') + { + meta.operation = XEN_BLOCK_READ; + } + else if (opcode == 'w' || opcode == 'W') + { + meta.operation = XEN_BLOCK_WRITE; + } + else if (opcode == 'd' || opcode == 'D') + { + meta.operation = XEN_BLOCK_DEBUG; + block_size = 10000; + } + else + { + printk(KERN_ALERT + "block device test error: unknown opcode [%c]\n", opcode); + return -EINVAL; + } + + if (opcode == 'r' || opcode == 'w' || + opcode == 'd' || opcode == 'D') + { + mode = XEN_BLOCK_SYNC; + } + else /* (opcode == 'R' || opcode == 'W') */ + { + mode = XEN_BLOCK_ASYNC; + } + + if (data) + { + kfree(data); + } + data = kmalloc(block_size * sizeof(char), GFP_KERNEL); + if (data == NULL) + { + kfree(local); + return -ENOMEM; + } + + meta.block_number = block_number; + meta.block_size = block_size; + meta.device = device; + meta.buffer = data; + + /* submit request */ + hypervisor_request(0, meta.operation, meta.buffer, + 
meta.block_number, meta.block_size, + meta.device, mode); + + kfree(local); + return count; +} + + +static int __init init_module(void) +{ + int return_value = 0; + + /* create proc entry */ + bdt = create_proc_entry("bdt", 0644, NULL); + if (bdt == NULL) + { + return_value = -ENOMEM; + goto error; + } + bdt->data = NULL; + bdt->read_proc = proc_read_bdt; + bdt->write_proc = proc_write_bdt; + bdt->owner = THIS_MODULE; + + memset(&meta, 0, sizeof(meta)); + + /* success */ + printk(KERN_ALERT "XenoLinux Block Device Test installed\n"); + return 0; + + error: + return return_value; +} + +static void __exit cleanup_module(void) +{ + if (data) + { + kfree(data); + } + printk(KERN_ALERT "XenoLinux Block Device Test uninstalled\n"); +} + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c new file mode 100644 index 0000000000..97d4a65b78 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_block.c @@ -0,0 +1,27 @@ +/* + * domain 0 block driver interface + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> + +static int __init init_module(void) +{ + request_module("xl_block"); + printk("Successfully installed domain 0 block interface\n"); + + + return 0; +} + +static void __exit cleanup_module(void) +{ + printk("Successfully de-installed domain-0 block interface\n"); + return 0; +} + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.16-sparse/drivers/block/Config.in b/xenolinux-2.4.16-sparse/drivers/block/Config.in new file mode 100644 index 0000000000..716774fe74 --- /dev/null +++ b/xenolinux-2.4.16-sparse/drivers/block/Config.in @@ -0,0 +1,51 @@ +# +# Block device driver configuration +# +mainmenu_option next_comment +comment 'Block devices' + +tristate 'Normal PC floppy disk support' CONFIG_BLK_DEV_FD +if [ 
"$CONFIG_AMIGA" = "y" ]; then + tristate 'Amiga floppy support' CONFIG_AMIGA_FLOPPY +fi +if [ "$CONFIG_ATARI" = "y" ]; then + tristate 'Atari floppy support' CONFIG_ATARI_FLOPPY +fi +if [ "$CONFIG_MAC" = "y" ]; then + dep_bool 'Macintosh IIfx/Quadra 900/Quadra 950 floppy support (EXPERIMENTAL)' CONFIG_BLK_DEV_SWIM_IOP $CONFIG_EXPERIMENTAL +fi +if [ "$CONFIG_MCA" = "y" ]; then + tristate 'PS/2 ESDI hard disk support' CONFIG_BLK_DEV_PS2 +fi +if [ "$CONFIG_ZORRO" = "y" ]; then + tristate 'Amiga Zorro II ramdisk support' CONFIG_AMIGA_Z2RAM +fi +if [ "$CONFIG_ATARI" = "y" ]; then + tristate 'Atari ACSI support' CONFIG_ATARI_ACSI + if [ "$CONFIG_ATARI_ACSI" != "n" ]; then + comment 'Some devices (e.g. CD jukebox) support multiple LUNs' + bool ' Probe all LUNs on each ACSI device' CONFIG_ACSI_MULTI_LUN + tristate ' Atari SLM laser printer support' CONFIG_ATARI_SLM + fi +fi +dep_tristate 'XT hard disk support' CONFIG_BLK_DEV_XD $CONFIG_ISA +dep_tristate 'Parallel port IDE device support' CONFIG_PARIDE $CONFIG_PARPORT +if [ "$CONFIG_PARIDE" = "y" -o "$CONFIG_PARIDE" = "m" ]; then + source drivers/block/paride/Config.in +fi +dep_tristate 'Compaq SMART2 support' CONFIG_BLK_CPQ_DA $CONFIG_PCI +dep_tristate 'Compaq Smart Array 5xxx support' CONFIG_BLK_CPQ_CISS_DA $CONFIG_PCI +dep_tristate 'Mylex DAC960/DAC1100 PCI RAID Controller support' CONFIG_BLK_DEV_DAC960 $CONFIG_PCI + +tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP +dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET + +tristate 'RAM disk support' CONFIG_BLK_DEV_RAM +if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then + int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096 +fi +dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM + +bool 'XenoLinux virtual block device support' CONFIG_XENOLINUX_BLOCK + +endmenu diff --git a/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c b/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c index 
a9e973d05f..c6d5c9625b 100644 --- a/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c +++ b/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c @@ -1227,6 +1227,11 @@ int __init blk_dev_init(void) #ifdef CONFIG_SUN_JSFLASH jsfd_init(); #endif + +#ifdef CONFIG_XENOLINUX_BLOCK + xlblk_init(); +#endif + return 0; }; diff --git a/xenolinux-2.4.16-sparse/fs/partitions/check.c b/xenolinux-2.4.16-sparse/fs/partitions/check.c new file mode 100644 index 0000000000..e564544ec6 --- /dev/null +++ b/xenolinux-2.4.16-sparse/fs/partitions/check.c @@ -0,0 +1,443 @@ +/* + * Code extracted from drivers/block/genhd.c + * Copyright (C) 1991-1998 Linus Torvalds + * Re-organised Feb 1998 Russell King + * + * We now have independent partition support from the + * block drivers, which allows all the partition code to + * be grouped in one location, and it to be mostly self + * contained. + * + * Added needed MAJORS for new pairs, {hdi,hdj}, {hdk,hdl} + */ + +#include <linux/config.h> +#include <linux/fs.h> +#include <linux/genhd.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/blk.h> +#include <linux/init.h> +#include <linux/raid/md.h> + +#include "check.h" + +#include "acorn.h" +#include "amiga.h" +#include "atari.h" +#include "ldm.h" +#include "mac.h" +#include "msdos.h" +#include "osf.h" +#include "sgi.h" +#include "sun.h" +#include "ibm.h" +#include "ultrix.h" + +extern int *blk_size[]; + +#define CHECK_DEBUG 0 + +int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ + +static int (*check_part[])(struct gendisk *hd, struct block_device *bdev, unsigned long first_sect, int first_minor) = { +#ifdef CONFIG_ACORN_PARTITION + acorn_partition, +#endif +#ifdef CONFIG_LDM_PARTITION + ldm_partition, /* this must come before msdos */ +#endif +#ifdef CONFIG_MSDOS_PARTITION + msdos_partition, +#endif +#ifdef CONFIG_OSF_PARTITION + osf_partition, +#endif +#ifdef CONFIG_SUN_PARTITION + sun_partition, +#endif +#ifdef CONFIG_AMIGA_PARTITION + 
amiga_partition, +#endif +#ifdef CONFIG_ATARI_PARTITION + atari_partition, +#endif +#ifdef CONFIG_MAC_PARTITION + mac_partition, +#endif +#ifdef CONFIG_SGI_PARTITION + sgi_partition, +#endif +#ifdef CONFIG_ULTRIX_PARTITION + ultrix_partition, +#endif +#ifdef CONFIG_IBM_PARTITION + ibm_partition, +#endif + NULL +}; + +/* + * This is ucking fugly but its probably the best thing for 2.4.x + * Take it as a clear reminder than we should put the device name + * generation in the object kdev_t points to in 2.5. + */ + +#ifdef CONFIG_ARCH_S390 +int (*genhd_dasd_name)(char*,int,int,struct gendisk*) = NULL; +EXPORT_SYMBOL(genhd_dasd_name); +#endif + +/* + * disk_name() is used by partition check code and the md driver. + * It formats the devicename of the indicated disk into + * the supplied buffer (of size at least 32), and returns + * a pointer to that same buffer (for convenience). + */ + +char *disk_name (struct gendisk *hd, int minor, char *buf) +{ + const char *maj = hd->major_name; + unsigned int unit = (minor >> hd->minor_shift); + unsigned int part = (minor & ((1 << hd->minor_shift) -1 )); + + if ((unit < hd->nr_real) && hd->part[minor].de) { + int pos; + + pos = devfs_generate_path (hd->part[minor].de, buf, 64); + if (pos >= 0) + return buf + pos; + } + +#ifdef CONFIG_ARCH_S390 + if (genhd_dasd_name + && genhd_dasd_name (buf, unit, part, hd) == 0) + return buf; +#endif + /* + * IDE devices use multiple major numbers, but the drives + * are named as: {hda,hdb}, {hdc,hdd}, {hde,hdf}, {hdg,hdh}.. + * This requires special handling here. 
+ */ + switch (hd->major) { + case IDE9_MAJOR: + unit += 2; + case IDE8_MAJOR: + unit += 2; + case IDE7_MAJOR: + unit += 2; + case IDE6_MAJOR: + unit += 2; + case IDE5_MAJOR: + unit += 2; + case IDE4_MAJOR: + unit += 2; + case IDE3_MAJOR: + unit += 2; + case IDE2_MAJOR: + unit += 2; + case IDE1_MAJOR: + unit += 2; + case IDE0_MAJOR: + maj = "hd"; + break; + case MD_MAJOR: + sprintf(buf, "%s%d", maj, unit); + return buf; + } + if (hd->major >= SCSI_DISK1_MAJOR && hd->major <= SCSI_DISK7_MAJOR) { + unit = unit + (hd->major - SCSI_DISK1_MAJOR + 1) * 16; + if (unit+'a' > 'z') { + unit -= 26; + sprintf(buf, "sd%c%c", 'a' + unit / 26, 'a' + unit % 26); + if (part) + sprintf(buf + 4, "%d", part); + return buf; + } + } + if (hd->major >= COMPAQ_SMART2_MAJOR && hd->major <= COMPAQ_SMART2_MAJOR+7) { + int ctlr = hd->major - COMPAQ_SMART2_MAJOR; + if (part == 0) + sprintf(buf, "%s/c%dd%d", maj, ctlr, unit); + else + sprintf(buf, "%s/c%dd%dp%d", maj, ctlr, unit, part); + return buf; + } + if (hd->major >= COMPAQ_CISS_MAJOR && hd->major <= COMPAQ_CISS_MAJOR+7) { + int ctlr = hd->major - COMPAQ_CISS_MAJOR; + if (part == 0) + sprintf(buf, "%s/c%dd%d", maj, ctlr, unit); + else + sprintf(buf, "%s/c%dd%dp%d", maj, ctlr, unit, part); + return buf; + } + if (hd->major >= DAC960_MAJOR && hd->major <= DAC960_MAJOR+7) { + int ctlr = hd->major - DAC960_MAJOR; + if (part == 0) + sprintf(buf, "%s/c%dd%d", maj, ctlr, unit); + else + sprintf(buf, "%s/c%dd%dp%d", maj, ctlr, unit, part); + return buf; + } + if (hd->major == ATARAID_MAJOR) { + int disk = minor >> hd->minor_shift; + int part = minor & (( 1 << hd->minor_shift) - 1); + if (part == 0) + sprintf(buf, "%s/d%d", maj, disk); + else + sprintf(buf, "%s/d%dp%d", maj, disk, part); + return buf; + } + if (part) + sprintf(buf, "%s%c%d", maj, unit+'a', part); + else + sprintf(buf, "%s%c", maj, unit+'a'); + return buf; +} + +/* + * Add a partitions details to the devices partition description. 
+ */ +void add_gd_partition(struct gendisk *hd, int minor, int start, int size) +{ +#ifndef CONFIG_DEVFS_FS + char buf[40]; +#endif + + hd->part[minor].start_sect = start; + hd->part[minor].nr_sects = size; +#ifdef CONFIG_DEVFS_FS + printk(" p%d", (minor & ((1 << hd->minor_shift) - 1))); +#else + if ((hd->major >= COMPAQ_SMART2_MAJOR+0 && hd->major <= COMPAQ_SMART2_MAJOR+7) || + (hd->major >= COMPAQ_CISS_MAJOR+0 && hd->major <= COMPAQ_CISS_MAJOR+7)) + printk(" p%d", (minor & ((1 << hd->minor_shift) - 1))); + else + printk(" %s", disk_name(hd, minor, buf)); +#endif +} + +static void check_partition(struct gendisk *hd, kdev_t dev, int first_part_minor) +{ + devfs_handle_t de = NULL; + static int first_time = 1; + unsigned long first_sector; + struct block_device *bdev; + char buf[64]; + int i; + + if (CHECK_DEBUG) printk (KERN_ALERT "check.c::check_partition\n"); + + if (first_time) + printk(KERN_INFO "Partition check:\n"); + first_time = 0; + first_sector = hd->part[MINOR(dev)].start_sect; + + /* + * This is a kludge to allow the partition check to be + * skipped for specific drives (e.g. 
IDE CD-ROM drives) + */ + if ((int)first_sector == -1) { + hd->part[MINOR(dev)].start_sect = 0; + return; + } + + if (hd->de_arr) + de = hd->de_arr[MINOR(dev) >> hd->minor_shift]; + i = devfs_generate_path (de, buf, sizeof buf); + if (i >= 0) + printk(KERN_INFO " /dev/%s:", buf + i); + else + printk(KERN_INFO " %s:", disk_name(hd, MINOR(dev), buf)); + bdev = bdget(kdev_t_to_nr(dev)); + bdev->bd_inode->i_size = (loff_t)hd->part[MINOR(dev)].nr_sects << 9; + bdev->bd_inode->i_blkbits = blksize_bits(block_size(dev)); + for (i = 0; check_part[i]; i++) { + int res; + res = check_part[i](hd, bdev, first_sector, first_part_minor); + if (res) { + if (res < 0 && warn_no_part) + printk(" unable to read partition table\n"); + goto setup_devfs; + } + } + + printk(" unknown partition table\n"); +setup_devfs: + invalidate_bdev(bdev, 1); + truncate_inode_pages(bdev->bd_inode->i_mapping, 0); + bdput(bdev); + i = first_part_minor - 1; + devfs_register_partitions (hd, i, hd->sizes ? 0 : 1); +} + +#ifdef CONFIG_DEVFS_FS +static void devfs_register_partition (struct gendisk *dev, int minor, int part) +{ + int devnum = minor >> dev->minor_shift; + devfs_handle_t dir; + unsigned int devfs_flags = DEVFS_FL_DEFAULT; + char devname[16]; + + if (dev->part[minor + part].de) return; + dir = devfs_get_parent (dev->part[minor].de); + if (!dir) return; + if ( dev->flags && (dev->flags[devnum] & GENHD_FL_REMOVABLE) ) + devfs_flags |= DEVFS_FL_REMOVABLE; + sprintf (devname, "part%d", part); + dev->part[minor + part].de = + devfs_register (dir, devname, devfs_flags, + dev->major, minor + part, + S_IFBLK | S_IRUSR | S_IWUSR, + dev->fops, NULL); +} + +static struct unique_numspace disc_numspace = UNIQUE_NUMBERSPACE_INITIALISER; + +static void devfs_register_disc (struct gendisk *dev, int minor) +{ + int pos = 0; + int devnum = minor >> dev->minor_shift; + devfs_handle_t dir, slave; + unsigned int devfs_flags = DEVFS_FL_DEFAULT; + char dirname[64], symlink[16]; + static devfs_handle_t devfs_handle; + + 
/*
 * Bring the devfs entries of one disc in line with its partition table.
 *
 * unregister == 0: register the disc entry, register every partition that
 *                  has at least one sector and drop the entries of the rest.
 * unregister != 0: drop every partition entry, then the disc entry, and
 *                  return the disc's unique number.
 *
 * Compiles to an empty function without CONFIG_DEVFS_FS.
 */
void devfs_register_partitions (struct gendisk *dev, int minor, int unregister)
{
#ifdef CONFIG_DEVFS_FS
	struct hd_struct *disc = &dev->part[minor];
	int idx;

	if (!unregister)
		devfs_register_disc (dev, minor);

	for (idx = 1; idx < dev->max_p; idx++) {
		struct hd_struct *slot = &dev->part[idx + minor];

		if (!unregister && slot->nr_sects >= 1) {
			devfs_register_partition (dev, minor, idx);
		} else {
			devfs_unregister (slot->de);
			slot->de = NULL;
		}
	}

	if (!unregister)
		return;

	devfs_unregister (disc->de);
	disc->de = NULL;
	devfs_dealloc_unique_number (&disc_numspace, disc->number);
#endif /* CONFIG_DEVFS_FS */
}
You must ensure that no one is using the device, and no one + * can start using the device while this function is being executed. + * + * Much of the cleanup from the old partition tables should have already been + * done + */ + +void register_disk(struct gendisk *gdev, kdev_t dev, unsigned minors, + struct block_device_operations *ops, long size) +{ + if (CHECK_DEBUG) + { + if (gdev != NULL) + printk (KERN_ALERT + "check.c::register_disk gdev:%p dev:%d min:%u ops:%p sz:%ld\n", + gdev, dev, minors, ops, size); + } + + if (!gdev) + return; + + grok_partitions(gdev, MINOR(dev)>>gdev->minor_shift, minors, size); +} + +void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size) +{ + int i; + int first_minor = drive << dev->minor_shift; + int end_minor = first_minor + dev->max_p; + + if (CHECK_DEBUG) printk (KERN_ALERT "check.c::grok_partitions\n"); + + if(!dev->sizes) + blk_size[dev->major] = NULL; + + dev->part[first_minor].nr_sects = size; + /* No such device or no minors to use for partitions */ + if (!size || minors == 1) + return; + + if (dev->sizes) { + dev->sizes[first_minor] = size >> (BLOCK_SIZE_BITS - 9); + for (i = first_minor + 1; i < end_minor; i++) + dev->sizes[i] = 0; + } + blk_size[dev->major] = dev->sizes; + check_partition(dev, MKDEV(dev->major, first_minor), 1 + first_minor); + + /* + * We need to set the sizes array before we will be able to access + * any of the partitions on this device. 
+ */ + if (dev->sizes != NULL) { /* optional safeguard in ll_rw_blk.c */ + for (i = first_minor; i < end_minor; i++) + dev->sizes[i] = dev->part[i].nr_sects >> (BLOCK_SIZE_BITS - 9); + } +} + +unsigned char *read_dev_sector(struct block_device *bdev, unsigned long n, Sector *p) +{ + struct address_space *mapping = bdev->bd_inode->i_mapping; + int sect = PAGE_CACHE_SIZE / 512; + struct page *page; + + page = read_cache_page(mapping, n/sect, + (filler_t *)mapping->a_ops->readpage, NULL); + if (!IS_ERR(page)) { + wait_on_page(page); + if (!Page_Uptodate(page)) + goto fail; + if (PageError(page)) + goto fail; + p->v = page; + return (unsigned char *)page_address(page) + 512 * (n % sect); +fail: + page_cache_release(page); + } + p->v = NULL; + return NULL; +} diff --git a/xenolinux-2.4.16-sparse/fs/partitions/msdos.c b/xenolinux-2.4.16-sparse/fs/partitions/msdos.c new file mode 100644 index 0000000000..34a086024e --- /dev/null +++ b/xenolinux-2.4.16-sparse/fs/partitions/msdos.c @@ -0,0 +1,642 @@ +/* + * fs/partitions/msdos.c + * + * Code extracted from drivers/block/genhd.c + * Copyright (C) 1991-1998 Linus Torvalds + * + * Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug + * in the early extended-partition checks and added DM partitions + * + * Support for DiskManager v6.0x added by Mark Lord, + * with information provided by OnTrack. This now works for linux fdisk + * and LILO, as well as loadlin and bootln. Note that disks other than + * /dev/hda *must* have a "DOS" type 0x51 partition in the first slot (hda1). 
+ * + * More flexible handling of extended partitions - aeb, 950831 + * + * Check partition table on IDE disks for common CHS translations + * + * Re-organised Feb 1998 Russell King + */ + +#include <linux/config.h> +#include <linux/fs.h> +#include <linux/genhd.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/string.h> +#include <linux/blk.h> + +#ifdef CONFIG_BLK_DEV_IDE +#include <linux/ide.h> /* IDE xlate */ +#endif /* CONFIG_BLK_DEV_IDE */ + +#define MSDOS_DEBUG 0 + +#include <asm/system.h> + +#include "check.h" +#include "msdos.h" + +#if CONFIG_BLK_DEV_MD +extern void md_autodetect_dev(kdev_t dev); +#endif + +/* + * Many architectures don't like unaligned accesses, which is + * frequently the case with the nr_sects and start_sect partition + * table entries. + */ +#include <asm/unaligned.h> + +#define SYS_IND(p) (get_unaligned(&p->sys_ind)) +#define NR_SECTS(p) ({ __typeof__(p->nr_sects) __a = \ + get_unaligned(&p->nr_sects); \ + le32_to_cpu(__a); \ + }) + +#define START_SECT(p) ({ __typeof__(p->start_sect) __a = \ + get_unaligned(&p->start_sect); \ + le32_to_cpu(__a); \ + }) + +static inline int is_extended_partition(struct partition *p) +{ + return (SYS_IND(p) == DOS_EXTENDED_PARTITION || + SYS_IND(p) == WIN98_EXTENDED_PARTITION || + SYS_IND(p) == LINUX_EXTENDED_PARTITION); +} + +/* + * partition_name() formats the short partition name into the supplied + * buffer, and returns a pointer to that buffer. + * Used by several partition types which makes conditional inclusion messy, + * use __attribute__ ((unused)) instead. 
+ */ +static char __attribute__ ((unused)) + *partition_name (struct gendisk *hd, int minor, char *buf) +{ +#ifdef CONFIG_DEVFS_FS + sprintf(buf, "p%d", (minor & ((1 << hd->minor_shift) - 1))); + return buf; +#else + return disk_name(hd, minor, buf); +#endif +} + +#define MSDOS_LABEL_MAGIC1 0x55 +#define MSDOS_LABEL_MAGIC2 0xAA + +static inline int +msdos_magic_present(unsigned char *p) +{ + return (p[0] == MSDOS_LABEL_MAGIC1 && p[1] == MSDOS_LABEL_MAGIC2); +} + +/* + * Create devices for each logical partition in an extended partition. + * The logical partitions form a linked list, with each entry being + * a partition table with two entries. The first entry + * is the real data partition (with a start relative to the partition + * table start). The second is a pointer to the next logical partition + * (with a start relative to the entire extended partition). + * We do not create a Linux partition for the partition tables, but + * only for the actual data partitions. + */ + +static void extended_partition(struct gendisk *hd, struct block_device *bdev, + int minor, unsigned long first_size, int *current_minor) +{ + struct partition *p; + Sector sect; + unsigned char *data; + unsigned long first_sector, this_sector, this_size; + int mask = (1 << hd->minor_shift) - 1; + int sector_size = get_hardsect_size(to_kdev_t(bdev->bd_dev)) / 512; + int loopct = 0; /* number of links followed + without finding a data partition */ + int i; + + this_sector = first_sector = hd->part[minor].start_sect; + this_size = first_size; + + while (1) { + if (++loopct > 100) + return; + if ((*current_minor & mask) == 0) + return; + data = read_dev_sector(bdev, this_sector, §); + if (!data) + return; + + if (!msdos_magic_present(data + 510)) + goto done; + + p = (struct partition *) (data + 0x1be); + + /* + * Usually, the first entry is the real data partition, + * the 2nd entry is the next extended partition, or empty, + * and the 3rd and 4th entries are unused. 
+ * However, DRDOS sometimes has the extended partition as + * the first entry (when the data partition is empty), + * and OS/2 seems to use all four entries. + */ + + /* + * First process the data partition(s) + */ + for (i=0; i<4; i++, p++) { + unsigned long offs, size, next; + if (!NR_SECTS(p) || is_extended_partition(p)) + continue; + + /* Check the 3rd and 4th entries - + these sometimes contain random garbage */ + offs = START_SECT(p)*sector_size; + size = NR_SECTS(p)*sector_size; + next = this_sector + offs; + if (i >= 2) { + if (offs + size > this_size) + continue; + if (next < first_sector) + continue; + if (next + size > first_sector + first_size) + continue; + } + + add_gd_partition(hd, *current_minor, next, size); +#if CONFIG_BLK_DEV_MD + if (SYS_IND(p) == LINUX_RAID_PARTITION) { + md_autodetect_dev(MKDEV(hd->major,*current_minor)); + } +#endif + + (*current_minor)++; + loopct = 0; + if ((*current_minor & mask) == 0) + goto done; + } + /* + * Next, process the (first) extended partition, if present. + * (So far, there seems to be no reason to make + * extended_partition() recursive and allow a tree + * of extended partitions.) + * It should be a link to the next logical partition. + * Create a minor for this just long enough to get the next + * partition table. The minor will be reused for the next + * data partition. + */ + p -= 4; + for (i=0; i<4; i++, p++) + if (NR_SECTS(p) && is_extended_partition(p)) + break; + if (i == 4) + goto done; /* nothing left to do */ + + this_sector = first_sector + START_SECT(p) * sector_size; + this_size = NR_SECTS(p) * sector_size; + minor = *current_minor; + put_dev_sector(sect); + } +done: + put_dev_sector(sect); +} + +/* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also + indicates linux swap. Be careful before believing this is Solaris. 
*/ + +static void +solaris_x86_partition(struct gendisk *hd, struct block_device *bdev, + int minor, int *current_minor) +{ + +#ifdef CONFIG_SOLARIS_X86_PARTITION + long offset = hd->part[minor].start_sect; + Sector sect; + struct solaris_x86_vtoc *v; + struct solaris_x86_slice *s; + int mask = (1 << hd->minor_shift) - 1; + int i; + char buf[40]; + + v = (struct solaris_x86_vtoc *)read_dev_sector(bdev, offset+1, &sect); + if (!v) + return; + if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) { + put_dev_sector(sect); + return; + } + printk(" %s: <solaris:", partition_name(hd, minor, buf)); + if (le32_to_cpu(v->v_version) != 1) { + printk(" cannot handle version %d vtoc>\n", + le32_to_cpu(v->v_version)); + put_dev_sector(sect); + return; + } + for (i=0; i<SOLARIS_X86_NUMSLICE; i++) { + if ((*current_minor & mask) == 0) + break; + s = &v->v_slice[i]; + + if (s->s_size == 0) + continue; + printk(" [s%d]", i); + /* solaris partitions are relative to current MS-DOS + * one but add_gd_partition starts relative to sector + * zero of the disk. Therefore, must add the offset + * of the current partition */ + add_gd_partition(hd, *current_minor, + le32_to_cpu(s->s_start)+offset, + le32_to_cpu(s->s_size)); + (*current_minor)++; + } + put_dev_sector(sect); + printk(" >\n"); +#endif +} + +#ifdef CONFIG_BSD_DISKLABEL +static void +check_and_add_bsd_partition(struct gendisk *hd, struct bsd_partition *bsd_p, + int minor, int *current_minor) +{ + struct hd_struct *lin_p; + /* check relative position of partitions.
*/ + for (lin_p = hd->part + 1 + minor; + lin_p - hd->part - minor < *current_minor; lin_p++) { + /* no relationship -> try again */ + if (lin_p->start_sect + lin_p->nr_sects <= le32_to_cpu(bsd_p->p_offset) || + lin_p->start_sect >= le32_to_cpu(bsd_p->p_offset) + le32_to_cpu(bsd_p->p_size)) + continue; + /* equal -> no need to add */ + if (lin_p->start_sect == le32_to_cpu(bsd_p->p_offset) && + lin_p->nr_sects == le32_to_cpu(bsd_p->p_size)) + return; + /* bsd living within dos partition */ + if (lin_p->start_sect <= le32_to_cpu(bsd_p->p_offset) && lin_p->start_sect + + lin_p->nr_sects >= le32_to_cpu(bsd_p->p_offset) + le32_to_cpu(bsd_p->p_size)) { +#ifdef DEBUG_BSD_DISKLABEL + printk("w: %d %ld+%ld,%d+%d", + lin_p - hd->part, + lin_p->start_sect, lin_p->nr_sects, + le32_to_cpu(bsd_p->p_offset), + le32_to_cpu(bsd_p->p_size)); +#endif + break; + } + /* ouch: bsd and linux overlap. Don't even try for that partition */ +#ifdef DEBUG_BSD_DISKLABEL + printk("???: %d %ld+%ld,%d+%d", + lin_p - hd->part, lin_p->start_sect, lin_p->nr_sects, + le32_to_cpu(bsd_p->p_offset), le32_to_cpu(bsd_p->p_size)); +#endif + printk("???"); + return; + } /* if the bsd partition is not currently known to linux, we end + * up here + */ + add_gd_partition(hd, *current_minor, le32_to_cpu(bsd_p->p_offset), + le32_to_cpu(bsd_p->p_size)); + (*current_minor)++; +} + +/* + * Create devices for BSD partitions listed in a disklabel, under a + * dos-like partition. See extended_partition() for more information. 
+ */ +static void do_bsd_partition(struct gendisk *hd, struct block_device *bdev, + int minor, int *current_minor, char *name, int max_partitions) +{ + long offset = hd->part[minor].start_sect; + Sector sect; + struct bsd_disklabel *l; + struct bsd_partition *p; + int mask = (1 << hd->minor_shift) - 1; + char buf[40]; + + l = (struct bsd_disklabel *)read_dev_sector(bdev, offset+1, &sect); + if (!l) + return; + if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) { + put_dev_sector(sect); + return; + } + printk(" %s: <%s", partition_name(hd, minor, buf), name); + + if (le16_to_cpu(l->d_npartitions) < max_partitions) + max_partitions = le16_to_cpu(l->d_npartitions); + for (p = l->d_partitions; p - l->d_partitions < max_partitions; p++) { + if ((*current_minor & mask) == 0) + break; + if (p->p_fstype == BSD_FS_UNUSED) + continue; + check_and_add_bsd_partition(hd, p, minor, current_minor); + } + put_dev_sector(sect); + printk(" >\n"); +} +#endif + +static void bsd_partition(struct gendisk *hd, struct block_device *bdev, + int minor, int *current_minor) +{ +#ifdef CONFIG_BSD_DISKLABEL + do_bsd_partition(hd, bdev, minor, current_minor, "bsd", + BSD_MAXPARTITIONS); +#endif +} + +static void netbsd_partition(struct gendisk *hd, struct block_device *bdev, + int minor, int *current_minor) +{ +#ifdef CONFIG_BSD_DISKLABEL + do_bsd_partition(hd, bdev, minor, current_minor, "netbsd", + BSD_MAXPARTITIONS); +#endif +} + +static void openbsd_partition(struct gendisk *hd, struct block_device *bdev, + int minor, int *current_minor) +{ +#ifdef CONFIG_BSD_DISKLABEL + do_bsd_partition(hd, bdev, minor, current_minor, + "openbsd", OPENBSD_MAXPARTITIONS); +#endif +} + +/* + * Create devices for Unixware partitions listed in a disklabel, under a + * dos-like partition. See extended_partition() for more information.
+ */ +static void unixware_partition(struct gendisk *hd, struct block_device *bdev, + int minor, int *current_minor) +{ +#ifdef CONFIG_UNIXWARE_DISKLABEL + long offset = hd->part[minor].start_sect; + Sector sect; + struct unixware_disklabel *l; + struct unixware_slice *p; + int mask = (1 << hd->minor_shift) - 1; + char buf[40]; + + l = (struct unixware_disklabel *)read_dev_sector(bdev, offset+29, &sect); + if (!l) + return; + if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC || + le32_to_cpu(l->vtoc.v_magic) != UNIXWARE_DISKMAGIC2) { + put_dev_sector(sect); + return; + } + printk(" %s: <unixware:", partition_name(hd, minor, buf)); + p = &l->vtoc.v_slice[1]; + /* I omit the 0th slice as it is the same as whole disk. */ + while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) { + if ((*current_minor & mask) == 0) + break; + + if (p->s_label != UNIXWARE_FS_UNUSED) { + add_gd_partition(hd, *current_minor, START_SECT(p), + NR_SECTS(p)); + (*current_minor)++; + } + p++; + } + put_dev_sector(sect); + printk(" >\n"); +#endif +} + +/* + * Minix 2.0.0/2.0.2 subpartition support. + * Anand Krishnamurthy <anandk@wiproge.med.ge.com> + * Rajeev V. Pillai <rajeevvp@yahoo.com> + */ +static void minix_partition(struct gendisk *hd, struct block_device *bdev, + int minor, int *current_minor) +{ +#ifdef CONFIG_MINIX_SUBPARTITION + long offset = hd->part[minor].start_sect; + Sector sect; + unsigned char *data; + struct partition *p; + int mask = (1 << hd->minor_shift) - 1; + int i; + char buf[40]; + + data = read_dev_sector(bdev, offset, &sect); + if (!data) + return; + + p = (struct partition *)(data + 0x1be); + + /* The first sector of a Minix partition can have either + * a secondary MBR describing its subpartitions, or + * the normal boot sector.
*/ + if (msdos_magic_present (data + 510) && + SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */ + + printk(" %s: <minix:", partition_name(hd, minor, buf)); + for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) { + if ((*current_minor & mask) == 0) + break; + /* add each partition in use */ + if (SYS_IND(p) == MINIX_PARTITION) { + add_gd_partition(hd, *current_minor, + START_SECT(p), NR_SECTS(p)); + (*current_minor)++; + } + } + printk(" >\n"); + } + put_dev_sector(sect); +#endif /* CONFIG_MINIX_SUBPARTITION */ +} + +static struct { + unsigned char id; + void (*parse)(struct gendisk *, struct block_device *, int, int *); +} subtypes[] = { + {BSD_PARTITION, bsd_partition}, + {NETBSD_PARTITION, netbsd_partition}, + {OPENBSD_PARTITION, openbsd_partition}, + {MINIX_PARTITION, minix_partition}, + {UNIXWARE_PARTITION, unixware_partition}, + {SOLARIS_X86_PARTITION, solaris_x86_partition}, + {0, NULL}, +}; +/* + * Look for various forms of IDE disk geometry translation + */ +static int handle_ide_mess(struct block_device *bdev) +{ +#ifdef CONFIG_BLK_DEV_IDE + Sector sect; + unsigned char *data; + kdev_t dev = to_kdev_t(bdev->bd_dev); + unsigned int sig; + int heads = 0; + struct partition *p; + int i; + + if (MSDOS_DEBUG) + printk (KERN_ALERT "handle_ide_mess ------------\n"); + + /* + * The i386 partition handling programs very often + * make partitions end on cylinder boundaries. + * There is no need to do so, and Linux fdisk doesnt always + * do this, and Windows NT on Alpha doesnt do this either, + * but still, this helps to guess #heads. 
+ */ + data = read_dev_sector(bdev, 0, &sect); + if (!data) + return -1; + if (!msdos_magic_present(data + 510)) { + put_dev_sector(sect); + return 0; + } + sig = le16_to_cpu(*(unsigned short *)(data + 2)); + p = (struct partition *) (data + 0x1be); + for (i = 0; i < 4; i++) { + struct partition *q = &p[i]; + if (NR_SECTS(q)) { + if ((q->sector & 63) == 1 && + (q->end_sector & 63) == 63) + heads = q->end_head + 1; + break; + } + } + if (SYS_IND(p) == EZD_PARTITION) { + /* + * Accesses to sector 0 must go to sector 1 instead. + */ + if (ide_xlate_1024(dev, -1, heads, " [EZD]")) + goto reread; + } else if (SYS_IND(p) == DM6_PARTITION) { + + /* + * Everything on the disk is offset by 63 sectors, + * including a "new" MBR with its own partition table. + */ + if (ide_xlate_1024(dev, 1, heads, " [DM6:DDO]")) + goto reread; + } else if (sig <= 0x1ae && + data[sig] == 0xAA && data[sig+1] == 0x55 && + (data[sig+2] & 1)) { + /* DM6 signature in MBR, courtesy of OnTrack */ + (void) ide_xlate_1024 (dev, 0, heads, " [DM6:MBR]"); + } else if (SYS_IND(p) == DM6_AUX1PARTITION || + SYS_IND(p) == DM6_AUX3PARTITION) { + /* + * DM6 on other than the first (boot) drive + */ + (void) ide_xlate_1024(dev, 0, heads, " [DM6:AUX]"); + } else { + (void) ide_xlate_1024(dev, 2, heads, " [PTBL]"); + } + put_dev_sector(sect); + + if (MSDOS_DEBUG) + printk (KERN_ALERT "handle_ide_mess -------- %d\n", heads); + return 1; + +reread: + put_dev_sector(sect); + /* Flush the cache */ + invalidate_bdev(bdev, 1); + truncate_inode_pages(bdev->bd_inode->i_mapping, 0); +#endif /* CONFIG_BLK_DEV_IDE */ + return 1; +} + +int msdos_partition(struct gendisk *hd, struct block_device *bdev, + unsigned long first_sector, int first_part_minor) +{ + int i, minor = first_part_minor; + Sector sect; + struct partition *p; + unsigned char *data; + int mask = (1 << hd->minor_shift) - 1; + int sector_size = get_hardsect_size(to_kdev_t(bdev->bd_dev)) / 512; + int current_minor = first_part_minor; + int err; + + if (MSDOS_DEBUG)
printk (KERN_ALERT "msdos.c::msdos_partition\n"); + err = handle_ide_mess(bdev); + if (err <= 0) + return err; + data = read_dev_sector(bdev, 0, &sect); + if (!data) + return -1; + if (!msdos_magic_present(data + 510)) { + put_dev_sector(sect); + return 0; + } + p = (struct partition *) (data + 0x1be); + + /* + * Look for partitions in two passes: + * First find the primary and DOS-type extended partitions. + * On the second pass look inside *BSD, Unixware and Solaris partitions. + */ + + current_minor += 4; + for (i=1 ; i<=4 ; minor++,i++,p++) { + if (!NR_SECTS(p)) + continue; + add_gd_partition(hd, minor, + first_sector+START_SECT(p)*sector_size, + NR_SECTS(p)*sector_size); +#if CONFIG_BLK_DEV_MD + if (SYS_IND(p) == LINUX_RAID_PARTITION) { + md_autodetect_dev(MKDEV(hd->major,minor)); + } +#endif + if (is_extended_partition(p)) { + unsigned long size = hd->part[minor].nr_sects; + printk(" <"); + /* prevent someone doing mkfs or mkswap on an + extended partition, but leave room for LILO */ + if (size > 2) + hd->part[minor].nr_sects = 2; + extended_partition(hd, bdev, minor, size, &current_minor); + printk(" >"); + } + } + + /* + * Check for old-style Disk Manager partition table + */ + if (msdos_magic_present(data + 0xfc)) { + p = (struct partition *) (0x1be + data); + for (i = 4 ; i < 16 ; i++, current_minor++) { + p--; + if ((current_minor & mask) == 0) + break; + if (!(START_SECT(p) && NR_SECTS(p))) + continue; + add_gd_partition(hd, current_minor, START_SECT(p), NR_SECTS(p)); + } + } + printk("\n"); + + /* second pass - output for each on a separate line */ + minor -= 4; + p = (struct partition *) (0x1be + data); + for (i=1 ; i<=4 ; minor++,i++,p++) { + unsigned char id = SYS_IND(p); + int n; + + if (!NR_SECTS(p)) + continue; + + for (n = 0; subtypes[n].parse && id != subtypes[n].id; n++) + ; + + if (subtypes[n].parse) + subtypes[n].parse(hd, bdev, minor, &current_minor); + } + put_dev_sector(sect); + return 1; +} diff --git
a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h index 8269718ec8..df25598730 100644 --- a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h @@ -258,6 +258,16 @@ static inline int HYPERVISOR_network_op(void *network_op) return ret; } +static inline int HYPERVISOR_block_io_op(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_block_io_op) ); + + return ret; +} + static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value) { int ret; diff --git a/xenolinux-2.4.16-sparse/include/linux/blk.h b/xenolinux-2.4.16-sparse/include/linux/blk.h new file mode 100644 index 0000000000..12eb99ff16 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/linux/blk.h @@ -0,0 +1,416 @@ +#ifndef _BLK_H +#define _BLK_H + +#include <linux/blkdev.h> +#include <linux/locks.h> +#include <linux/config.h> +#include <linux/spinlock.h> + +/* + * Spinlock for protecting the request queue which + * is mucked around with in interrupts on potentially + * multiple CPU's.. + */ +extern spinlock_t io_request_lock; + +/* + * Initialization functions. 
+ */ +extern int isp16_init(void); +extern int cdu31a_init(void); +extern int acsi_init(void); +extern int mcd_init(void); +extern int mcdx_init(void); +extern int sbpcd_init(void); +extern int aztcd_init(void); +extern int sony535_init(void); +extern int gscd_init(void); +extern int cm206_init(void); +extern int optcd_init(void); +extern int sjcd_init(void); +extern int cdi_init(void); +extern int hd_init(void); +extern int ide_init(void); +extern int xd_init(void); +extern int mfm_init(void); +extern int loop_init(void); +extern int md_init(void); +extern int ap_init(void); +extern int ddv_init(void); +extern int z2_init(void); +extern int swim3_init(void); +extern int swimiop_init(void); +extern int amiga_floppy_init(void); +extern int atari_floppy_init(void); +extern int ez_init(void); +extern int bpcd_init(void); +extern int ps2esdi_init(void); +extern int jsfd_init(void); +extern int viodasd_init(void); +extern int viocd_init(void); +extern int xlblk_init(void); + +#if defined(CONFIG_ARCH_S390) +extern int dasd_init(void); +extern int xpram_init(void); +extern int tapeblock_init(void); +#endif /* CONFIG_ARCH_S390 */ + +extern void set_device_ro(kdev_t dev,int flag); +void add_blkdev_randomness(int major); + +extern int floppy_init(void); +extern void rd_load(void); +extern int rd_init(void); +extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */ +extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */ +extern int rd_image_start; /* starting block # of image */ + +#ifdef CONFIG_BLK_DEV_INITRD + +#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */ + +extern unsigned long initrd_start,initrd_end; +extern int mount_initrd; /* zero if initrd should not be mounted */ +extern int initrd_below_start_ok; /* 1 if it is not an error if initrd_start < memory_start */ +void initrd_init(void); + +#endif + + +/* + * end_request() and friends. Must be called with the request queue spinlock + * acquired. 
All functions called within end_request() _must_be_ atomic. + * + * Several drivers define their own end_request and call + * end_that_request_first() and end_that_request_last() + * for parts of the original function. This prevents + * code duplication in drivers. + */ + +static inline void blkdev_dequeue_request(struct request * req) +{ + list_del(&req->queue); +} + +int end_that_request_first(struct request *req, int uptodate, char *name); +void end_that_request_last(struct request *req); + +#if defined(MAJOR_NR) || defined(IDE_DRIVER) + +#undef DEVICE_ON +#undef DEVICE_OFF + +/* + * Add entries as needed. + */ + +#ifdef IDE_DRIVER + +#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS) +#define DEVICE_NAME "ide" + +#elif (MAJOR_NR == RAMDISK_MAJOR) + +/* ram disk */ +#define DEVICE_NAME "ramdisk" +#define DEVICE_NR(device) (MINOR(device)) +#define DEVICE_NO_RANDOM + +#elif (MAJOR_NR == Z2RAM_MAJOR) + +/* Zorro II Ram */ +#define DEVICE_NAME "Z2RAM" +#define DEVICE_REQUEST do_z2_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == FLOPPY_MAJOR) + +static void floppy_off(unsigned int nr); + +#define DEVICE_NAME "floppy" +#define DEVICE_INTR do_floppy +#define DEVICE_REQUEST do_fd_request +#define DEVICE_NR(device) ( (MINOR(device) & 3) | ((MINOR(device) & 0x80 ) >> 5 )) +#define DEVICE_OFF(device) floppy_off(DEVICE_NR(device)) + +#elif (MAJOR_NR == HD_MAJOR) + +/* Hard disk: timeout is 6 seconds. 
*/ +#define DEVICE_NAME "hard disk" +#define DEVICE_INTR do_hd +#define TIMEOUT_VALUE (6*HZ) +#define DEVICE_REQUEST do_hd_request +#define DEVICE_NR(device) (MINOR(device)>>6) + +#elif (SCSI_DISK_MAJOR(MAJOR_NR)) + +#define DEVICE_NAME "scsidisk" +#define TIMEOUT_VALUE (2*HZ) +#define DEVICE_NR(device) (((MAJOR(device) & SD_MAJOR_MASK) << (8 - 4)) + (MINOR(device) >> 4)) + +/* Kludge to use the same number for both char and block major numbers */ +#elif (MAJOR_NR == MD_MAJOR) && defined(MD_DRIVER) + +#define DEVICE_NAME "Multiple devices driver" +#define DEVICE_REQUEST do_md_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == SCSI_TAPE_MAJOR) + +#define DEVICE_NAME "scsitape" +#define DEVICE_INTR do_st +#define DEVICE_NR(device) (MINOR(device) & 0x7f) + +#elif (MAJOR_NR == OSST_MAJOR) + +#define DEVICE_NAME "onstream" +#define DEVICE_INTR do_osst +#define DEVICE_NR(device) (MINOR(device) & 0x7f) +#define DEVICE_ON(device) +#define DEVICE_OFF(device) + +#elif (MAJOR_NR == SCSI_CDROM_MAJOR) + +#define DEVICE_NAME "CD-ROM" +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == XT_DISK_MAJOR) + +#define DEVICE_NAME "xt disk" +#define DEVICE_REQUEST do_xd_request +#define DEVICE_NR(device) (MINOR(device) >> 6) + +#elif (MAJOR_NR == PS2ESDI_MAJOR) + +#define DEVICE_NAME "PS/2 ESDI" +#define DEVICE_REQUEST do_ps2esdi_request +#define DEVICE_NR(device) (MINOR(device) >> 6) + +#elif (MAJOR_NR == CDU31A_CDROM_MAJOR) + +#define DEVICE_NAME "CDU31A" +#define DEVICE_REQUEST do_cdu31a_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == ACSI_MAJOR) && (defined(CONFIG_ATARI_ACSI) || defined(CONFIG_ATARI_ACSI_MODULE)) + +#define DEVICE_NAME "ACSI" +#define DEVICE_INTR do_acsi +#define DEVICE_REQUEST do_acsi_request +#define DEVICE_NR(device) (MINOR(device) >> 4) + +#elif (MAJOR_NR == MITSUMI_CDROM_MAJOR) + +#define DEVICE_NAME "Mitsumi CD-ROM" +/* #define DEVICE_INTR do_mcd */ +#define DEVICE_REQUEST do_mcd_request +#define 
DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MITSUMI_X_CDROM_MAJOR) + +#define DEVICE_NAME "Mitsumi CD-ROM" +/* #define DEVICE_INTR do_mcdx */ +#define DEVICE_REQUEST do_mcdx_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MATSUSHITA_CDROM_MAJOR) + +#define DEVICE_NAME "Matsushita CD-ROM controller #1" +#define DEVICE_REQUEST do_sbpcd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MATSUSHITA_CDROM2_MAJOR) + +#define DEVICE_NAME "Matsushita CD-ROM controller #2" +#define DEVICE_REQUEST do_sbpcd2_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MATSUSHITA_CDROM3_MAJOR) + +#define DEVICE_NAME "Matsushita CD-ROM controller #3" +#define DEVICE_REQUEST do_sbpcd3_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MATSUSHITA_CDROM4_MAJOR) + +#define DEVICE_NAME "Matsushita CD-ROM controller #4" +#define DEVICE_REQUEST do_sbpcd4_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == AZTECH_CDROM_MAJOR) + +#define DEVICE_NAME "Aztech CD-ROM" +#define DEVICE_REQUEST do_aztcd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == CDU535_CDROM_MAJOR) + +#define DEVICE_NAME "SONY-CDU535" +#define DEVICE_INTR do_cdu535 +#define DEVICE_REQUEST do_cdu535_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == GOLDSTAR_CDROM_MAJOR) + +#define DEVICE_NAME "Goldstar R420" +#define DEVICE_REQUEST do_gscd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == CM206_CDROM_MAJOR) +#define DEVICE_NAME "Philips/LMS CD-ROM cm206" +#define DEVICE_REQUEST do_cm206_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == OPTICS_CDROM_MAJOR) + +#define DEVICE_NAME "DOLPHIN 8000AT CD-ROM" +#define DEVICE_REQUEST do_optcd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == SANYO_CDROM_MAJOR) + +#define DEVICE_NAME "Sanyo H94A CD-ROM" +#define DEVICE_REQUEST do_sjcd_request +#define 
DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == APBLOCK_MAJOR) + +#define DEVICE_NAME "apblock" +#define DEVICE_REQUEST ap_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == DDV_MAJOR) + +#define DEVICE_NAME "ddv" +#define DEVICE_REQUEST ddv_request +#define DEVICE_NR(device) (MINOR(device)>>PARTN_BITS) + +#elif (MAJOR_NR == MFM_ACORN_MAJOR) + +#define DEVICE_NAME "mfm disk" +#define DEVICE_INTR do_mfm +#define DEVICE_REQUEST do_mfm_request +#define DEVICE_NR(device) (MINOR(device) >> 6) + +#elif (MAJOR_NR == NBD_MAJOR) + +#define DEVICE_NAME "nbd" +#define DEVICE_REQUEST do_nbd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MDISK_MAJOR) + +#define DEVICE_NAME "mdisk" +#define DEVICE_REQUEST mdisk_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == DASD_MAJOR) + +#define DEVICE_NAME "dasd" +#define DEVICE_REQUEST do_dasd_request +#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS) + +#elif (MAJOR_NR == I2O_MAJOR) + +#define DEVICE_NAME "I2O block" +#define DEVICE_REQUEST i2ob_request +#define DEVICE_NR(device) (MINOR(device)>>4) + +#elif (MAJOR_NR == COMPAQ_SMART2_MAJOR) + +#define DEVICE_NAME "ida" +#define TIMEOUT_VALUE (25*HZ) +#define DEVICE_REQUEST do_ida_request +#define DEVICE_NR(device) (MINOR(device) >> 4) + +#elif (MAJOR_NR == XLBLK_MAJOR) + +#define DEVICE_NAME "blk" +#define DEVICE_REQUEST do_xlblk_request +/* #define DEVICE_INTR */ +#define DEVICE_NR(device) (MINOR(device)) + +#endif /* MAJOR_NR == whatever */ + +/* provide DEVICE_xxx defaults, if not explicitly defined + * above in the MAJOR_NR==xxx if-elif tree */ +#ifndef DEVICE_ON +#define DEVICE_ON(device) do {} while (0) +#endif +#ifndef DEVICE_OFF +#define DEVICE_OFF(device) do {} while (0) +#endif + +#if (MAJOR_NR != SCSI_TAPE_MAJOR) && (MAJOR_NR != OSST_MAJOR) +#if !defined(IDE_DRIVER) + +#ifndef CURRENT +#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head) +#endif +#ifndef 
QUEUE_EMPTY +#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head) +#endif + +#ifndef DEVICE_NAME +#define DEVICE_NAME "unknown" +#endif + +#define CURRENT_DEV DEVICE_NR(CURRENT->rq_dev) + +#ifdef DEVICE_INTR +static void (*DEVICE_INTR)(void) = NULL; +#endif + +#define SET_INTR(x) (DEVICE_INTR = (x)) + +#ifdef DEVICE_REQUEST +static void (DEVICE_REQUEST)(request_queue_t *); +#endif + +#ifdef DEVICE_INTR +#define CLEAR_INTR SET_INTR(NULL) +#else +#define CLEAR_INTR +#endif + +#define INIT_REQUEST \ + if (QUEUE_EMPTY) {\ + CLEAR_INTR; \ + return; \ + } \ + if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \ + panic(DEVICE_NAME ": request list destroyed"); \ + if (CURRENT->bh) { \ + if (!buffer_locked(CURRENT->bh)) \ + panic(DEVICE_NAME ": block not locked"); \ + } + +#endif /* !defined(IDE_DRIVER) */ + + +#ifndef LOCAL_END_REQUEST /* If we have our own end_request, we do not want to include this mess */ + +#if ! SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR) + +static inline void end_request(int uptodate) { + struct request *req = CURRENT; + + if (end_that_request_first(req, uptodate, DEVICE_NAME)) + return; + +#ifndef DEVICE_NO_RANDOM + add_blkdev_randomness(MAJOR(req->rq_dev)); +#endif + DEVICE_OFF(req->rq_dev); + blkdev_dequeue_request(req); + end_that_request_last(req); +} + +#endif /* ! SCSI_BLK_MAJOR(MAJOR_NR) */ +#endif /* LOCAL_END_REQUEST */ + +#endif /* (MAJOR_NR != SCSI_TAPE_MAJOR) */ +#endif /* defined(MAJOR_NR) || defined(IDE_DRIVER) */ + +#endif /* _BLK_H */ diff --git a/xenolinux-2.4.16-sparse/include/linux/major.h b/xenolinux-2.4.16-sparse/include/linux/major.h new file mode 100644 index 0000000000..820bf68dc7 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/linux/major.h @@ -0,0 +1,199 @@ +#ifndef _LINUX_MAJOR_H +#define _LINUX_MAJOR_H + +/* + * This file has definitions for major device numbers. + * For the device number assignments, see Documentation/devices.txt. 
+ */ + +/* limits */ + +/* + * Important: Don't change this to 256. Major number 255 is and must be + * reserved for future expansion into a larger dev_t space. + */ +#define MAX_CHRDEV 255 +#define MAX_BLKDEV 255 + +#define UNNAMED_MAJOR 0 +#define MEM_MAJOR 1 +#define RAMDISK_MAJOR 1 +#define FLOPPY_MAJOR 2 +#define PTY_MASTER_MAJOR 2 +#define IDE0_MAJOR 3 +#define PTY_SLAVE_MAJOR 3 +#define HD_MAJOR IDE0_MAJOR +#define TTY_MAJOR 4 +#define TTYAUX_MAJOR 5 +#define LP_MAJOR 6 +#define VCS_MAJOR 7 +#define LOOP_MAJOR 7 +#define SCSI_DISK0_MAJOR 8 +#define SCSI_TAPE_MAJOR 9 +#define MD_MAJOR 9 +#define MISC_MAJOR 10 +#define SCSI_CDROM_MAJOR 11 +#define QIC02_TAPE_MAJOR 12 +#define XT_DISK_MAJOR 13 +#define SOUND_MAJOR 14 +#define CDU31A_CDROM_MAJOR 15 +#define JOYSTICK_MAJOR 15 +#define GOLDSTAR_CDROM_MAJOR 16 +#define OPTICS_CDROM_MAJOR 17 +#define SANYO_CDROM_MAJOR 18 +#define CYCLADES_MAJOR 19 +#define CYCLADESAUX_MAJOR 20 +#define MITSUMI_X_CDROM_MAJOR 20 +#define MFM_ACORN_MAJOR 21 /* ARM Linux /dev/mfm */ +#define SCSI_GENERIC_MAJOR 21 +#define Z8530_MAJOR 34 +#define DIGI_MAJOR 23 +#define IDE1_MAJOR 22 +#define DIGICU_MAJOR 22 +#define MITSUMI_CDROM_MAJOR 23 +#define CDU535_CDROM_MAJOR 24 +#define STL_SERIALMAJOR 24 +#define MATSUSHITA_CDROM_MAJOR 25 +#define STL_CALLOUTMAJOR 25 +#define MATSUSHITA_CDROM2_MAJOR 26 +#define QIC117_TAPE_MAJOR 27 +#define MATSUSHITA_CDROM3_MAJOR 27 +#define MATSUSHITA_CDROM4_MAJOR 28 +#define STL_SIOMEMMAJOR 28 +#define ACSI_MAJOR 28 +#define AZTECH_CDROM_MAJOR 29 +#define GRAPHDEV_MAJOR 29 /* SparcLinux & Linux/68k /dev/fb */ +#define SHMIQ_MAJOR 85 /* Linux/mips, SGI /dev/shmiq */ +#define CM206_CDROM_MAJOR 32 +#define IDE2_MAJOR 33 +#define IDE3_MAJOR 34 +#define XPRAM_MAJOR 35 /* expanded storage on S/390 = "slow ram" */ + /* proposed by Peter */ +#define NETLINK_MAJOR 36 +#define PS2ESDI_MAJOR 36 +#define IDETAPE_MAJOR 37 +#define Z2RAM_MAJOR 37 +#define APBLOCK_MAJOR 38 /* AP1000 Block device */ +#define DDV_MAJOR 39 /* 
AP1000 DDV block device */ +#define NBD_MAJOR 43 /* Network block device */ +#define RISCOM8_NORMAL_MAJOR 48 +#define DAC960_MAJOR 48 /* 48..55 */ +#define RISCOM8_CALLOUT_MAJOR 49 +#define MKISS_MAJOR 55 +#define DSP56K_MAJOR 55 /* DSP56001 processor device */ + +#define IDE4_MAJOR 56 +#define IDE5_MAJOR 57 + +#define LVM_BLK_MAJOR 58 /* Logical Volume Manager */ + +#define SCSI_DISK1_MAJOR 65 +#define SCSI_DISK2_MAJOR 66 +#define SCSI_DISK3_MAJOR 67 +#define SCSI_DISK4_MAJOR 68 +#define SCSI_DISK5_MAJOR 69 +#define SCSI_DISK6_MAJOR 70 +#define SCSI_DISK7_MAJOR 71 + + +#define COMPAQ_SMART2_MAJOR 72 +#define COMPAQ_SMART2_MAJOR1 73 +#define COMPAQ_SMART2_MAJOR2 74 +#define COMPAQ_SMART2_MAJOR3 75 +#define COMPAQ_SMART2_MAJOR4 76 +#define COMPAQ_SMART2_MAJOR5 77 +#define COMPAQ_SMART2_MAJOR6 78 +#define COMPAQ_SMART2_MAJOR7 79 + +#define SPECIALIX_NORMAL_MAJOR 75 +#define SPECIALIX_CALLOUT_MAJOR 76 + +#define COMPAQ_CISS_MAJOR 104 +#define COMPAQ_CISS_MAJOR1 105 +#define COMPAQ_CISS_MAJOR2 106 +#define COMPAQ_CISS_MAJOR3 107 +#define COMPAQ_CISS_MAJOR4 108 +#define COMPAQ_CISS_MAJOR5 109 +#define COMPAQ_CISS_MAJOR6 110 +#define COMPAQ_CISS_MAJOR7 111 + +#define ATARAID_MAJOR 114 + +#define DASD_MAJOR 94 /* Official assignations from Peter */ + +#define MDISK_MAJOR 95 /* Official assignations from Peter */ + +#define I2O_MAJOR 80 /* 80->87 */ + +#define IDE6_MAJOR 88 +#define IDE7_MAJOR 89 +#define IDE8_MAJOR 90 +#define IDE9_MAJOR 91 + +#define UBD_MAJOR 98 + +#define AURORA_MAJOR 79 + +#define JSFD_MAJOR 99 + +#define PHONE_MAJOR 100 + +#define LVM_CHAR_MAJOR 109 /* Logical Volume Manager */ + +#define UMEM_MAJOR 116 /* http://www.umem.com/ Battery Backed RAM */ + +#define XLBLK_MAJOR 123 /* XenoLinux Block Device */ + +#define RTF_MAJOR 150 +#define RAW_MAJOR 162 + +#define USB_ACM_MAJOR 166 +#define USB_ACM_AUX_MAJOR 167 +#define USB_CHAR_MAJOR 180 + +#define UNIX98_PTY_MASTER_MAJOR 128 +#define UNIX98_PTY_MAJOR_COUNT 8 +#define UNIX98_PTY_SLAVE_MAJOR 
(UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) + +#define VXVM_MAJOR 199 /* VERITAS volume i/o driver */ +#define VXSPEC_MAJOR 200 /* VERITAS volume config driver */ +#define VXDMP_MAJOR 201 /* VERITAS volume multipath driver */ + +#define MSR_MAJOR 202 +#define CPUID_MAJOR 203 + +#define OSST_MAJOR 206 /* OnStream-SCx0 SCSI tape */ + +#define IBM_TTY3270_MAJOR 227 /* Official allocations now */ +#define IBM_FS3270_MAJOR 228 + +/* + * Tests for SCSI devices. + */ + +#define SCSI_DISK_MAJOR(M) ((M) == SCSI_DISK0_MAJOR || \ + ((M) >= SCSI_DISK1_MAJOR && (M) <= SCSI_DISK7_MAJOR)) + +#define SCSI_BLK_MAJOR(M) \ + (SCSI_DISK_MAJOR(M) \ + || (M) == SCSI_CDROM_MAJOR) + +static __inline__ int scsi_blk_major(int m) { + return SCSI_BLK_MAJOR(m); +} + +/* + * Tests for IDE devices + */ +#define IDE_DISK_MAJOR(M) ((M) == IDE0_MAJOR || (M) == IDE1_MAJOR || \ + (M) == IDE2_MAJOR || (M) == IDE3_MAJOR || \ + (M) == IDE4_MAJOR || (M) == IDE5_MAJOR || \ + (M) == IDE6_MAJOR || (M) == IDE7_MAJOR || \ + (M) == IDE8_MAJOR || (M) == IDE9_MAJOR) + +static __inline__ int ide_blk_major(int m) +{ + return IDE_DISK_MAJOR(m); +} + +#endif diff --git a/xenolinux-2.4.16-sparse/init/main.c b/xenolinux-2.4.16-sparse/init/main.c new file mode 100644 index 0000000000..a48a7773b8 --- /dev/null +++ b/xenolinux-2.4.16-sparse/init/main.c @@ -0,0 +1,871 @@ +/* + * linux/init/main.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * GK 2/5/95 - Changed to support mounting root fs via NFS + * Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96 + * Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96 + * Simplified starting of init: Michael A. 
Griffith <grif@acm.org> + */ + +#define __KERNEL_SYSCALLS__ + +#include <linux/config.h> +#include <linux/proc_fs.h> +#include <linux/devfs_fs_kernel.h> +#include <linux/unistd.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/delay.h> +#include <linux/utsname.h> +#include <linux/ioport.h> +#include <linux/init.h> +#include <linux/smp_lock.h> +#include <linux/blk.h> +#include <linux/hdreg.h> +#include <linux/iobuf.h> +#include <linux/bootmem.h> +#include <linux/tty.h> + +#include <asm/io.h> +#include <asm/bugs.h> + +#if defined(CONFIG_ARCH_S390) +#include <asm/s390mach.h> +#include <asm/ccwcache.h> +#endif + +#ifdef CONFIG_PCI +#include <linux/pci.h> +#endif + +#ifdef CONFIG_DIO +#include <linux/dio.h> +#endif + +#ifdef CONFIG_ZORRO +#include <linux/zorro.h> +#endif + +#ifdef CONFIG_MTRR +# include <asm/mtrr.h> +#endif + +#ifdef CONFIG_NUBUS +#include <linux/nubus.h> +#endif + +#ifdef CONFIG_ISAPNP +#include <linux/isapnp.h> +#endif + +#ifdef CONFIG_IRDA +extern int irda_proto_init(void); +extern int irda_device_init(void); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC +#include <asm/smp.h> +#endif + +#if defined(CONFIG_KDB) +#include <linux/kdb.h> +#endif + +/* + * Versions of gcc older than that listed below may actually compile + * and link okay, but the end product can have subtle run time bugs. + * To avoid associated bogus bug reports, we flatly refuse to compile + * with a gcc that is known to be too old from the very beginning. + */ +#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 91) +#error Sorry, your GCC is too old. It builds incorrect kernels. 
+#endif + +extern char _stext, _etext; +extern char *linux_banner; + +static int init(void *); + +extern void init_IRQ(void); +extern void init_modules(void); +extern void sock_init(void); +extern void fork_init(unsigned long); +extern void mca_init(void); +extern void sbus_init(void); +extern void ppc_init(void); +extern void sysctl_init(void); +extern void signals_init(void); +extern int init_pcmcia_ds(void); + +extern void free_initmem(void); + +#ifdef CONFIG_TC +extern void tc_init(void); +#endif + +extern void ecard_init(void); + +#if defined(CONFIG_SYSVIPC) +extern void ipc_init(void); +#endif + +/* + * Boot command-line arguments + */ +#define MAX_INIT_ARGS 8 +#define MAX_INIT_ENVS 8 + +extern void time_init(void); +extern void softirq_init(void); + +int rows, cols; + +#ifdef CONFIG_BLK_DEV_INITRD +unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ +#endif + +int root_mountflags = MS_RDONLY; +char *execute_command; +char root_device_name[64]; + + +static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; +static char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; + +static int __init profile_setup(char *str) +{ + int par; + if (get_option(&str,&par)) prof_shift = par; + return 1; +} + +__setup("profile=", profile_setup); + + +static struct dev_name_struct { + const char *name; + const int num; +} root_dev_names[] __initdata = { + { "nfs", 0x00ff }, + { "blk", 0x7b00 }, + { "hda", 0x0300 }, + { "hdb", 0x0340 }, + { "loop", 0x0700 }, + { "hdc", 0x1600 }, + { "hdd", 0x1640 }, + { "hde", 0x2100 }, + { "hdf", 0x2140 }, + { "hdg", 0x2200 }, + { "hdh", 0x2240 }, + { "hdi", 0x3800 }, + { "hdj", 0x3840 }, + { "hdk", 0x3900 }, + { "hdl", 0x3940 }, + { "hdm", 0x5800 }, + { "hdn", 0x5840 }, + { "hdo", 0x5900 }, + { "hdp", 0x5940 }, + { "hdq", 0x5A00 }, + { "hdr", 0x5A40 }, + { "hds", 0x5B00 }, + { "hdt", 0x5B40 }, + { "sda", 0x0800 }, + { "sdb", 0x0810 }, + { "sdc", 0x0820 }, + { "sdd", 0x0830 }, + { "sde", 0x0840 }, + { 
"sdf", 0x0850 }, + { "sdg", 0x0860 }, + { "sdh", 0x0870 }, + { "sdi", 0x0880 }, + { "sdj", 0x0890 }, + { "sdk", 0x08a0 }, + { "sdl", 0x08b0 }, + { "sdm", 0x08c0 }, + { "sdn", 0x08d0 }, + { "sdo", 0x08e0 }, + { "sdp", 0x08f0 }, + { "ada", 0x1c00 }, + { "adb", 0x1c10 }, + { "adc", 0x1c20 }, + { "add", 0x1c30 }, + { "ade", 0x1c40 }, + { "fd", 0x0200 }, + { "md", 0x0900 }, + { "xda", 0x0d00 }, + { "xdb", 0x0d40 }, + { "ram", 0x0100 }, + { "scd", 0x0b00 }, + { "mcd", 0x1700 }, + { "cdu535", 0x1800 }, + { "sonycd", 0x1800 }, + { "aztcd", 0x1d00 }, + { "cm206cd", 0x2000 }, + { "gscd", 0x1000 }, + { "sbpcd", 0x1900 }, + { "eda", 0x2400 }, + { "edb", 0x2440 }, + { "pda", 0x2d00 }, + { "pdb", 0x2d10 }, + { "pdc", 0x2d20 }, + { "pdd", 0x2d30 }, + { "pcd", 0x2e00 }, + { "pf", 0x2f00 }, + { "apblock", APBLOCK_MAJOR << 8}, + { "ddv", DDV_MAJOR << 8}, + { "jsfd", JSFD_MAJOR << 8}, +#if defined(CONFIG_ARCH_S390) + { "dasda", (DASD_MAJOR << MINORBITS) }, + { "dasdb", (DASD_MAJOR << MINORBITS) + (1 << 2) }, + { "dasdc", (DASD_MAJOR << MINORBITS) + (2 << 2) }, + { "dasdd", (DASD_MAJOR << MINORBITS) + (3 << 2) }, + { "dasde", (DASD_MAJOR << MINORBITS) + (4 << 2) }, + { "dasdf", (DASD_MAJOR << MINORBITS) + (5 << 2) }, + { "dasdg", (DASD_MAJOR << MINORBITS) + (6 << 2) }, + { "dasdh", (DASD_MAJOR << MINORBITS) + (7 << 2) }, +#endif +#if defined(CONFIG_BLK_CPQ_DA) || defined(CONFIG_BLK_CPQ_DA_MODULE) + { "ida/c0d0p",0x4800 }, + { "ida/c0d1p",0x4810 }, + { "ida/c0d2p",0x4820 }, + { "ida/c0d3p",0x4830 }, + { "ida/c0d4p",0x4840 }, + { "ida/c0d5p",0x4850 }, + { "ida/c0d6p",0x4860 }, + { "ida/c0d7p",0x4870 }, + { "ida/c0d8p",0x4880 }, + { "ida/c0d9p",0x4890 }, + { "ida/c0d10p",0x48A0 }, + { "ida/c0d11p",0x48B0 }, + { "ida/c0d12p",0x48C0 }, + { "ida/c0d13p",0x48D0 }, + { "ida/c0d14p",0x48E0 }, + { "ida/c0d15p",0x48F0 }, +#endif +#if defined(CONFIG_BLK_CPQ_CISS_DA) || defined(CONFIG_BLK_CPQ_CISS_DA_MODULE) + { "cciss/c0d0p",0x6800 }, + { "cciss/c0d1p",0x6810 }, + { "cciss/c0d2p",0x6820 }, + { 
"cciss/c0d3p",0x6830 }, + { "cciss/c0d4p",0x6840 }, + { "cciss/c0d5p",0x6850 }, + { "cciss/c0d6p",0x6860 }, + { "cciss/c0d7p",0x6870 }, + { "cciss/c0d8p",0x6880 }, + { "cciss/c0d9p",0x6890 }, + { "cciss/c0d10p",0x68A0 }, + { "cciss/c0d11p",0x68B0 }, + { "cciss/c0d12p",0x68C0 }, + { "cciss/c0d13p",0x68D0 }, + { "cciss/c0d14p",0x68E0 }, + { "cciss/c0d15p",0x68F0 }, +#endif + { "nftla", 0x5d00 }, + { "nftlb", 0x5d10 }, + { "nftlc", 0x5d20 }, + { "nftld", 0x5d30 }, + { "ftla", 0x2c00 }, + { "ftlb", 0x2c08 }, + { "ftlc", 0x2c10 }, + { "ftld", 0x2c18 }, + { "mtdblock", 0x1f00 }, + { NULL, 0 } +}; + +kdev_t __init name_to_kdev_t(char *line) +{ + int base = 0; + + if (strncmp(line,"/dev/",5) == 0) { + struct dev_name_struct *dev = root_dev_names; + line += 5; + do { + int len = strlen(dev->name); + if (strncmp(line,dev->name,len) == 0) { + line += len; + base = dev->num; + break; + } + dev++; + } while (dev->name); + } + return to_kdev_t(base + simple_strtoul(line,NULL,base?10:16)); +} + +static int __init root_dev_setup(char *line) +{ + int i; + char ch; + + ROOT_DEV = name_to_kdev_t(line); + memset (root_device_name, 0, sizeof root_device_name); + if (strncmp (line, "/dev/", 5) == 0) line += 5; + for (i = 0; i < sizeof root_device_name - 1; ++i) + { + ch = line[i]; + if ( isspace (ch) || (ch == ',') || (ch == '\0') ) break; + root_device_name[i] = ch; + } + return 1; +} + +__setup("root=", root_dev_setup); + +static int __init checksetup(char *line) +{ + struct kernel_param *p; + + p = &__setup_start; + do { + int n = strlen(p->str); + if (!strncmp(line,p->str,n)) { + if (p->setup_func(line+n)) + return 1; + } + p++; + } while (p < &__setup_end); + return 0; +} + +/* this should be approx 2 Bo*oMips to start (note initial shift), and will + still work even if initially too large, it will just take slightly longer */ +unsigned long loops_per_jiffy = (1<<12); + +/* This is the number of bits of precision for the loops_per_jiffy. Each + bit takes on average 1.5/HZ seconds. 
This (like the original) is a little + better than 1% */ +#define LPS_PREC 8 + +void __init calibrate_delay(void) +{ + unsigned long ticks, loopbit; + int lps_precision = LPS_PREC; + + loops_per_jiffy = (1<<12); + + printk("Calibrating delay loop... "); + while (loops_per_jiffy <<= 1) { + /* wait for "start of" clock tick */ + ticks = jiffies; + while (ticks == jiffies) + /* nothing */; + /* Go .. */ + ticks = jiffies; + __delay(loops_per_jiffy); + ticks = jiffies - ticks; + if (ticks) + break; + } + +/* Do a binary approximation to get loops_per_jiffy set to equal one clock + (up to lps_precision bits) */ + loops_per_jiffy >>= 1; + loopbit = loops_per_jiffy; + while ( lps_precision-- && (loopbit >>= 1) ) { + loops_per_jiffy |= loopbit; + ticks = jiffies; + while (ticks == jiffies); + ticks = jiffies; + __delay(loops_per_jiffy); + if (jiffies != ticks) /* longer than 1 tick */ + loops_per_jiffy &= ~loopbit; + } + +/* Round the value and print it */ + printk("%lu.%02lu BogoMIPS\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100); +} + +static int __init readonly(char *str) +{ + if (*str) + return 0; + root_mountflags |= MS_RDONLY; + return 1; +} + +static int __init readwrite(char *str) +{ + if (*str) + return 0; + root_mountflags &= ~MS_RDONLY; + return 1; +} + +static int __init debug_kernel(char *str) +{ + if (*str) + return 0; + console_loglevel = 10; + return 1; +} + +static int __init quiet_kernel(char *str) +{ + if (*str) + return 0; + console_loglevel = 4; + return 1; +} + +__setup("ro", readonly); +__setup("rw", readwrite); +__setup("debug", debug_kernel); +__setup("quiet", quiet_kernel); + +/* + * This is a simple kernel command line parsing function: it parses + * the command line, and fills in the arguments/environment to init + * as appropriate. Any cmd-line option is taken to be an environment + * variable if it contains the character '='. + * + * This routine also checks for options meant for the kernel. 
+ * These options are not given to init - they are for internal kernel use only. + */ +static void __init parse_options(char *line) +{ + char *next,*quote; + int args, envs; + + if (!*line) + return; + args = 0; + envs = 1; /* TERM is set to 'linux' by default */ + next = line; + while ((line = next) != NULL) { + quote = strchr(line,'"'); + next = strchr(line, ' '); + while (next != NULL && quote != NULL && quote < next) { + /* we found a left quote before the next blank + * now we have to find the matching right quote + */ + next = strchr(quote+1, '"'); + if (next != NULL) { + quote = strchr(next+1, '"'); + next = strchr(next+1, ' '); + } + } + if (next != NULL) + *next++ = 0; +#if defined(CONFIG_KDB) + /* kdb, kdb=on, kdb=off, kdb=early */ + if (strncmp(line, "kdb", 3) == 0) { + if (line[3] == '\0') { + /* Backward compatibility, kdb with no option means early activation */ + printk("Boot flag kdb with no options is obsolete, use kdb=early\n"); + kdb_on = 1; + kdb_flags |= KDB_FLAG_EARLYKDB; + continue; + } + if (line[3] == '=') { + if (strcmp(line+4, "on") == 0) { + kdb_on = 1; + continue; + } + if (strcmp(line+4, "off") == 0) { + kdb_on = 0; + continue; + } + if (strcmp(line+4, "early") == 0) { + kdb_on = 1; + kdb_flags |= KDB_FLAG_EARLYKDB; + continue; + } + printk("Boot flag %s not recognised, assumed to be environment variable\n", line); + } + } +#endif + if (!strncmp(line,"init=",5)) { + line += 5; + execute_command = line; + /* In case LILO is going to boot us with default command line, + * it prepends "auto" before the whole cmdline which makes + * the shell think it should execute a script with such name. + * So we ignore all arguments entered _before_ init=... [MJ] + */ + args = 0; + continue; + } + if (checksetup(line)) + continue; + + /* + * Then check if it's an environment variable or + * an option. 
+ */ + if (strchr(line,'=')) { + if (envs >= MAX_INIT_ENVS) + break; + envp_init[++envs] = line; + } else { + if (args >= MAX_INIT_ARGS) + break; + if (*line) + argv_init[++args] = line; + } + } + argv_init[args+1] = NULL; + envp_init[envs+1] = NULL; +} + + +extern void setup_arch(char **); +extern void cpu_idle(void); + +unsigned long wait_init_idle; + +#ifndef CONFIG_SMP + +#ifdef CONFIG_X86_LOCAL_APIC +static void __init smp_init(void) +{ + APIC_init_uniprocessor(); +} +#else +#define smp_init() do { } while (0) +#endif + +#else + + +/* Called by boot processor to activate the rest. */ +static void __init smp_init(void) +{ + /* Get other processors into their bootup holding patterns. */ + smp_boot_cpus(); + wait_init_idle = cpu_online_map; + clear_bit(current->processor, &wait_init_idle); /* Don't wait on me! */ + + smp_threads_ready=1; + smp_commence(); + + /* Wait for the other cpus to set up their idle processes */ + printk("Waiting on wait_init_idle (map = 0x%lx)\n", wait_init_idle); + while (wait_init_idle) { + cpu_relax(); + barrier(); + } + printk("All processors have done init_idle\n"); +} + +#endif + +/* + * We need to finalize in a non-__init function or else race conditions + * between the root thread and the init thread may cause start_kernel to + * be reaped by free_initmem before the root thread has proceeded to + * cpu_idle. + */ + +static void rest_init(void) +{ + kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL); + unlock_kernel(); + current->need_resched = 1; + cpu_idle(); +} + +/* + * Activate the first processor. + */ + +asmlinkage void __init start_kernel(void) +{ + char * command_line; + unsigned long mempages; + extern char saved_command_line[]; +/* + * Interrupts are still disabled. 
Do necessary setups, then + * enable them + */ + lock_kernel(); + printk(linux_banner); + setup_arch(&command_line); + printk("Kernel command line: %s\n", saved_command_line); + parse_options(command_line); + trap_init(); + init_IRQ(); + sched_init(); + softirq_init(); + time_init(); + + /* + * HACK ALERT! This is early. We're enabling the console before + * we've done PCI setups etc, and console_init() must be aware of + * this. But we do want output early, in case something goes wrong. + */ + console_init(); +#ifdef CONFIG_MODULES + init_modules(); +#endif + if (prof_shift) { + unsigned int size; + /* only text is profiled */ + prof_len = (unsigned long) &_etext - (unsigned long) &_stext; + prof_len >>= prof_shift; + + size = prof_len * sizeof(unsigned int) + PAGE_SIZE-1; + prof_buffer = (unsigned int *) alloc_bootmem(size); + } + + kmem_cache_init(); + sti(); + calibrate_delay(); +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start && !initrd_below_start_ok && + initrd_start < min_low_pfn << PAGE_SHIFT) { + printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - " + "disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT); + initrd_start = 0; + } +#endif + mem_init(); + kmem_cache_sizes_init(); +#if defined(CONFIG_KDB) + kdb_init(); + if (KDB_FLAG(EARLYKDB)) { + KDB_ENTER(); + } +#endif + mempages = num_physpages; + + fork_init(mempages); + proc_caches_init(); + vfs_caches_init(mempages); + buffer_init(mempages); + page_cache_init(mempages); +#if defined(CONFIG_ARCH_S390) + ccwcache_init(); +#endif + signals_init(); +#ifdef CONFIG_PROC_FS + proc_root_init(); +#endif +#if defined(CONFIG_SYSVIPC) + ipc_init(); +#endif + check_bugs(); + printk("POSIX conformance testing by UNIFIX\n"); + + /* + * We count on the initial thread going ok + * Like idlers init is an unlocked kernel thread, which will + * make syscalls (and thus be locked). 
+ */ + smp_init(); + rest_init(); +} + +#ifdef CONFIG_BLK_DEV_INITRD +static int do_linuxrc(void * shell) +{ + static char *argv[] = { "linuxrc", NULL, }; + + close(0);close(1);close(2); + setsid(); + (void) open("/dev/console",O_RDWR,0); + (void) dup(0); + (void) dup(0); + return execve(shell, argv, envp_init); +} + +#endif + +struct task_struct *child_reaper = &init_task; + +static void __init do_initcalls(void) +{ + initcall_t *call; + + call = &__initcall_start; + do { + (*call)(); + call++; + } while (call < &__initcall_end); + + /* Make sure there is no pending stuff from the initcall sequence */ + flush_scheduled_tasks(); +} + +/* + * Ok, the machine is now initialized. None of the devices + * have been touched yet, but the CPU subsystem is up and + * running, and memory and process management works. + * + * Now we can finally start doing some real work.. + */ +static void __init do_basic_setup(void) +{ + + /* + * Tell the world that we're going to be the grim + * reaper of innocent orphaned children. + * + * We don't want people to have to make incorrect + * assumptions about where in the task array this + * can be found. + */ + child_reaper = current; + +#if defined(CONFIG_MTRR) /* Do this after SMP initialization */ +/* + * We should probably create some architecture-dependent "fixup after + * everything is up" style function where this would belong better + * than in init/main.c.. + */ + mtrr_init(); +#endif + +#ifdef CONFIG_SYSCTL + sysctl_init(); +#endif + + /* + * Ok, at this point all CPU's should be initialized, so + * we can start looking into devices.. 
+ */ +#if defined(CONFIG_ARCH_S390) + s390_init_machine_check(); +#endif + +#ifdef CONFIG_PCI + pci_init(); +#endif +#ifdef CONFIG_SBUS + sbus_init(); +#endif +#if defined(CONFIG_PPC) + ppc_init(); +#endif +#ifdef CONFIG_MCA + mca_init(); +#endif +#ifdef CONFIG_ARCH_ACORN + ecard_init(); +#endif +#ifdef CONFIG_ZORRO + zorro_init(); +#endif +#ifdef CONFIG_DIO + dio_init(); +#endif +#ifdef CONFIG_NUBUS + nubus_init(); +#endif +#ifdef CONFIG_ISAPNP + isapnp_init(); +#endif +#ifdef CONFIG_TC + tc_init(); +#endif + + /* Networking initialization needs a process context */ + sock_init(); + + start_context_thread(); + do_initcalls(); + +#ifdef CONFIG_IRDA + irda_proto_init(); + irda_device_init(); /* Must be done after protocol initialization */ +#endif +#ifdef CONFIG_PCMCIA + init_pcmcia_ds(); /* Do this last */ +#endif +} + +extern void rd_load(void); +extern void initrd_load(void); + +/* + * Prepare the namespace - decide what/where to mount, load ramdisks, etc. + */ +static void prepare_namespace(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + int real_root_mountflags = root_mountflags; + if (!initrd_start) + mount_initrd = 0; + if (mount_initrd) + root_mountflags &= ~MS_RDONLY; + real_root_dev = ROOT_DEV; +#endif + +#ifdef CONFIG_BLK_DEV_RAM +#ifdef CONFIG_BLK_DEV_INITRD + if (mount_initrd) + initrd_load(); + else +#endif + rd_load(); +#endif + + /* Mount the root filesystem.. 
*/ + mount_root(); + + mount_devfs_fs (); + +#ifdef CONFIG_BLK_DEV_INITRD + root_mountflags = real_root_mountflags; + if (mount_initrd && ROOT_DEV != real_root_dev + && MAJOR(ROOT_DEV) == RAMDISK_MAJOR && MINOR(ROOT_DEV) == 0) { + int error; + int i, pid; + + pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD); + if (pid > 0) { + while (pid != wait(&i)) { + current->policy |= SCHED_YIELD; + schedule(); + } + } + if (MAJOR(real_root_dev) != RAMDISK_MAJOR + || MINOR(real_root_dev) != 0) { + error = change_root(real_root_dev,"/initrd"); + if (error) + printk(KERN_ERR "Change root to /initrd: " + "error %d\n",error); + } + } +#endif +} + +static int init(void * unused) +{ + lock_kernel(); + do_basic_setup(); + + prepare_namespace(); + + /* + * Ok, we have completed the initial bootup, and + * we're essentially up and running. Get rid of the + * initmem segments and start the user-mode stuff.. + */ + free_initmem(); + unlock_kernel(); + + if (open("/dev/console", O_RDWR, 0) < 0) + printk("Warning: unable to open an initial console.\n"); + + (void) dup(0); + (void) dup(0); + + /* + * We try each of these until one succeeds. + * + * The Bourne shell can be used instead of init if we are + * trying to recover a really broken machine. + */ + + if (execute_command) + execve(execute_command,argv_init,envp_init); + execve("/sbin/init",argv_init,envp_init); + execve("/etc/init",argv_init,envp_init); + execve("/bin/init",argv_init,envp_init); + execve("/bin/sh",argv_init,envp_init); + panic("No init found. Try passing init= option to kernel."); +} |